libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pwizmsrunreader.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/msrun/private/pwizmsrunreader.cpp
3 * \date 29/05/2018
4 * \author Olivier Langella
5 * \brief MS run file reader based on the ProteoWizard library
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31
32#include <QDebug>
33
34#include "pwizmsrunreader.h"
35
36#include <pwiz/data/msdata/DefaultReaderList.hpp>
37
38
39#include "../../utils.h"
40#include "../../pappsoexception.h"
41#include "../../exception/exceptionnotfound.h"
42#include "../../exception/exceptionnotpossible.h"
43
44
45// int pwizMsRunReaderMetaTypeId =
46// qRegisterMetaType<pappso::PwizMsRunReader>("pappso::PwizMsRunReader");
47
48
49namespace pappso
50{
51
52
54 : MsRunReader(msrun_id_csp)
55{
56 // The initialization needs to be done immediately so that we get the pwiz
57 // MsDataPtr corresponding to the right ms_run_id in the parameter. That
58 // pointer will be set to msp_msData.
59
60 initialize();
61}
62
63
64void
66{
67 std::string file_name_std =
69
70 // Make a backup of the current locale
71 std::string env_backup = setlocale(LC_ALL, "");
72 // struct lconv *lc = localeconv();
73
74 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
75 //<< "env_backup=" << env_backup.c_str() << "lc->decimal_point"
76 //<< lc->decimal_point;
77
78 // Now actually search the useful MSDataPtr to the member variable.
79
80 pwiz::msdata::DefaultReaderList defaultReaderList;
81
82 std::vector<pwiz::msdata::MSDataPtr> msDataPtrVector;
83
84 try
85 {
86 defaultReaderList.read(file_name_std, msDataPtrVector);
87 }
88 catch(std::exception &error)
89 {
90 qDebug() << QString("Failed to read the data from file %1")
91 .arg(QString::fromStdString(file_name_std));
92
93 throw(PappsoException(
94 QString("Error reading file %1 in PwizMsRunReader, for msrun %2:\n%3")
95 .arg(mcsp_msRunId->getFileName())
96 .arg(mcsp_msRunId.get()->toString())
97 .arg(error.what())));
98 }
99
100 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
101 //<< "The number of runs is:" << msDataPtrVector.size()
102 //<< "The number of spectra in first run is:"
103 //<< msDataPtrVector.at(0)->run.spectrumListPtr->size();
104
105 // Single-run file handling here.
106
107 // Specific case of the MGF data format: we do not have a run id for that kind
108 // of data. In this case there must be a single run!
109
110 if(mcsp_msRunId->getRunId().isEmpty())
111 {
112 if(msDataPtrVector.size() != 1)
113 throw(
114 ExceptionNotPossible("For the kind of file at hand there can only be "
115 "one run in the file."));
116
117 // At this point we know the single msDataPtr is the one we are looking
118 // for.
119
120 msp_msData = msDataPtrVector.front();
121 }
122 else
123 {
124 // Multi-run file handling here.
125 for(auto &msDataPtr : msDataPtrVector)
126 {
127 qDebug() << "msDataPtr->run.id=" << msDataPtr->run.id.c_str();
128 qDebug() << "mcsp_msRunId->getRunId()=" << mcsp_msRunId->getRunId();
129 qDebug() << "mcsp_msRunId->getXmlId()=" << mcsp_msRunId->getXmlId();
130 qDebug() << "mcsp_msRunId->getSampleName()=" << mcsp_msRunId->getSampleName();
131 if(msDataPtr->run.id == mcsp_msRunId->getRunId().toStdString())
132 {
133 msp_msData = msDataPtr;
134
135 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
136 //<< "Found the right MSDataPtr for run id.";
137
138 break;
139 }
140 }
141 }
142
143 if(msp_msData == nullptr)
144 {
146 QString("Could not find a pwiz MSDataPtr matching the requested run id : %1")
147 .arg(mcsp_msRunId.get()->toString())));
148 }
149
150
151 // check if this MS run can be used with scan numbers
152 // MS:1000490 Agilent instrument model
153 pwiz::cv::CVID native_id_format =
154 pwiz::msdata::id::getDefaultNativeIDFormat(*msp_msData.get());
155
156 // msp_msData.get()->getDefaultNativeIDFormat();
157
158 if(native_id_format == pwiz::cv::CVID::MS_Thermo_nativeID_format)
159 {
160 m_hasScanNumbers = true;
161 }
162 else
163 {
164 m_hasScanNumbers = false;
165 }
166
167 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::mzXML)
168 {
169 m_hasScanNumbers = true;
170 }
171}
172
176
177
178const OboPsiModTerm
180{
181
182 OboPsiModTerm term;
183
184 term.m_accession = "MS:1000824";
185 term.m_name = "no nativeID format";
186 term.m_definition =
187 "No nativeID format indicates that the file tagged with this term does not "
188 "contain spectra that can have a nativeID format.";
189
190
191 pwiz::cv::CVID cvid =
192 pwiz::msdata::id::getDefaultNativeIDFormat(*(msp_msData.get()));
193
194 switch(cvid)
195 {
196 case pwiz::cv::MS_Thermo_nativeID_format:
197 term.m_accession = "MS:1000768";
198 term.m_name = "Thermo nativeID format";
199 term.m_definition =
200 "Native format defined by controllerType=xsd:nonNegativeInteger "
201 "controllerNumber=xsd:positiveInteger scan=xsd:positiveInteger.";
202 break;
203 default:
204 break;
205 }
206 return term;
207}
208
209pwiz::msdata::SpectrumPtr
210PwizMsRunReader::getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list,
211 std::size_t spectrum_index,
212 bool want_binary_data) const
213{
214 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp;
215
216 try
217 {
218 native_pwiz_spectrum_sp =
219 p_spectrum_list->spectrum(spectrum_index, want_binary_data);
220 }
221 catch(std::runtime_error &error)
222 {
223 qDebug() << "getPwizSpectrumPtr error " << error.what() << " "
224 << typeid(error).name();
225
226 throw ExceptionNotFound(QObject::tr("Pwiz spectrum index %1 not found in "
227 "MS file std::runtime_error :\n%2")
228 .arg(spectrum_index)
229 .arg(error.what()));
230 }
231 catch(std::exception &error)
232 {
233 qDebug() << "getPwizSpectrumPtr error " << error.what()
234 << typeid(error).name();
235
236 throw ExceptionNotFound(
237 QObject::tr("Pwiz spectrum index %1 not found in MS file :\n%2")
238 .arg(spectrum_index)
239 .arg(error.what()));
240 }
241
242 if(native_pwiz_spectrum_sp.get() == nullptr)
243 {
244 throw ExceptionNotFound(
245 QObject::tr(
246 "Pwiz spectrum index %1 not found in MS file : null pointer")
247 .arg(spectrum_index));
248 }
249
250 return native_pwiz_spectrum_sp;
251}
252
253
254bool
256 pwiz::msdata::Spectrum *spectrum_p,
257 QualifiedMassSpectrum &qualified_mass_spectrum) const
258{
259
260 // We now have to set the retention time at which this mass spectrum
261 // was acquired. This is the scan start time.
262
263 if(!spectrum_p->scanList.scans[0].hasCVParam(
264 pwiz::msdata::MS_scan_start_time))
265 {
266 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::MGF)
267 { // MGF could not have scan start time
268 qualified_mass_spectrum.setRtInSeconds(-1);
269 }
270 else
271 {
273 "The spectrum has no scan start time value set."));
274 }
275 }
276 else
277 {
278 pwiz::data::CVParam retention_time_cv_param =
279 spectrum_p->scanList.scans[0].cvParam(pwiz::msdata::MS_scan_start_time);
280
281 // Try to get the units of the retention time value.
282
283 std::string unit_name = retention_time_cv_param.unitsName();
284 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
285 //<< "Unit name for the retention time:"
286 //<< QString::fromStdString(unit_name);
287
288 if(unit_name == "second")
289 {
290 qualified_mass_spectrum.setRtInSeconds(
291 retention_time_cv_param.valueAs<double>());
292 }
293 else if(unit_name == "minute")
294 {
295 qualified_mass_spectrum.setRtInSeconds(
296 retention_time_cv_param.valueAs<double>() * 60);
297 }
298 else
299 throw(
300 ExceptionNotPossible("Could not determine the unit for the "
301 "scan start time value."));
302 }
303
304 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
305 //<< "Retention time for spectrum is:"
306 //<< qualified_mass_spectrum.getRtInSeconds();
307
308 // Old version not checking unit (by default unit is minutes for RT,
309 // not seconds)
310 //
311 // pappso_double retentionTime =
312 // QString(spectrum_p->scanList.scans[0]
313 //.cvParam(pwiz::msdata::MS_scan_start_time)
314 //.value.c_str())
315 //.toDouble();
316 // qualified_mass_spectrum.setRtInSeconds(retentionTime);
317
318 return true;
319}
320
321
322bool
324 pwiz::msdata::Spectrum *spectrum_p,
325 QualifiedMassSpectrum &qualified_mass_spectrum) const
326{
327 // Not all the acquisitions have ion mobility data. We need to test
328 // that:
329
330 if(spectrum_p->scanList.scans[0].hasCVParam(
331 pwiz::msdata::MS_ion_mobility_drift_time))
332 {
333
334 // qDebug() << "as strings:"
335 //<< QString::fromStdString(
336 // spectrum_p->scanList.scans[0]
337 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
338 //.valueAs<std::string>());
339
340 pappso_double driftTime =
341 spectrum_p->scanList.scans[0]
342 .cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
343 .valueAs<double>();
344
345 // qDebug() << "driftTime:" << driftTime;
346
347 // Old version requiring use of QString.
348 // pappso_double driftTime =
349 // QString(spectrum_p->scanList.scans[0]
350 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
351 //.value.c_str())
352 //.toDouble();
353
354 // Now make positively sure that the obtained value is correct.
355 // Note that I suffered a lot with Waters Synapt data that
356 // contained apparently correct drift time XML element that in
357 // fact contained either NaN or inf. When such mass spectra were
358 // encountered, the mz,i data were bogus and crashed the data
359 // loading functions. We just want to skip this kind of bogus mass
360 // spectrum by letting the caller know that the drift time was
361 // bogus ("I" is Filippo Rusconi).
362
363 if(std::isnan(driftTime) || std::isinf(driftTime))
364 {
365 // qDebug() << "detected as nan or inf.";
366
367 return false;
368 }
369 else
370 {
371 // The mzML standard stipulates that drift times are in
372 // milliseconds.
373 qualified_mass_spectrum.setDtInMilliSeconds(driftTime);
374 }
375 }
376 // End of
377 // if(spectrum_p->scanList.scans[0].hasCVParam(
378 // pwiz::msdata::MS_ion_mobility_drift_time))
379 else
380 {
381 // Not a bogus mass spectrum but also not a drift spectrum, set -1
382 // as the drift time value.
383 qualified_mass_spectrum.setDtInMilliSeconds(-1);
384 }
385
386 return true;
387}
388
389
392 const MassSpectrumId &massSpectrumId,
393 pwiz::msdata::Spectrum *spectrum_p,
394 bool want_binary_data,
395 bool &ok) const
396{
397 // qDebug();
398
399 std::string env;
400 env = setlocale(LC_ALL, "");
401 setlocale(LC_ALL, "C");
402
403 QualifiedMassSpectrum qualified_mass_spectrum(massSpectrumId);
404
405 try
406 {
407
408 // We want to store the ms level for this spectrum
409
410 int msLevel =
411 (spectrum_p->cvParam(pwiz::msdata::MS_ms_level).valueAs<int>());
412
413 qualified_mass_spectrum.setMsLevel(msLevel);
414
415 if(!spectrum_p->scanList.scans[0].hasCVParam(
416 pwiz::msdata::MS_peak_list_scans))
417 {
418
419 // qDebug() << spectrum_p->cvParam(pwiz::msdata::MS_peak_list_scans)
420 // .valueAs<double>();
421 qualified_mass_spectrum.setParameterValue(
423 spectrum_p->cvParam(pwiz::msdata::MS_peak_list_scans)
424 .valueAs<double>());
425 }
426 // We want to know if this spectrum is a fragmentation spectrum obtained
427 // from a selected precursor ion.
428
429 std::size_t precursor_list_size = spectrum_p->precursors.size();
430
431 // qDebug() << "For spectrum at index:" <<
432 // massSpectrumId.getSpectrumIndex()
433 //<< "msLevel:" << msLevel
434 //<< "with number of precursors:" << precursor_list_size;
435
436 if(precursor_list_size > 0)
437 {
438
439 // Sanity check
440 if(msLevel < 2)
441 {
442 qDebug() << "Going to throw: msLevel cannot be less than two for "
443 "a spectrum that has items in its Precursor list.";
444
446 "msLevel cannot be less than two for "
447 "a spectrum that has items in its Precursor list."));
448 }
449
450 // See what is the first precursor in the list.
451
452 for(auto &precursor : spectrum_p->precursors)
453 {
454
455 // Set this variable ready as we need that default value in
456 // certain circumstances.
457
458 std::size_t precursor_spectrum_index =
459 std::numeric_limits<std::size_t>::max();
460
461 // The spectrum ID of the precursor might be empty.
462
463 if(precursor.spectrumID.empty())
464 {
465 // qDebug() << "The precursor's spectrum ID is empty.";
466
467 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::MGF)
468 {
469 // qDebug()
470 //<< "Format is MGF, precursor's spectrum ID can be
471 // empty.";
472 }
473 else
474 {
475 // When performing Lumos Fusion fragmentation experiments
476 // in Tune mode and with recording, the first spectrum of
477 // the list is a fragmentation spectrum (ms level 2) that
478 // has no identity for the precursor spectrum because
479 // there is no full scan acquisition.
480 }
481 }
482 // End of
483 // if(precursor.spectrumID.empty())
484 else
485 {
486 // We could get a native precursor spectrum id, so convert
487 // that native id to a spectrum index.
488
489 qualified_mass_spectrum.setPrecursorNativeId(
490 QString::fromStdString(precursor.spectrumID));
491
492 if(qualified_mass_spectrum.getPrecursorNativeId().isEmpty())
493 {
494 // qDebug() << "The native id of the precursor spectrum is
495 // empty.";
496 }
497
498 // Get the spectrum index of the spectrum that contained the
499 // precursor ion.
500
501 precursor_spectrum_index =
502 msp_msData->run.spectrumListPtr->find(precursor.spectrumID);
503
504 // Note that the Mascot MGF format has a peculiar handling of
505 // the precursor ion stuff so we cannot throw.
506 if(precursor_spectrum_index ==
507 msp_msData->run.spectrumListPtr->size())
508 {
509 if(mcsp_msRunId.get()->getMsDataFormat() !=
511 {
513 "Failed to find the index of the "
514 "precursor ion's spectrum."));
515 }
516 }
517
518 qualified_mass_spectrum.setPrecursorSpectrumIndex(
519 precursor_spectrum_index);
520
521 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
522 // "()"
523 //<< "Set the precursor spectrum index to:"
524 //<< qualified_mass_spectrum.getPrecursorSpectrumIndex()
525 //<< "for qualified mass spectrum:"
526 //<< &qualified_mass_spectrum;
527 }
528
529 if(!precursor.selectedIons.size())
530 {
531 qDebug()
532 << "Going to throw The spectrum has msLevel > 1 but the "
533 "precursor ions's selected ions list is empty..";
534
535 throw(
536 ExceptionNotPossible("The spectrum has msLevel > 1 but the "
537 "precursor ions's selected ions "
538 "list is empty."));
539 }
540
541 pwiz::msdata::SelectedIon &ion =
542 *(precursor.selectedIons.begin());
543
544 // selected ion m/z
545
546 pappso_double selected_ion_mz =
547 QString(
548 ion.cvParam(pwiz::cv::MS_selected_ion_m_z).value.c_str())
549 .toDouble();
550
551 // selected ion peak intensity
552 //<cvParam cvRef="MS" accession="MS:1000042"
553 // value="910663.949707031" name="peak intensity"
554 // unitAccession="MS:1000131" unitName="number of detector counts"
555 // unitCvRef="MS" />
556
557 pappso_double selected_ion_peak_intensity =
558 QString(ion.cvParam(pwiz::cv::MS_peak_intensity).value.c_str())
559 .toDouble();
560
561 // charge state
562
563 unsigned int selected_ion_charge_state =
564 QString(ion.cvParam(pwiz::cv::MS_charge_state).value.c_str())
565 .toUInt();
566
567 // At this point we can craft a new PrecursorIonData instance and
568 // push it back to the vector.
569
570 PrecursorIonData precursor_ion_data(selected_ion_mz,
571 selected_ion_charge_state,
572 selected_ion_peak_intensity);
573
574 qualified_mass_spectrum.appendPrecursorIonData(
575 precursor_ion_data);
576
577 // General sum-up
578
579 // qDebug()
580 //<< "Appended new PrecursorIonData:"
581 //<< "mz:"
582 //<< qualified_mass_spectrum.getPrecursorIonData().back().mz
583 //<< "charge:"
584 //<< qualified_mass_spectrum.getPrecursorIonData().back().charge
585 //<< "intensity:"
586 //<< qualified_mass_spectrum.getPrecursorIonData()
587 //.back()
588 //.intensity;
589 }
590 // End of
591 // for(auto &precursor : spectrum_p->precursors)
592 }
593 // End of
594 // if(precursor_list_size > 0)
595 else
596 {
597 // Sanity check
598
599 // Unfortunately, logic here is defeated by some vendors that have
600 // files with MS2 spectra without <precursorList>. Thus we have
601 // spectrum_p->precursors.size() == 0 and msLevel > 1.
602
603 // if(msLevel != 1)
604 //{
605 // throw(
606 // ExceptionNotPossible("msLevel cannot be different than 1 if "
607 //"there is not a single precursor ion."));
608 //}
609 }
610
611 // Sanity check.
612
613 if(precursor_list_size !=
614 qualified_mass_spectrum.getPrecursorIonData().size())
615 {
616 qDebug() << "Going to throw The number of precursors in the file is "
617 "different from the number of precursors in memory.";
618
620 QObject::tr("The number of precursors in the file is different "
621 "from the number of precursors in memory."));
622 }
623
624 // if(precursor_list_size == 1)
625 //{
626 // qDebug() << "Trying to get the mz value of the unique precursor ion:"
627 //<< qualified_mass_spectrum.getPrecursorMz();
628 //}
629
630 processRetentionTime(spectrum_p, qualified_mass_spectrum);
631
632 processDriftTime(spectrum_p, qualified_mass_spectrum);
633
634 // for(pwiz::data::CVParam cv_param : ion.cvParams)
635 //{
636 // pwiz::msdata::CVID param_id = cv_param.cvid;
637 // qDebug() << param_id;
638 // qDebug() << cv_param.cvid.c_str();
639 // qDebug() << cv_param.name().c_str();
640 // qDebug() << cv_param.value.c_str();
641 //}
642
643 if(want_binary_data)
644 {
645
646 // Fill-in MZIntensityPair vector for convenient access to binary
647 // data
648
649 std::vector<pwiz::msdata::MZIntensityPair> pairs;
650 spectrum_p->getMZIntensityPairs(pairs);
651
652 MassSpectrum spectrum;
653 double tic = 0;
654 // std::size_t iterCount = 0;
655
656 // Iterate through the m/z-intensity pairs
657 for(std::vector<pwiz::msdata::MZIntensityPair>::const_iterator
658 it = pairs.begin(),
659 end = pairs.end();
660 it != end;
661 ++it)
662 {
663 //++iterCount;
664
665 // qDebug() << "it->mz " << it->mz << " it->intensity" <<
666 // it->intensity;
667 if(it->intensity)
668 {
669 spectrum.push_back(DataPoint(it->mz, it->intensity));
670 tic += it->intensity;
671 }
672 }
673
674 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::MGF)
675 {
676 // Sort peaks by mz
677 spectrum.sortMz();
678 }
679
680 // lc = localeconv ();
681 // qDebug() << " env=" << localeconv () << " lc->decimal_point "
682 // << lc->decimal_point;
683 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()
684 // "<< spectrum.size();
685 MassSpectrumSPtr spectrum_sp = spectrum.makeMassSpectrumSPtr();
686 qualified_mass_spectrum.setMassSpectrumSPtr(spectrum_sp);
687
688 // double sumY =
689 // qualified_mass_spectrum.getMassSpectrumSPtr()->sumY(); qDebug()
690 // <<
691 // __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
692 //<< "iterCount:" << iterCount << "Spectrum size "
693 //<< spectrum.size() << "with tic:" << tic
694 //<< "and sumY:" << sumY;
695 }
696 else
697 qualified_mass_spectrum.setMassSpectrumSPtr(nullptr);
698 }
699 catch(PappsoException &errorp)
700 {
701 qDebug() << "Going to throw";
702
704 QObject::tr("Error reading data using the proteowizard library: %1")
705 .arg(errorp.qwhat()));
706 }
707 catch(std::exception &error)
708 {
709 qDebug() << "Going to throw";
710
712 QObject::tr("Error reading data using the proteowizard library: %1")
713 .arg(error.what()));
714 }
715
716 // setlocale(LC_ALL, env.c_str());
717
718 ok = true;
719
720 // qDebug() << "QualifiedMassSpectrum: " <<
721 // qualified_mass_spectrum.toString();
722 return qualified_mass_spectrum;
723}
724
725
728 bool want_binary_data,
729 bool &ok) const
730{
731
732 std::string env;
733 env = setlocale(LC_ALL, "");
734 // struct lconv *lc = localeconv();
735
736 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
737 //<< "env=" << env.c_str()
738 //<< "lc->decimal_point:" << lc->decimal_point;
739
740 setlocale(LC_ALL, "C");
741
742 MassSpectrumId massSpectrumId(mcsp_msRunId);
743
744 if(msp_msData == nullptr)
745 {
746 setlocale(LC_ALL, env.c_str());
747 return (QualifiedMassSpectrum(massSpectrumId));
748 }
749
750 // const bool want_binary_data = true;
751
752 pwiz::msdata::SpectrumListPtr spectrum_list_p =
753 msp_msData->run.spectrumListPtr;
754
755 if(spectrum_index == spectrum_list_p.get()->size())
756 {
757 setlocale(LC_ALL, env.c_str());
758 throw ExceptionNotFound(
759 QObject::tr("The spectrum index cannot be equal to the size of the "
760 "spectrum list."));
761 }
762
763 // At this point we know the spectrum index might be sane, so store it in
764 // the mass spec id object.
765 massSpectrumId.setSpectrumIndex(spectrum_index);
766
767 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
768 getPwizSpectrumPtr(spectrum_list_p.get(), spectrum_index, want_binary_data);
769
770 setlocale(LC_ALL, env.c_str());
771
772 massSpectrumId.setNativeId(
773 QString::fromStdString(native_pwiz_spectrum_sp->id));
774
776 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
777}
778
779
780bool
781PwizMsRunReader::accept(const QString &file_name) const
782{
783 // We want to know if we can handle the file_name.
784 pwiz::msdata::ReaderList reader_list;
785
786 std::string reader_type = reader_list.identify(file_name.toStdString());
787
788 if(!reader_type.empty())
789 return true;
790
791 return false;
792}
793
794
796PwizMsRunReader::massSpectrumSPtr(std::size_t spectrum_index)
797{
798 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
799 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumSPtr();
800}
801
803PwizMsRunReader::massSpectrumCstSPtr(std::size_t spectrum_index)
804{
805 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
806 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumCstSPtr();
807}
808
810PwizMsRunReader::qualifiedMassSpectrum(std::size_t spectrum_index,
811 bool want_binary_data) const
812{
813
814 QualifiedMassSpectrum spectrum;
815 bool ok = false;
816
817 spectrum =
818 qualifiedMassSpectrumFromPwizMSData(spectrum_index, want_binary_data, ok);
819
820 if(mcsp_msRunId->getMsDataFormat() == pappso::MsDataFormat::MGF)
821 {
822 if(spectrum.getRtInSeconds() == 0)
823 {
824 // spectrum = qualifiedMassSpectrumFromPwizMSData(scan_num - 1);
825 }
826 }
827
828 // if(!ok)
829 // qDebug() << "Encountered a mass spectrum for which the status is bad.";
830
831 return spectrum;
832}
833
834
835void
841
842void
844 [[maybe_unused]] const MsRunReadConfig &config,
846{
847 qDebug();
849}
850
851void
853 SpectrumCollectionHandlerInterface &handler, unsigned int ms_level)
854{
855
857 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
858
859 // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
860 // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
861 // spectrum has been fully qualified (that is, the member data have been
862 // set), it is transferred to the handler passed as parameter to this
863 // function for the consumer to do what it wants with it.
864
865 // Does the handler consuming the mass spectra read from file want these
866 // mass spectra to hold the binary data arrays (mz/i vectors)?
867
868 const bool want_binary_data = handler.needPeakList();
869
870
871 std::string env;
872 env = setlocale(LC_ALL, "");
873 setlocale(LC_ALL, "C");
874
875
876 // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
877 // run member of msp_msData.
878
879 pwiz::msdata::SpectrumListPtr spectrum_list_p =
880 msp_msData->run.spectrumListPtr;
881
882 // We'll need it to perform the looping in the spectrum list.
883 std::size_t spectrum_list_size = spectrum_list_p.get()->size();
884
885 // qDebug() << "The spectrum list has size:" << spectrum_list_size;
886
887 // Inform the handler of the spectrum list so that it can handle feedback to
888 // the user.
889 handler.spectrumListHasSize(spectrum_list_size);
890
891 // Iterate in the full list of spectra.
892
893 for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
894 {
895
896 // If the user of this reader instance wants to stop reading the
897 // spectra, then break this loop.
898 if(handler.shouldStop())
899 {
900 qDebug() << "The operation was cancelled. Breaking the loop.";
901 break;
902 }
903
904 // Get the native pwiz-spectrum from the spectrum list.
905 // Note that this pointer is a shared pointer from pwiz.
906
907 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
908 getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
909
910 /*
911 * we want to load metadata of the spectrum even if it does not contain
912 peaks
913
914 * if(!native_pwiz_spectrum_sp->hasBinaryData())
915 {
916 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
917 "
918 ()"
919 //<< "native pwiz spectrum is empty, continuing.";
920 continue;
921 }
922 */
923
924 // Instantiate the mass spectrum id that will hold critical information
925 // like the native id string and the spectrum index.
926
927 MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
928
929 // Get the spectrum native id as a QString to store it in the mass
930 // spectrum id class. This will allow later to refer to the same
931 // spectrum starting back from the file.
932
933 QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
934 massSpectrumId.setNativeId(native_id);
935
936 // Finally, instantiate the qualified mass spectrum with its id. This
937 // function will continue performing pappso-spectrum detailed
938 // qualification.
939
940 bool ok = false;
941
942 QualifiedMassSpectrum qualified_mass_spectrum =
944 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
945
946 if(!ok)
947 {
948 // qDebug() << "Encountered a mass spectrum for which the returned "
949 //"status is bad.";
950 continue;
951 }
952
953 // Before handing the mass spectrum out to the handler, see if the
954 // native mass spectrum was empty or not.
955
956 // if(!native_pwiz_spectrum_sp->defaultArrayLength)
957 // qDebug() << "The mass spectrum has not defaultArrayLength";
958
959 qualified_mass_spectrum.setEmptyMassSpectrum(
960 !native_pwiz_spectrum_sp->defaultArrayLength);
961
962 // The handler will receive the index of the mass spectrum in the
963 // current run via the mass spectrum id member datum.
964 if(ms_level == 0)
965 {
966 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
967 }
968 else
969 {
970 if(qualified_mass_spectrum.getMsLevel() == ms_level)
971 {
972 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
973 }
974 }
975 }
976
977 setlocale(LC_ALL, env.c_str());
978 // End of
979 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
980
981 // Now let the loading handler know that the loading of the data has ended.
982 // The handler might need this "signal" to perform additional tasks or to
983 // cleanup cruft.
984
985 // qDebug() << "Loading ended";
986 handler.loadingEnded();
987}
988
989void
992{
993 qDebug();
995 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
996
997 // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
998 // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
999 // spectrum has been fully qualified (that is, the member data have been
1000 // set), it is transferred to the handler passed as parameter to this
1001 // function for the consumer to do what it wants with it.
1002
1003 // Does the handler consuming the mass spectra read from file want these
1004 // mass spectra to hold the binary data arrays (mz/i vectors)?
1005
1006 const bool want_binary_data = config.needPeakList();
1007
1008
1009 std::string env;
1010 env = setlocale(LC_ALL, "");
1011 setlocale(LC_ALL, "C");
1012
1013
1014 // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
1015 // run member of msp_msData.
1016
1017 pwiz::msdata::SpectrumListPtr spectrum_list_p =
1018 msp_msData->run.spectrumListPtr;
1019
1020 // We'll need it to perform the looping in the spectrum list.
1021 std::size_t spectrum_list_size = spectrum_list_p.get()->size();
1022
1023 // qDebug() << "The spectrum list has size:" << spectrum_list_size;
1024
1025 // Inform the handler of the spectrum list so that it can handle feedback to
1026 // the user.
1027 handler.spectrumListHasSize(spectrum_list_size);
1028
1029 // Iterate in the full list of spectra.
1030
1031 for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
1032 {
1033
1034
1035 // If the user of this reader instance wants to stop reading the
1036 // spectra, then break this loop.
1037 if(handler.shouldStop())
1038 {
1039 qDebug() << "The operation was cancelled. Breaking the loop.";
1040 break;
1041 }
1042
1043 // Get the native pwiz-spectrum from the spectrum list.
1044 // Note that this pointer is a shared pointer from pwiz.
1045
1046 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
1047 getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
1048
1049 /*
1050 * we want to load metadata of the spectrum even if it does not contain
1051 peaks
1052
1053 * if(!native_pwiz_spectrum_sp->hasBinaryData())
1054 {
1055 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
1056 "
1057 ()"
1058 //<< "native pwiz spectrum is empty, continuing.";
1059 continue;
1060 }
1061 */
1062
1063 // Instantiate the mass spectrum id that will hold critical information
1064 // like the native id string and the spectrum index.
1065
1066 MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
1067
1068 // Get the spectrum native id as a QString to store it in the mass
1069 // spectrum id class. This will allow later to refer to the same
1070 // spectrum starting back from the file.
1071
1072 QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
1073 massSpectrumId.setNativeId(native_id);
1074
1075 // Finally, instantiate the qualified mass spectrum with its id. This
1076 // function will continue performing pappso-spectrum detailed
1077 // qualification.
1078
1079 bool ok = false;
1080
1081 QualifiedMassSpectrum qualified_mass_spectrum =
1083 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
1084
1085 if(!ok)
1086 {
1087 // qDebug() << "Encountered a mass spectrum for which the returned "
1088 //"status is bad.";
1089 continue;
1090 }
1091
1092 // Before handing the mass spectrum out to the handler, see if the
1093 // native mass spectrum was empty or not.
1094
1095 // if(!native_pwiz_spectrum_sp->defaultArrayLength)
1096 // qDebug() << "The mass spectrum has not defaultArrayLength";
1097
1098 qualified_mass_spectrum.setEmptyMassSpectrum(
1099 !native_pwiz_spectrum_sp->defaultArrayLength);
1100
1101 // The handler will receive the index of the mass spectrum in the
1102 // current run via the mass spectrum id member datum.
1103
1104 if(config.acceptMsLevel(qualified_mass_spectrum.getMsLevel()))
1105 {
1107 qualified_mass_spectrum.getRtInSeconds()))
1108 {
1109 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
1110 }
1111 }
1112 }
1113
1114 setlocale(LC_ALL, env.c_str());
1115 // End of
1116 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
1117
1118 // Now let the loading handler know that the loading of the data has ended.
1119 // The handler might need this "signal" to perform additional tasks or to
1120 // cleanup cruft.
1121
1122 // qDebug() << "Loading ended";
1123 handler.loadingEnded();
1124}
1125
1126std::size_t
1128{
1129 return msp_msData->run.spectrumListPtr.get()->size();
1130}
1131
1132bool
1134{
1135 return m_hasScanNumbers;
1136}
1137
1138bool
1140{
1141 msp_msData = nullptr;
1142 return true;
1143}
1144
1145bool
1147{
1148 if(msp_msData == nullptr)
1149 {
1150 initialize();
1151 }
1152 return true;
1153}
1154
1155
1158 std::size_t spectrum_index, pappso::PrecisionPtr precision) const
1159{
1160
1161 QualifiedMassSpectrum mass_spectrum =
1162 qualifiedMassSpectrum(spectrum_index, false);
1163
1164 return newXicCoordSPtrFromQualifiedMassSpectrum(mass_spectrum, precision);
1165}
1166
1169 const pappso::QualifiedMassSpectrum &mass_spectrum,
1170 pappso::PrecisionPtr precision) const
1171{
1172 XicCoordSPtr xic_coord = std::make_shared<XicCoord>();
1173
1174 xic_coord.get()->rtTarget = mass_spectrum.getRtInSeconds();
1175
1176 xic_coord.get()->mzRange = MzRange(mass_spectrum.getPrecursorMz(), precision);
1177
1178 return xic_coord;
1179}
1180
1181} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
void sortMz()
Sort the DataPoint instances of this spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
bool acceptMsLevel(std::size_t ms_level) const
bool acceptRetentionTimeInSeconds(double retention_time_in_seconds) const
Base class to read an MS run. The only way to build a MsRunReader object is to use the MsRunReaderFactory.
Definition msrunreader.h:63
MsRunIdCstSPtr mcsp_msRunId
virtual const QString & qwhat() const
virtual pappso::XicCoordSPtr newXicCoordSPtrFromSpectrumIndex(std::size_t spectrum_index, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum index
pwiz::msdata::MSDataPtr msp_msData
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
virtual void readSpectrumCollectionWithMsrunReadConfig(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler)
bool processDriftTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizMSData(std::size_t spectrum_index, bool want_binary_data, bool &ok) const
PwizMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizSpectrumPtr(const MassSpectrumId &massSpectrumId, pwiz::msdata::Spectrum *spectrum_p, bool want_binary_data, bool &ok) const
virtual void readSpectrumCollection2(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler) override
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum class given its spectrum index
virtual bool hasScanNumbers() const override
Tells if spectra can be accessed using scan numbers. By default, it returns false. Only overridden func...
bool processRetentionTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual bool acquireDevice() override
acquire data back end device
virtual void initialize() override
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
virtual bool accept(const QString &file_name) const override
Tells if the reader is able to handle this file. Must be implemented by private MS run reader,...
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
pwiz::msdata::SpectrumPtr getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list, std::size_t spectrum_index, bool want_binary_data) const
virtual bool releaseDevice() override
Release the data back end device. If the data back end is released, the developer has to use acquireDev...
virtual pappso::XicCoordSPtr newXicCoordSPtrFromQualifiedMassSpectrum(const pappso::QualifiedMassSpectrum &mass_spectrum, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum
const OboPsiModTerm getOboPsiModTermNativeIDFormat() const
get OboPsiModTerm corresponding to the nativeID format of mz data
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
const std::vector< PrecursorIonData > & getPrecursorIonData() const
void setDtInMilliSeconds(pappso_double rt)
Set the drift time in milliseconds.
const QString & getPrecursorNativeId() const
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
pappso_double getPrecursorMz(bool *ok=nullptr) const
get precursor mz
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
void setParameterValue(QualifiedMassSpectrumParameter parameter, const QVariant &value)
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
interface to collect spectra from the MsRunReader class
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)=0
static std::string toUtf8StandardString(const QString &text)
Definition utils.cpp:164
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
@ MGF
Mascot format.
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
double pappso_double
A type definition for doubles.
Definition types.h:50
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
std::shared_ptr< XicCoord > XicCoordSPtr
Definition xiccoord.h:43
MSrun file reader based on the ProteoWizard library.