58 return QString(
"%1 %2").arg(
name).arg(
value);
64 const QFileInfo &mzident_file)
68 qDebug() << mzident_file.absoluteFilePath() <<
"'";
83 if(!
readFile(mzident_file.absoluteFilePath()))
89 QObject::tr(
"Error reading %1 not mzIdentML file :\n %2")
90 .arg(mzident_file.absoluteFilePath())
96 .arg(mzident_file.absoluteFilePath())
111 .arg(mzident_file.absoluteFilePath())
112 .arg(other_err.
qwhat()));
128 QStringList fasta_files;
131 fasta_files << pair_searchdb.second.file;
142 psm_protein.
isTarget = !pair_sequence.second.is_decoy;
143 psm_protein.
protein_sp = pair_sequence.second.protein_sp;
153 for(std::pair<
const QString, std::vector<SpectrumIdentificationResult>> &pair_spectra :
296 QCborMap identification_engine_parameters;
333 identification_engine_parameters.insert(
354 identification_engine_parameters.toCborValue());
385 QString analysisSoftwareId;
406 if(software_name ==
"SpecOMS")
412 else if(software_name ==
"DeepProt")
433 else if(cv_param.
accession ==
"MS:1002048")
437 else if(cv_param.
accession ==
"MS:1001946")
452 switch(analysisSotwareNameFound)
465 QObject::tr(
"identification results from %1 are not supported yet, "
468 .arg(software_name));
474 std::pair<QString, IdentificationEngine>(analysisSoftwareId, analysisSotwareNameFound));
476 if(it.second ==
false)
478 it.first->second = analysisSotwareNameFound;
513 QObject::tr(
"Error in MzIdentML input file, %1 no DBSequence")
533 QString accession_description = dbsequence.
accession;
552 accession_description.append(
" ").append(dbsequence.
description);
554 else if(cv_param.
accession ==
"MS:1001195")
575 QObject::tr(
"Error in MzIdentML/DBSequence unexpected %1 tag")
583 dbsequence.
protein_sp = std::make_shared<Protein>(accession_description, dbsequence.
sequence);
622 std::shared_ptr<pappso::Peptide> peptide_sp;
631 peptide_sp = std::make_shared<Peptide>(
m_qxmlStreamReader.readElementText().simplified());
637 QObject::tr(
"Error in MzIdentML/Peptide no PeptideSequence"));
654 bool is_cv_param =
false;
697 qInfo() <<
"MzIdentMlSaxHandler::endElement_"
698 "Modification unknown "
709 QObject::tr(
"Error in MzIdentML/Peptide/Modification/cvParam "
710 "modification accession %1 not found")
717 peptide_sp.get()->addAaModification(modif, 0);
721 peptide_sp.get()->addAaModification(modif, modification.
location - 1);
728 QObject::tr(
"Error in MzIdentML/Peptide/Modification "
734 if(is_cv_param ==
false)
741 modif_without_cvparam =
743 peptide_sp.get()->getAa(0).getAminoAcidChar(),
745 peptide_sp.get()->addAaModification(modif_without_cvparam, 0);
749 modif_without_cvparam =
751 peptide_sp.get()->getAa(modification.
location - 1).getAminoAcidChar(),
753 peptide_sp.get()->addAaModification(modif_without_cvparam,
762 m_qxmlStreamReader.raiseError(QObject::tr(
"Error in MzIdentML/Peptide unexpected %1 tag")
768 m_PeptideIdMap.insert(std::pair<QString, PeptideSp>(xml_id, peptide_sp));
782 QObject::tr(
"dBSequence_ref %1 not defined")
787 pe.
protein = itprot->second.protein_sp;
794 QObject::tr(
"peptide_ref %1 not defined")
835 QObject::tr(
"Error in MzIdentML/AnalysisCollection, unexpected %1 t")
873 QObject::tr(
"Error in MzIdentML/DataCollection, unexpected %1 tag")
904 QObject::tr(
"Error in MzIdentML/DataCollection/Inputs, unexpected %1 tag")
948 if(search_database.
file.isEmpty())
950 m_qxmlStreamReader.raiseError(QObject::tr(
"SearchDatabase id %1 location is empty").arg(
id));
1004 QObject::tr(
"Error in "
1005 "MzIdentML/DataCollection/AnalysisData/"
1006 "SpectrumIdentificationList, unexpected %1 tag")
1020 QObject::tr(
"Error in MzIdentML/DataCollection/AnalysisData, "
1021 "unexpected %1 tag")
1035 QString spectra_id =
m_qxmlStreamReader.attributes().value(
"spectraData_ref").toString();
1041 "m_mzidSpectraDataIdMap")
1047 it_insert_ident.first->second.push_back({});
1049 it_insert_ident.first->second.back();
1050 spectrum_identification_result.
cvParamList.clear();
1073 spectrum_identification_result.
cvParamList.push_back(cv_param);
1084 QObject::tr(
"Error in "
1085 "MzIdentML/DataCollection/AnalysisData/"
1086 "SpectrumIdentificationList/"
1087 "SpectrumIdentificationResult, unexpected %1 tag")
1100 spectrum_identification_result.
scanNum = 0;
1104 if(spectrum_identification_result.
spectrumID.startsWith(
"index="))
1108 spectrum_identification_result.
spectrumID.mid(6).toULongLong(&is_ok);
1113 "SpectrumIdentificationResult id %2")
1114 .arg(spectrum_identification_result.
spectrumID)
1115 .arg(spectrum_identification_result.
id));
1122 for(
auto cvParam : spectrum_identification_result.
cvParamList)
1125 qDebug() << cvParam.toString();
1126 if(cvParam.accession ==
"MS:1001115")
1128 spectrum_identification_result.
scanNum = cvParam.value.toUInt();
1130 else if(cvParam.accession ==
"MS:1003062")
1133 spectrum_identification_result.
spectrumIndex = cvParam.value.toUInt();
1135 else if((cvParam.accession ==
"MS:1000016") || (cvParam.accession ==
"MS:1000894"))
1150 spectrum_identification_result.
retentionTime = cvParam.value.toDouble();
1153 if((spectrum_identification_result.
scanNum == 0) &&
1156 m_qxmlStreamReader.raiseError(QObject::tr(
"scan number or spectrum index not found in "
1157 "SpectrumIdentificationResult id %1")
1158 .arg(spectrum_identification_result.
id));
1165 QObject::tr(
"retention time not found in SpectrumIdentificationResult id %1")
1166 .arg(spectrum_identification_result.
id));
1170 for(
auto spectrumIdentificationItem :
1187 .mzidPeptideEvidenceList.clear();
1221 QObject::tr(
"peptideEvidence_ref %1 not defined")
1226 .mzidPeptideEvidenceList.push_back(itpeptideEvidence->second);
1241 .userParamList.push_back(user_param);
1247 "MzIdentML/DataCollection/AnalysisData/"
1248 "SpectrumIdentificationList/"
1249 "SpectrumIdentificationResult/"
1250 "SpectrumIdentificationItem, unexpected %1 tag")
1487 if(spectrum_ident_item.chargeState != charge)
1490 if(spectrum_ident_item.experimentalMassToCharge != exp_mz)
1508 std::map<QString, std::vector<std::size_t>> map_protein_positions;
1512 map_protein_positions.insert({it_peptide_evidence.protein.get()->getAccession(), {}});
1513 it_insert.first->second.push_back(it_peptide_evidence.start);
1517 for(
auto &it_prot_pos : map_protein_positions)
1550 std::find_if(cv_param_list.begin(), cv_param_list.end(), [](
const CvParam &cv_param) {
1551 return cv_param.accession ==
"MS:1001330";
1553 if(it_find != cv_param_list.end())
1560 std::find_if(cv_param_list.begin(), cv_param_list.end(), [](
const CvParam &cv_param) {
1561 return cv_param.accession ==
"MS:1001331";
static AaModificationP getInstance(const QString &accession)
static AaModificationP getInstanceCustomizedMod(pappso_double modificationMass)
exception to use when an item type is not recognized
virtual const QString & qwhat() const
static AaModificationP guessAaModificationPbyMonoisotopicMassDelta(Enums::AminoAcidChar aa, pappso_double mass)
static AaModificationP translateAaModificationFromUnimod(const QString &unimod_accession)
static QString getVersion()
QString errorString() const
QXmlStreamReader m_qxmlStreamReader
virtual bool readFile(const QString &fileName)
overrides QCborStreamWriter base class to provide convenient functions
pappso::cbor::CborStreamWriter * mp_cborWriter
UserParam readUserParam()
std::map< QString, PeptideSp > m_PeptideIdMap
store association between xml ID and peptide sequence
MzIdentMlReader(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output, const QFileInfo &mzident_file)
void readSpectrumIdentification()
IdentificationEngine m_identificationEngine
bool readPeptideEvidence()
virtual void readStream() override
@ SpecOMS
SpecOMS C++ implementation.
@ MSGFplus
MS:1002048 "MS-GF+ software used to analyze the spectra." [PSI:PI].
@ XTandem
MS:1001476 X!Tandem was used to analyze the spectra.
@ PEAKS_Studio
PEAKS Studio.
PsmProteinMap m_proteinMap
bool readAnalysisCollectionItem()
bool readSequenceCollectionItem()
bool writeTandemEval(const std::vector< CvParam > &cv_param_list)
void readSpectrumIdentificationResult()
bool readSearchDatabase()
QCborMap m_cborParameterMap
void writeSpectrumIdentificationItem(const SpectrumIdentificationItem &spectrum_identification_item)
void readAnalysisProtocolCollection()
std::map< QString, MzidPeptideEvidence > m_MzidPeptideEvidenceIdMap
store association between xml ID and peptide evidence
pappso::UiMonitorInterface * mp_monitor
void writeSpectrumIdentificationResult(const SpectrumIdentificationResult &spectrum_identificatio_result)
bool readAnalysisSoftware()
std::map< QString, IdentificationEngine > m_IdentificationEngineMap
store association between xml ID and an identification engine
std::map< QString, MzidSpectraData > m_mzidSpectraDataIdMap
store association between xml ID and SpectraData
std::map< QString, MzidSearchDatabase > m_mzidSearchDatabaseIdMap
store association between xml ID and fasta files
std::map< QString, std::vector< SpectrumIdentificationResult > > m_spectrumIdentificationResultBySpectraIdMap
store all identification results by spectra xml id
bool readDataCollectionItem()
virtual ~MzIdentMlReader()
void readSpectrumIdentificationItem(SpectrumIdentificationResult &spectrum_identification_result)
std::map< QString, MzidDBSequence > m_MzidDBSequenceIdMap
store association between xml ID and DBSequence
QString m_analysisSoftwareVersion
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
const AaModification * AaModificationP
double monoisotopicMassDelta
std::vector< CvParam > cvParamList
QString searchDatabase_ref
std::shared_ptr< Protein > protein_sp
std::vector< CvParam > cvParamList
std::vector< MzidPeptideEvidence > mzidPeptideEvidenceList
std::vector< SpectrumIdentificationItem > spectrumIdentificationItemList
std::vector< UserParam > userParamList
std::vector< CvParam > cvParamList
std::size_t spectrumIndex
std::shared_ptr< Protein > protein_sp