libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
sagereader.cpp
Go to the documentation of this file.
1/**
2 * \file input/sage/sagereader.cpp
3 * \date 21/08/2024
4 * \author Olivier Langella
5 * \brief read data files from Sage output
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2024 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of i2MassChroQ.
13 *
14 * i2MassChroQ is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * i2MassChroQ is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with i2MassChroQ. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
29#include "sagereader.h"
30#include <QJsonObject>
31#include <QJsonArray>
32#include <odsstream/tsvreader.h>
33#include <odsstream/odsexception.h>
34#include <qurl.h>
35#include "sagetsvhandler.h"
40
43 const pappso::cbor::psm::SageFileReader &sage_file_reader,
44 const QString &sage_json_file)
45 : m_sageFileReader(sage_file_reader)
46{
47 mp_monitor = p_monitor;
48 mp_cborWriter = p_output;
49 m_jsonAbsoluteFilePath = sage_json_file;
50}
51
55
56const QString &
61
62
63void
68
74
75void
77 const QString &sequence_in)
78{
79 QString accession = description_in.split(" ", Qt::SkipEmptyParts).at(0);
80 try
81 {
82 const PsmProtein &psm_protein = mp_self->m_psmProteinMap.getByAccession(accession);
83 psm_protein.protein_sp.get()->setSequence(sequence_in);
84 psm_protein.protein_sp.get()->setDescription(description_in);
85 }
87 {
88 }
89 try
90 {
91 accession = accession.prepend(m_decoyTag);
92 const PsmProtein &psm_protein = mp_self->m_psmProteinMap.getByAccession(accession);
93 psm_protein.protein_sp.get()->setSequence(sequence_in);
94 psm_protein.protein_sp.get()->setDescription(description_in);
95 psm_protein.protein_sp.get()->reverse();
96 }
98 {
99 }
100}
101
107
108
109void
111{
112
113 extractMzmlPathList(m_sageFileReader.getJsonDocument());
114 // getTsvFilePath(mp_identificationDataSource->getJsonDocument().object());
115 QUrl tsv_url(getTsvFilePath(m_sageFileReader.getJsonDocument()));
116 QFileInfo tsv_file_info(tsv_url.toLocalFile());
117 if(!tsv_url.isLocalFile())
118 {
119 tsv_file_info.setFile(getTsvFilePath(m_sageFileReader.getJsonDocument()));
120 }
122 try
123 {
124 TsvReader tsv_reader(handler);
125
126 QFile tsv_file(tsv_file_info.absoluteFilePath());
127 tsv_reader.parse(tsv_file);
128 tsv_file.close();
129 }
130 catch(OdsException &error_ods)
131 {
132 throw pappso::PappsoException(QObject::tr("Error reading %1 file:\n %2")
133 .arg(tsv_file_info.absoluteFilePath())
134 .arg(error_ods.qwhat()));
135 }
136
137
138 // collect protein sequences
139 QFile fastaFile(getFastaFilePath(m_sageFileReader.getJsonDocument()));
140 SageReader::FastaSeq seq(this);
141 pappso::FastaReader reader(seq);
142 reader.parse(fastaFile);
143
144 qDebug();
145 mp_cborWriter->append("protein_map");
147
148
149 mp_cborWriter->append("sample_list");
150 mp_cborWriter->startArray();
151 try
152 {
153 handler.writeSampleList();
154 }
155 catch(OdsException &error_ods)
156 {
157 throw pappso::PappsoException(QObject::tr("Error reading %1 file:\n %2")
158 .arg(tsv_file_info.absoluteFilePath())
159 .arg(error_ods.qwhat()));
160 }
161
162 mp_cborWriter->endArray();
163}
164
165void
167{
168
169 QJsonObject sage_object = json_doc.object();
170 QJsonValue json_mzml_path_list = sage_object.value("mzml_paths");
171 if(json_mzml_path_list.isUndefined())
172 {
173 throw pappso::ExceptionNotFound(QObject::tr("mzml_paths not found in Sage json document"));
174 }
175 m_mzmlPathList.clear();
176
177 for(auto path_mzml : json_mzml_path_list.toArray())
178 {
179 m_mzmlPathList << path_mzml.toString();
180 }
181}
182
183const QString &
184pappso::cbor::psm::SageReader::getMzmlPath(const QString &file_msrun) const
185{
186 for(auto &file_path : m_mzmlPathList)
187 {
188 if(file_path.endsWith(file_msrun))
189 return file_path;
190 }
192 QObject::tr("MS run %1 not found in Sage json document").append(file_msrun));
193}
194
195
196QString
198{
199 QString path;
200 QJsonObject sage_object = json_doc.object();
201 QJsonValue output_path = sage_object.value("output_paths");
202 if(output_path.isUndefined())
203 {
204 throw pappso::ExceptionNotFound(QObject::tr("output_paths not found in Sage json document"));
205 }
206
207 if(!output_path.isArray())
208 {
209 throw pappso::ExceptionNotFound(QObject::tr("output_paths is not an array"));
210 }
211 for(auto element : output_path.toArray())
212 {
213 if(element.isString())
214 {
215 if(element.toString().endsWith(".tsv"))
216 {
217 path = element.toString();
218 }
219 }
220 }
221 return path;
222}
223
224QString
226{
227 QString path;
228 QJsonObject sage_object = json_doc.object();
229 QJsonValue database = sage_object.value("database");
230 if(database.isUndefined())
231 {
232 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
233 }
234 path = database.toObject().value("fasta").toString();
235 if(path.isEmpty())
236 {
237 throw pappso::ExceptionNotFound(QObject::tr("fasta value is empty"));
238 }
239 return path;
240}
241
242
243std::vector<pappso::cbor::psm::SageReader::SageModification>
245{
246 std::vector<SageReader::SageModification> list;
247 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
248 QJsonValue database = sage_object.value("database");
249 if(database.isUndefined())
250 {
251 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
252 }
253
254 QJsonValue static_mods = database.toObject().value("static_mods");
255 if(static_mods.isUndefined())
256 {
257 throw pappso::ExceptionNotFound(QObject::tr("static_mods not found in Sage json document"));
258 }
259 for(QString residue_str : static_mods.toObject().keys())
260 {
261 SageModification modif;
262 modif.residue = residue_str.at(0);
264 (Enums::AminoAcidChar)modif.residue.toLatin1(),
265 static_mods.toObject().value(residue_str).toDouble());
266 modif.strModification =
267 QString::number(static_mods.toObject().value(residue_str).toDouble(), 'f', 6);
268 if(modif.strModification.isEmpty())
269 {
270 throw pappso::PappsoException(QObject::tr(" modif.strModification is empty"));
271 }
272 if(modif.modification->getMass() < 0)
273 {
274 modif.strModification = QString("[%1]").arg(modif.strModification);
275 }
276 else
277 {
278 modif.strModification = QString("[+%1]").arg(modif.strModification);
279 }
280 list.push_back(modif);
281 }
282 return list;
283}
284
285std::vector<pappso::cbor::psm::SageReader::SageModification>
287{
288 std::vector<SageReader::SageModification> list;
289 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
290 QJsonValue database = sage_object.value("database");
291 if(database.isUndefined())
292 {
293 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
294 }
295
296 QJsonValue var_mods = database.toObject().value("variable_mods");
297 if(var_mods.isUndefined())
298 {
299 throw pappso::ExceptionNotFound(QObject::tr("static_mods not found in Sage json document"));
300 }
301 for(QString residue_str : var_mods.toObject().keys())
302 {
303 SageModification modif;
304 modif.residue = residue_str.at(0);
305 for(QJsonValue one_mass : var_mods.toObject().value(residue_str).toArray())
306 {
308 (Enums::AminoAcidChar)modif.residue.toLatin1(), one_mass.toDouble());
309 modif.strModification = QString::number(one_mass.toDouble(), 'f', 6);
310 if(modif.strModification.isEmpty())
311 {
312 throw pappso::PappsoException(QObject::tr(" modif.strModification is empty"));
313 }
314 if(modif.modification->getMass() < 0)
315 {
316 modif.strModification = QString("[%1]").arg(modif.strModification);
317 }
318 else
319 {
320 modif.strModification = QString("[+%1]").arg(modif.strModification);
321 }
322 list.push_back(modif);
323 }
324 }
325 return list;
326}
327
328QString
330{
331 QString path;
332 QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
333 QJsonValue database = sage_object.value("database");
334 if(database.isUndefined())
335 {
336 throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
337 }
338 path = database.toObject().value("decoy_tag").toString();
339 if(path.isEmpty())
340 {
341 throw pappso::ExceptionNotFound(QObject::tr("decoy_tag value is empty"));
342 }
343 return path;
344}
345
pappso_double getMass() const
void parse(QFile &fastaFile)
static AaModificationP guessAaModificationPbyMonoisotopicMassDelta(Enums::AminoAcidChar aa, pappso_double mass)
Definition utils.cpp:658
overrides QCborStreamWriter base class to provide convenient functions
void setSequence(const QString &description_in, const QString &sequence_in) override
const QString & getMzmlPath(const QString &file_msrun) const
SageReader(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output, const SageFileReader &sage_file_reader, const QString &sage_json_file)
std::vector< SageModification > getStaticModificationList() const
pappso::cbor::CborStreamWriter * mp_cborWriter
Definition sagereader.h:99
const QString & getmJsonAbsoluteFilePath() const
void extractMzmlPathList(const QJsonDocument &json_doc)
pappso::cbor::CborStreamWriter & getCborStreamWriter() const
const SageFileReader & m_sageFileReader
Definition sagereader.h:97
std::vector< SageModification > getVariableModificationList() const
QString getFastaFilePath(const QJsonDocument &json_doc)
const SageFileReader & getSageFileReader() const
QString getTsvFilePath(const QJsonDocument &json_doc)
pappso::UiMonitorInterface * mp_monitor
Definition sagereader.h:98
std::shared_ptr< Protein > protein_sp