libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pappso::spectree::SpecTree Class Reference

#include <spectree.h>

Classes

struct  SpecTreeNode
struct  MapSimilarityCountElement
struct  MapSimilarityCount

Public Member Functions

 SpecTree (const BucketClustering &bucket_clustering)
virtual ~SpecTree ()
QString toString () const
const std::vector< std::size_t > & getSpectrumFirstNodeIndex () const
 get the adress map of sepctrum index and their first node index
void xtract (UiMonitorInterface &monitor, SpecXtractInterface &spec_xtract, std::size_t minimum_count, std::size_t cart_id_range_max, std::size_t cart_id_range_min, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
std::size_t extractSpectrumPairSimilarityCount (std::size_t spectrum_a_index, std::size_t spectrum_b_index) const
 get the number of common component for a pair of spectrum

Private Member Functions

void addNewNode (const SpecTreeNode &node)
void manageSideAccess (std::vector< std::size_t > &spectrumLastNodeIndex)
void walkBackInBranchFromNode (const SpecTree::SpecTreeNode &start_node, MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
std::size_t walkBackInBranchFromNodeToTarget (const SpecTree::SpecTreeNode &start_node, std::size_t spectrum_index_target) const
void extractSpectrumSimilarityCount (MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t spectrum_index, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
 get a map of similarities for a given spectrum index

Private Attributes

std::vector< SpecTreeNodem_nodeList
std::vector< std::size_t > m_spectrumFirstNodeIndex

Static Private Attributes

static constexpr std::size_t index_not_defined {std::numeric_limits<std::size_t>::max()}

Detailed Description

Todo
write docs

Definition at line 52 of file spectree.h.

Constructor & Destructor Documentation

◆ SpecTree()

pappso::spectree::SpecTree::SpecTree ( const BucketClustering & bucket_clustering)

Build a SpecTree with BucketClustering

Definition at line 48 of file spectree.cpp.

49{
50
51
52 std::vector<Bucket> bins = bucket_clustering.asSortedList();
53 SpecTreeNode node;
54 node.value = bins[0].getCartList().at(0);
55 node.count = 1;
56 qDebug() << node.value;
57 // Specific processing for the first bucket : the first node and all its
58 // children are added in SpecTrees and the transversal accessor is updated
59 // accordingly
60
61
62 std::vector<std::size_t> spectrumLastNodeIndex;
63 spectrumLastNodeIndex.resize(bucket_clustering.getItemCartCount(), index_not_defined);
64 m_spectrumFirstNodeIndex.resize(bucket_clustering.getItemCartCount(), index_not_defined);
65
66 m_spectrumFirstNodeIndex[node.value] = m_nodeList.size() - 1;
67
68
69 m_nodeList.push_back(node);
70 manageSideAccess(spectrumLastNodeIndex);
71 for(std::size_t cpt = 1; cpt < bins[0].size(); ++cpt)
72 {
73 node.parentIndex = m_nodeList.size() - 1;
74 node.nextIndex = index_not_defined;
75 node.value = bins[0].getCartList().at(cpt);
76 node.count = 1;
77 m_nodeList.push_back(node);
78 manageSideAccess(spectrumLastNodeIndex);
79 }
80
81
82 qDebug();
83 // For the rest of the buckets, we increment the node counter in prefix
84 // common parts without creating new nodes. Once out of the prefix common
85 // part, new nodes are created similarily as they were for the first bucket.
86 for(std::size_t bins_index = 1; bins_index < bins.size(); ++bins_index)
87 {
88 qDebug() << "bins_index=" << bins_index;
89 std::size_t pos = 0;
90 bool common = false;
91 std::size_t parentNode = index_not_defined;
92
93 std::size_t previous_bin_size = bins[bins_index - 1].size();
94
95 // detection of a common first element
96 if(pos < previous_bin_size)
97 {
98 if(bins[bins_index].getCartList().at(pos) == bins[bins_index - 1].getCartList().at(pos))
99 {
100 common = true;
101 }
102 }
103 // insertion procedure for a common prefix part insertion : we go
104 // through existing nodes and increment their counter value by one
105 while(common)
106 {
107 qDebug() << "common pos=" << pos;
108 std::size_t spectrum_index = bins[bins_index].getCartList().at(pos);
109 qDebug() << "common spectrum_index=" << spectrum_index;
110 std::size_t tempNodeIndex = spectrumLastNodeIndex[spectrum_index];
111 qDebug() << "common tempNodeIndex=" << tempNodeIndex;
112 m_nodeList[tempNodeIndex].count++;
113 // checking on the prefix common part persistence conditions ; if
114 // not valid, we are not in a common prefix part anymore for the
115 // currently inserted bucket
116 if((previous_bin_size - 1 == pos) || (bins[bins_index].size() - 1 == pos))
117 {
118 qDebug() << "last one";
119 common = false;
120 }
121 else
122 {
123 qDebug() << "bins[bins_index - 1].getSpectrumIndex(pos + 1)="
124 << bins[bins_index - 1].getCartList().at(pos + 1);
125 qDebug() << "bins[bins_index].getSpectrumIndex(pos + 1)="
126 << bins[bins_index].getCartList().at(pos + 1);
127 if(bins[bins_index - 1].getCartList().at(pos + 1) !=
128 bins[bins_index].getCartList().at(pos + 1))
129 {
130 common = false;
131 }
132 }
133 parentNode = tempNodeIndex;
134 ++pos;
135
136 qDebug() << "common=" << common;
137 }
138 qDebug();
139 // insertion procedure for all the remaining nodes not in the prefix
140 // common part : we create new nodes with a counter value of 1 and
141 // update the transversal accessor and last inserted node list
142 while(pos < bins[bins_index].size())
143 {
144 std::size_t spectrum_index = bins[bins_index].getCartList().at(pos);
145
146 node.parentIndex = parentNode;
147 node.nextIndex = index_not_defined;
148 node.value = spectrum_index;
149 node.count = 1;
150 m_nodeList.push_back(node);
151 manageSideAccess(spectrumLastNodeIndex);
152 parentNode = m_nodeList.size() - 1;
153
154 ++pos;
155 }
156 }
157}
std::vector< SpecTreeNode > m_nodeList
Definition spectree.h:169
static constexpr std::size_t index_not_defined
Definition spectree.h:117
std::vector< std::size_t > m_spectrumFirstNodeIndex
Definition spectree.h:170
void manageSideAccess(std::vector< std::size_t > &spectrumLastNodeIndex)
Definition spectree.cpp:170

References pappso::spectree::BucketClustering::asSortedList(), pappso::spectree::SpecTree::SpecTreeNode::count, pappso::spectree::BucketClustering::getItemCartCount(), index_not_defined, m_nodeList, m_spectrumFirstNodeIndex, manageSideAccess(), pappso::spectree::SpecTree::SpecTreeNode::nextIndex, pappso::spectree::SpecTree::SpecTreeNode::parentIndex, and pappso::spectree::SpecTree::SpecTreeNode::value.

◆ ~SpecTree()

pappso::spectree::SpecTree::~SpecTree ( )
virtual

Destructor

Definition at line 159 of file spectree.cpp.

160{
161}

Member Function Documentation

◆ addNewNode()

void pappso::spectree::SpecTree::addNewNode ( const SpecTreeNode & node)
private

Definition at line 164 of file spectree.cpp.

165{
166 m_nodeList.push_back(node);
167}

References m_nodeList.

◆ extractSpectrumPairSimilarityCount()

std::size_t pappso::spectree::SpecTree::extractSpectrumPairSimilarityCount ( std::size_t spectrum_a_index,
std::size_t spectrum_b_index ) const

get the number of common component for a pair of spectrum

Parameters
spectrum_a_indexthe first spectrum index
spectrum_b_indexthe second spectrum index
Returns
integer the number of common component between spectrum

Definition at line 418 of file spectree.cpp.

420{
421 if(spectrum_a_index > spectrum_b_index)
422 std::swap(spectrum_a_index, spectrum_b_index);
423
424 std::size_t count_target = 0;
425 if(spectrum_b_index < m_spectrumFirstNodeIndex.size())
426 {
427 std::size_t node_index = m_spectrumFirstNodeIndex[spectrum_b_index];
428 while(node_index != index_not_defined)
429 {
430 qDebug() << "node_index=" << node_index;
431 const SpecTreeNode &current_node = m_nodeList[node_index];
432 // go back in the tree branch from the current_node
433 count_target += walkBackInBranchFromNodeToTarget(current_node, spectrum_a_index);
434 node_index = current_node.nextIndex;
435 }
436 }
437 else
438 {
439 throw pappso::ExceptionOutOfRange(
440 QObject::tr("spectrum_index %1 out of range (spectrum_index max size=%2)")
441 .arg(spectrum_b_index)
442 .arg(m_spectrumFirstNodeIndex.size()));
443 }
444 return count_target;
445}
std::size_t walkBackInBranchFromNodeToTarget(const SpecTree::SpecTreeNode &start_node, std::size_t spectrum_index_target) const
Definition spectree.cpp:361

References index_not_defined, m_nodeList, m_spectrumFirstNodeIndex, pappso::spectree::SpecTree::SpecTreeNode::nextIndex, and walkBackInBranchFromNodeToTarget().

◆ extractSpectrumSimilarityCount()

void pappso::spectree::SpecTree::extractSpectrumSimilarityCount ( MapSimilarityCount & map_count,
std::size_t minimum_count,
std::size_t spectrum_index,
std::size_t target_cart_id_max,
std::size_t target_cart_id_min ) const
private

get a map of similarities for a given spectrum index

this function can only retrieve spectrum index map lower than the spectrum index given in the parameters (check original publication for details)

Parameters
spectrum_indexthe spectrum index to retrieve similarities
spectrum_index_lower_limitlower spectrum index limit to save CPU (do not use it if you want a full similarity map)
Returns
map of spectrum_index keys containing the corresponding count value (similarity) for the targeted spectrum index

Extraction of the similarities above a given threshold between a given spectrum and all other spectra higher located in SpecTrees.

Parameters
tree_indicesSpecTrees the data has to be extracted from
spectrum_indexThe spectrum with ated in SpecTrees.
tree_indicesSpecTrees the data has to be extracted from
spectrum_indexThe spectrum with which we want to extract similarities
minimum_countThe threshold to use for the extraction
Since
0.1

Definition at line 393 of file spectree.cpp.

398{
399
400 qDebug() << " spectrum_index=" << spectrum_index;
401 // sOne;
402 // we position the current node at the first occurence of the spectrum in
403 // the transversal accessor and initialise variablethresholds accordingly
404 std::size_t node_index = m_spectrumFirstNodeIndex[spectrum_index];
405 while(node_index != index_not_defined)
406 {
407 qDebug() << "node_index=" << node_index;
408 const SpecTreeNode &current_node = m_nodeList[node_index];
409 // go back in the tree branch from the current_node
411 current_node, map_count, minimum_count, target_cart_id_max, target_cart_id_min);
412 node_index = current_node.nextIndex;
413 }
414 qDebug();
415}
void walkBackInBranchFromNode(const SpecTree::SpecTreeNode &start_node, MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
Definition spectree.cpp:307

References index_not_defined, m_nodeList, m_spectrumFirstNodeIndex, pappso::spectree::SpecTree::SpecTreeNode::nextIndex, and walkBackInBranchFromNode().

Referenced by xtract().

◆ getSpectrumFirstNodeIndex()

const std::vector< std::size_t > & pappso::spectree::SpecTree::getSpectrumFirstNodeIndex ( ) const

get the adress map of sepctrum index and their first node index

convenience function intended for testing

Returns
vector of the size of max spectrum index containing list of first node index

Definition at line 219 of file spectree.cpp.

220{
222}

References m_spectrumFirstNodeIndex.

◆ manageSideAccess()

void pappso::spectree::SpecTree::manageSideAccess ( std::vector< std::size_t > & spectrumLastNodeIndex)
private

Definition at line 170 of file spectree.cpp.

171{
172 auto spectrum_index = m_nodeList.back().value;
173 auto node_position = m_nodeList.size() - 1;
174 if(m_spectrumFirstNodeIndex[spectrum_index] == index_not_defined)
175 {
176 // first occurence of this spectrum
177 m_spectrumFirstNodeIndex[spectrum_index] = node_position;
178 spectrumLastNodeIndex[spectrum_index] = node_position;
179 }
180 else
181 {
182 if(spectrumLastNodeIndex[spectrum_index] == node_position)
183 {
184 }
185 else
186 {
187 m_nodeList[spectrumLastNodeIndex[spectrum_index]].nextIndex = node_position;
188 spectrumLastNodeIndex[spectrum_index] = node_position;
189 }
190 }
191}

References index_not_defined, m_nodeList, and m_spectrumFirstNodeIndex.

Referenced by SpecTree().

◆ toString()

QString pappso::spectree::SpecTree::toString ( ) const

Definition at line 194 of file spectree.cpp.

195{
196 QString node_str("node|spectrum|parent|next|count|\n");
197 std::size_t i = 0;
198 for(auto node : m_nodeList)
199 {
200 int parent = -1;
201 if(node.parentIndex < index_not_defined)
202 parent = node.parentIndex;
203 int next = -1;
204 if(node.nextIndex < index_not_defined)
205 next = node.nextIndex;
206 node_str.append(QString("node_%1|%2|%3|%4|%5end\n")
207 .arg(i)
208 .arg(node.value)
209 .arg(parent)
210 .arg(next)
211 .arg(node.count));
212
213 i++;
214 }
215 return node_str;
216}

References index_not_defined, and m_nodeList.

◆ walkBackInBranchFromNode()

void pappso::spectree::SpecTree::walkBackInBranchFromNode ( const SpecTree::SpecTreeNode & start_node,
MapSimilarityCount & map_count,
std::size_t minimum_count,
std::size_t target_cart_id_max,
std::size_t target_cart_id_min ) const
private

Definition at line 307 of file spectree.cpp.

312{
313
314 std::size_t parent_id = start_node.parentIndex;
315
316 qDebug() << " start node spectrum=" << start_node.value << " parent_id=" << parent_id
317 << " init_count=" << start_node.count << " minimum_count=" << minimum_count
318 << " target_cart_id_min=" << target_cart_id_min;
319
320 while(parent_id != index_not_defined)
321 {
322 // next_node :
323 const SpecTree::SpecTreeNode &current_node = m_nodeList[parent_id];
324
325 if(current_node.value < target_cart_id_min)
326 break;
327 if(current_node.value <= target_cart_id_max)
328 {
329 auto &map_id_count = map_count.map_id_count[current_node.value];
330 if(map_id_count.lastWitness == start_node.value)
331 {
332 map_id_count.count += start_node.count;
333 // if at one time, a similarity exceeds the threshold value for
334 // the first time, it is written into the proper stack
335 if((map_id_count.count >= minimum_count) && (map_id_count.aboveThreshold == false))
336 {
337 map_id_count.aboveThreshold = true;
338 map_count.aboveThreshold.push_back(current_node.value);
339 }
340 }
341 else
342 {
343 map_id_count.lastWitness = start_node.value;
344 map_id_count.count = start_node.count;
345 map_id_count.aboveThreshold = false;
346 // if at one time, a similarity exceeds the threshold value for
347 // the first time, it is written into the proper stack
348 if(map_id_count.count >= minimum_count)
349 {
350 map_id_count.aboveThreshold = true;
351 map_count.aboveThreshold.push_back(current_node.value);
352 }
353 map_count.keys.push_back(current_node.value);
354 }
355 }
356 parent_id = current_node.parentIndex;
357 }
358}

References pappso::spectree::SpecTree::MapSimilarityCount::aboveThreshold, pappso::spectree::SpecTree::SpecTreeNode::count, index_not_defined, pappso::spectree::SpecTree::MapSimilarityCount::keys, m_nodeList, pappso::spectree::SpecTree::MapSimilarityCount::map_id_count, pappso::spectree::SpecTree::SpecTreeNode::parentIndex, and pappso::spectree::SpecTree::SpecTreeNode::value.

Referenced by extractSpectrumSimilarityCount().

◆ walkBackInBranchFromNodeToTarget()

std::size_t pappso::spectree::SpecTree::walkBackInBranchFromNodeToTarget ( const SpecTree::SpecTreeNode & start_node,
std::size_t spectrum_index_target ) const
private

Definition at line 361 of file spectree.cpp.

363{
364 std::size_t parent_id = start_node.parentIndex;
365
366 while(parent_id != index_not_defined)
367 {
368 // next_node :
369 const SpecTree::SpecTreeNode &current_node = m_nodeList[parent_id];
370
371 if(current_node.value < spectrum_index_target)
372 return 0;
373 if(current_node.value == spectrum_index_target)
374 return start_node.count;
375
376 parent_id = current_node.parentIndex;
377 }
378 return 0;
379}

References pappso::spectree::SpecTree::SpecTreeNode::count, index_not_defined, m_nodeList, pappso::spectree::SpecTree::SpecTreeNode::parentIndex, and pappso::spectree::SpecTree::SpecTreeNode::value.

Referenced by extractSpectrumPairSimilarityCount().

◆ xtract()

void pappso::spectree::SpecTree::xtract ( UiMonitorInterface & monitor,
SpecXtractInterface & spec_xtract,
std::size_t minimum_count,
std::size_t cart_id_range_max,
std::size_t cart_id_range_min,
std::size_t target_cart_id_max,
std::size_t target_cart_id_min ) const

Extract all similarities above a threshold value between all spectra from the deepest one up to the given limit using the predefined extraction algorithm. The results are stored in the mentionned reporter.

Performs the extraction of the similarities above a given threshold for all spectra between the deepest one and a given index in the transversal accessor. The extracted pairs are feed to a shifter to try to improve the spectrum identification. Afterwards, the retained results are written to the given reporter. This extraction step represents most of SpecOMS execution time. Note: This function heavily evolved through the development process to match new needs, it became clumsy and heavy. A clean refactor might be appropriate.

Parameters
monitorprogress monitor, indicates progression in spectree
spec_xtractreport results of similarities to the user (write in file or consolidate)
cart_id_range_maxThe position in the transversal accessor from which the similarities must be reported (item cart id)
cart_id_range_minThe position in the transversal accessor up to which the similarities must be reported (item cart id)
target_cart_id_maxtargeted item cart id relevant for comparison
target_cart_id_mintargeted item cart id relevant for comparison

Definition at line 225 of file spectree.cpp.

232{
233
234 if(cart_id_range_max < cart_id_range_min)
235 {
236 std::swap(cart_id_range_max, cart_id_range_min);
237 }
238
239 if(cart_id_range_max < m_spectrumFirstNodeIndex.size())
240 {
241
242 monitor.setStatus(QObject::tr("starting SpecXtract operation"));
243
244 MapSimilarityCount map_count;
245 map_count.map_id_count.resize(cart_id_range_max);
246
247 // initialisation of the structure to store the compared pairs
248 // int[][] results = new int[m_count][3];
249 // int count = 0;
250
251 // iteration on the transversal accessor starting from the deepest spetrum
252 // and climbing up to limit
253 std::size_t spectra1 = cart_id_range_max;
254 monitor.setTotalSteps(cart_id_range_max - cart_id_range_min);
255 while(true)
256 {
257
258 if(monitor.shouldIstop())
259 {
260 throw pappso::ExceptionInterrupted(QObject::tr("SpecXtract job stopped"));
261 }
262 qDebug() << "spectra1=" << spectra1;
263 spec_xtract.beginItemCartExtraction(spectra1);
264 monitor.count();
265 // System.out.println(start);
266
267 map_count.keys.clear();
268 map_count.aboveThreshold.clear();
269
271 map_count, minimum_count, spectra1, target_cart_id_max, target_cart_id_min);
272
273 qDebug() << "spectra1=" << spectra1
274 << " map_count.aboveThreshold.size()=" << map_count.aboveThreshold.size()
275 << " map_count.keys.size()=" << map_count.keys.size();
276
277 for(std::size_t spectra2 : map_count.aboveThreshold)
278 {
279
280 // std::size_t spectra2 = *it;
281 qDebug() << "spectra2=" << spectra2;
282
283 // std::size_t total_extracted_count =
284 // map_count.map_id_count.at(spectra2);
285 spec_xtract.reportSimilarity(
286 spectra1, spectra2, map_count.map_id_count.at(spectra2).count);
287 }
288
289 spec_xtract.endItemCartExtraction(spectra1);
290 // monitor.count();
291 if(spectra1 == cart_id_range_min)
292 break;
293 spectra1--;
294 }
295 }
296 else
297 {
298 throw pappso::ExceptionOutOfRange(
299 QObject::tr("item cart index %1 out of range (item cart id max size=%2)")
300 .arg(cart_id_range_max)
301 .arg(m_spectrumFirstNodeIndex.size()));
302 }
303 qDebug();
304}
void extractSpectrumSimilarityCount(MapSimilarityCount &map_count, std::size_t minimum_count, std::size_t spectrum_index, std::size_t target_cart_id_max, std::size_t target_cart_id_min) const
get a map of similarities for a given spectrum index
Definition spectree.cpp:393
std::vector< MapSimilarityCountElement > map_id_count
Definition spectree.h:136

References pappso::spectree::SpecTree::MapSimilarityCount::aboveThreshold, pappso::spectree::SpecXtractInterface::beginItemCartExtraction(), pappso::UiMonitorInterface::count(), pappso::spectree::SpecXtractInterface::endItemCartExtraction(), extractSpectrumSimilarityCount(), pappso::spectree::SpecTree::MapSimilarityCount::keys, m_spectrumFirstNodeIndex, pappso::spectree::SpecTree::MapSimilarityCount::map_id_count, pappso::spectree::SpecXtractInterface::reportSimilarity(), pappso::UiMonitorInterface::setStatus(), pappso::UiMonitorInterface::setTotalSteps(), and pappso::UiMonitorInterface::shouldIstop().

Member Data Documentation

◆ index_not_defined

std::size_t pappso::spectree::SpecTree::index_not_defined {std::numeric_limits<std::size_t>::max()}
staticconstexprprivate

◆ m_nodeList

◆ m_spectrumFirstNodeIndex

std::vector<std::size_t> pappso::spectree::SpecTree::m_spectrumFirstNodeIndex
private

The documentation for this class was generated from the following files: