libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
spectree.h
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/spectree/spectree.h
3 * \date 11/12/2023
4 * \author Olivier Langella
5 * \brief Matthieu David's SpecTree structure
6 *
7 * C++ implementation of algorithm already described in :
8 * 1. David, M., Fertin, G., Rogniaux, H. & Tessier, D. SpecOMS: A Full Open
9 * Modification Search Method Performing All-to-All Spectra Comparisons within
10 * Minutes. J. Proteome Res. 16, 3030–3038 (2017).
11 *
12 * https://www.theses.fr/2019NANT4092
13 */
14
15
16/*
17 * SpecTree
18 * Copyright (C) 2023 Olivier Langella
19 * <olivier.langella@universite-paris-saclay.fr>
20 *
21 * This program is free software: you can redistribute it and/or modify it
22 * under the terms of the GNU General Public License
23 * as published by the Free Software Foundation, either version 3 of the
24 * License, or (at your option) any later version.
25 *
26 * This program is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with this program. If not, see <http://www.gnu.org/licenses/>.
33 *
34 */
35
36#pragma once
37
38#include "bucketclustering.h"
39#include <cstdlib>
40#include <QString>
41#include "../uimonitor/uimonitorinterface.h"
42#include "specxtractinterface.h"
43
44namespace pappso
45{
46namespace spectree
47{
48
49/**
50 * @todo write docs
51 */
53{
54 public:
55 /**
56 * Build a SpecTree with BucketClustering
57 */
58 SpecTree(const BucketClustering &bucket_clustering);
59
60 /**
61 * Destructor
62 */
63 virtual ~SpecTree();
64
65 QString toString() const;
66
67 /** @brief get the address map of spectrum indexes and their first node index
68 *
69 * convenience function intended for testing
70 * @return vector of the size of max spectrum index containing list of first
71 * node index
72 */
73 const std::vector<std::size_t> &getSpectrumFirstNodeIndex() const;
74
75
76 /**
77 * Extract all similarities above a threshold value between all spectra from
78 * the deepest one up to the given limit using the predefined extraction
79 * algorithm. The results are stored in the mentioned reporter.
80 *
81 * Performs the extraction of the similarities above a given threshold for all
82 * spectra between the deepest one and a given index in the transversal
83 * accessor. The extracted pairs are fed to a shifter to try to improve the
84 * spectrum identification. Afterwards, the retained results are written to
85 * the given reporter. This extraction step represents most of SpecOMS
86 * execution time. Note: This function heavily evolved through the development
87 * process to match new needs, it became clumsy and heavy. A clean refactor
88 * might be appropriate.
89 *
90 * @param monitor progress monitor, indicates progression in spectree
91 * @param spec_xtract report results of similarities to the user (write in
92 * file or consolidate)
93 * @param cart_id_range_max The position in the transversal accessor from
94 * which the similarities must be reported
95 * @param cart_id_range_min The position in the transversal accessor up to
96 * which the similarities must be reported
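97 * @param item_cart_index_lower_limit lower spectrum index limit to save CPU
98 * (do not use it if you want a full similarity map). NOTE(review): the declaration below has no parameter of this name, and minimum_count, target_cart_id_max and target_cart_id_min are undocumented - confirm and update.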
99 */
100 void xtract(UiMonitorInterface &monitor,
101 SpecXtractInterface &spec_xtract,
102 std::size_t minimum_count,
103 std::size_t cart_id_range_max,
104 std::size_t cart_id_range_min,
105 std::size_t target_cart_id_max,
106 std::size_t target_cart_id_min) const;
107
108 /** @brief get the number of common components for a pair of spectra
109 * @param spectrum_a_index the first spectrum index
110 * @param spectrum_b_index the second spectrum index
111 * @return the number of common components between the two spectra
112 */
113 std::size_t
114 extractSpectrumPairSimilarityCount(std::size_t spectrum_a_index,
115 std::size_t spectrum_b_index) const;
116
117 private:
118 static constexpr std::size_t index_not_defined{
119 std::numeric_limits<std::size_t>::max()};
121 {
122 std::size_t parentIndex = index_not_defined;
123 std::size_t nextIndex = index_not_defined;
124 std::size_t value;
125 std::size_t count;
126 };
127
129 {
130 std::size_t lastWitness = index_not_defined;
131 std::size_t count = 0;
132 bool aboveThreshold = false;
133 };
135 {
136 std::vector<std::size_t> keys;
137 std::vector<std::size_t> aboveThreshold;
138 std::vector<MapSimilarityCountElement> map_id_count;
139 };
140
141 void addNewNode(const SpecTreeNode &node);
142 void manageSideAccess(std::vector<std::size_t> &spectrumLastNodeIndex);
143 void walkBackInBranchFromNode(const SpecTree::SpecTreeNode &start_node,
144 MapSimilarityCount &map_count,
145 std::size_t minimum_count,
146 std::size_t target_cart_id_max,
147 std::size_t target_cart_id_min) const;
148
149 std::size_t
150 walkBackInBranchFromNodeToTarget(const SpecTree::SpecTreeNode &start_node,
151 std::size_t spectrum_index_target) const;
152
153 /** @brief get a map of similarities for a given spectrum index
154 *
155 * this function can only retrieve spectrum index map lower than the spectrum
156 * index given in the parameters (check original publication for details)
157 *
158 * @param spectrum_index the spectrum index to retrieve similarities
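159 * @param spectrum_index_lower_limit lower spectrum index limit to save CPU
160 * (do not use it if you want a full similarity map)
161 * @return map of spectrum_index keys containing the corresponding count value
162 * (similarity) for the targeted spectrum index. NOTE(review): the declaration below returns void, takes a MapSimilarityCount reference (map_count) and has no spectrum_index_lower_limit parameter - this description looks outdated; confirm and update.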
163 */
164 void extractSpectrumSimilarityCount(MapSimilarityCount &map_count,
165 std::size_t minimum_count,
166 std::size_t spectrum_index,
167 std::size_t target_cart_id_max,
168 std::size_t target_cart_id_min) const;
169
170
171 private:
172 std::vector<SpecTreeNode> m_nodeList;
173 std::vector<std::size_t> m_spectrumFirstNodeIndex;
174};
175} // namespace spectree
176} // namespace pappso
rearrange itemcarts into buckets
std::vector< SpecTreeNode > m_nodeList
Definition spectree.h:172
std::vector< std::size_t > m_spectrumFirstNodeIndex
Definition spectree.h:173
yield similarities between pairs of ItemCart
#define PMSPP_LIB_DECL
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
Matthieu David's SpecTree structure.
std::vector< MapSimilarityCountElement > map_id_count
Definition spectree.h:138
std::vector< std::size_t > aboveThreshold
Definition spectree.h:137