31#include "../exception/exceptionoutofrange.h"
37 std::size_t aa_str_max_size)
41 if(aa_str_max_size > 7)
44 QObject::tr(
"aa_str_max_size exceeds max size"));
47 QString seq_str = protein.get()->getSequence();
50 for(
const QChar &aa_str : seq_str)
55 for(std::size_t i = 2; i <= aa_str_max_size; i++)
73const std::vector<std::uint8_t> &
86std::vector<std::uint32_t>
90 std::vector<std::uint32_t> fragments;
92 int max = (m_seqAaCode.size() - fragment_size);
96 auto it = m_seqAaCode.begin();
97 for(
int i = 0; i <=
max; i++)
99 fragments.push_back(codec.
codeLlc(it, fragment_size));
107const std::vector<std::uint32_t> &
115 std::size_t indice = size - 2;
116 if(indice < m_peptideCodedFragments.size())
118 return m_peptideCodedFragments.at(indice);
125std::vector<std::pair<std::size_t, std::uint32_t>>
127 const std::vector<uint32_t> &code_list_in)
const
129 std::vector<std::pair<std::size_t, std::uint32_t>> return_pos;
130 std::vector<uint32_t> code_list = code_list_in;
132 std::sort(code_list.begin(), code_list.end());
133 auto it_end = std::unique(code_list.begin(), code_list.end());
134 for(
auto it_code = code_list.begin(); it_code != it_end; it_code++)
137 std::size_t size = 2;
138 for(
auto &liste_protein_seq_code : m_peptideCodedFragments)
141 auto it_seq_position = std::find(liste_protein_seq_code.begin(),
142 liste_protein_seq_code.end(),
144 while(it_seq_position != liste_protein_seq_code.end())
147 std::size_t position =
148 std::distance(liste_protein_seq_code.begin(), it_seq_position);
149 return_pos.push_back({size, position});
151 it_seq_position = std::find(
152 ++it_seq_position, liste_protein_seq_code.end(), *it_code);
166 const std::vector<uint32_t> &code_list_from_spectrum)
const
168 std::vector<double> convolution_score;
171 std::vector<std::uint8_t>::const_iterator it_aa = m_seqAaCode.begin();
172 auto it_couple = m_peptideCodedFragments[0].begin();
173 auto it_trio = m_peptideCodedFragments[1].begin();
174 auto it_quatro = m_peptideCodedFragments[2].begin();
175 auto it_cinqo = m_peptideCodedFragments[3].begin();
176 for(std::uint8_t aa_code : m_seqAaCode)
178 convolution_score.push_back(convolutionKernel(code_list_from_spectrum,
191 return convolution_score;
196 const std::vector<uint32_t> &spectrum_code_list,
197 std::vector<std::uint8_t>::const_iterator it_aa,
198 std::vector<std::uint32_t>::const_iterator it_couple,
199 std::vector<std::uint32_t>::const_iterator it_trio,
200 std::vector<std::uint32_t>::const_iterator it_quatro,
201 std::vector<std::uint32_t>::const_iterator it_cinqo)
const
206 auto it_end = it_aa + 5;
208 double single_score = 0;
211 if(std::binary_search(spectrum_code_list.begin(),
212 spectrum_code_list.end(),
213 (std::uint32_t)(*it)))
225 auto itduo_end = it_couple + 4;
226 auto itduo = it_couple;
227 double duo_score = 0;
228 while(itduo != itduo_end)
230 if(std::binary_search(
231 spectrum_code_list.begin(), spectrum_code_list.end(), *itduo))
244 auto it3_end = it_trio + 3;
246 double trio_score = 0;
247 while(it3 != it3_end)
249 if(std::binary_search(
250 spectrum_code_list.begin(), spectrum_code_list.end(), *it3))
263 auto it4_end = it_quatro + 2;
264 auto it4 = it_quatro;
265 double quatro_score = 0;
266 while(it4 != it4_end)
269 if(std::binary_search(
270 spectrum_code_list.begin(), spectrum_code_list.end(), *it4))
279 if(std::binary_search(
280 spectrum_code_list.begin(), spectrum_code_list.end(), *it_cinqo))
288 return score * single_score * duo_score * trio_score * quatro_score;
uint8_t getAaCode(char aa_letter) const
const AaCode & getAaCode() const
uint32_t codeLlc(const QString &aa_str) const
get the lowest common denominator integer from an amino acid sequence string
virtual ~ProteinIntegerCode()
const std::vector< std::uint32_t > & getPeptideCodedFragment(std::size_t size) const
std::vector< std::pair< std::size_t, std::uint32_t > > match(const std::vector< uint32_t > &code_list) const
list of positions and matched codes along protein sequence
std::vector< double > convolution(const std::vector< uint32_t > &code_list_from_spectrum) const
process convolution of spectrum code list along protein sequence
const std::vector< std::uint8_t > & getSeqAaCode() const
std::vector< std::uint8_t > m_seqAaCode
double convolutionKernel(const std::vector< uint32_t > &spectrum_code_list, std::vector< std::uint8_t >::const_iterator it_aa, std::vector< std::uint32_t >::const_iterator it_couple, std::vector< std::uint32_t >::const_iterator it_trio, std::vector< std::uint32_t >::const_iterator it_quatro, std::vector< std::uint32_t >::const_iterator it_cinqo) const
ProteinIntegerCode(ProteinSp protein, const AaStringCodec &codec, std::size_t aa_str_max_size=5)
std::vector< std::vector< std::uint32_t > > m_peptideCodedFragments
std::vector< std::uint32_t > computePeptideCodeFragments(const AaStringCodec &codec, std::size_t fragment_size) const
pappso::ProteinSp getProteinSp() const
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
std::shared_ptr< const Protein > ProteinSp
shared pointer on a Protein object
@ max
maximum of intensities
transform protein amino acid sequence into vectors of amino acid codes