Skip to content

Commit

Permalink
Check the input peptide for tryptic ending as well
Browse files: browse the repository at this point in the history
  • Loading branch information
tibvdm committed Aug 23, 2024
1 parent cb7afee commit 74b4354
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 32 deletions.
12 changes: 6 additions & 6 deletions sa-index/src/peptide_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ pub fn search_proteins_for_peptide<'a>(
peptide: &str,
cutoff: usize,
equate_il: bool,
missed: bool
tryptic: bool
) -> Option<(bool, Vec<&'a Protein>)> {
let peptide = peptide.trim_end().to_uppercase();

Expand All @@ -60,7 +60,7 @@ pub fn search_proteins_for_peptide<'a>(
return None;
}

let suffix_search = searcher.search_matching_suffixes(peptide.as_bytes(), cutoff, equate_il, missed);
let suffix_search = searcher.search_matching_suffixes(peptide.as_bytes(), cutoff, equate_il, tryptic);
let (suffixes, cutoff_used) = match suffix_search {
SearchAllSuffixesResult::MaxMatches(matched_suffixes) => Some((matched_suffixes, true)),
SearchAllSuffixesResult::SearchResult(matched_suffixes) => Some((matched_suffixes, false)),
Expand All @@ -72,8 +72,8 @@ pub fn search_proteins_for_peptide<'a>(
Some((cutoff_used, proteins))
}

pub fn search_peptide(searcher: &Searcher, peptide: &str, cutoff: usize, equate_il: bool, missed: bool) -> Option<SearchResult> {
let (cutoff_used, proteins) = search_proteins_for_peptide(searcher, peptide, cutoff, equate_il, missed)?;
pub fn search_peptide(searcher: &Searcher, peptide: &str, cutoff: usize, equate_il: bool, tryptic: bool) -> Option<SearchResult> {
let (cutoff_used, proteins) = search_proteins_for_peptide(searcher, peptide, cutoff, equate_il, tryptic)?;

Some(SearchResult {
sequence: peptide.to_string(),
Expand Down Expand Up @@ -101,11 +101,11 @@ pub fn search_all_peptides(
peptides: &Vec<String>,
cutoff: usize,
equate_il: bool,
missed: bool
tryptic: bool
) -> Vec<SearchResult> {
peptides
.par_iter()
.filter_map(|peptide| search_peptide(searcher, peptide, cutoff, equate_il, missed))
.filter_map(|peptide| search_peptide(searcher, peptide, cutoff, equate_il, tryptic))
.collect()
}

Expand Down
46 changes: 20 additions & 26 deletions sa-index/src/sa_searcher.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::{cmp::min, ops::Deref};
use std::str::from_utf8;
use sa_mappings::proteins::{Protein, Proteins, SEPARATION_CHARACTER, TERMINATION_CHARACTER};

use crate::{
Expand Down Expand Up @@ -307,8 +306,13 @@ impl Searcher {
search_string: &[u8],
max_matches: usize,
equate_il: bool,
missed: bool
tryptic: bool
) -> SearchAllSuffixesResult {
// If we perform a tryptic search, the last character of the search string should be R or K
if tryptic && search_string[search_string.len() - 1] != b'R' && search_string[search_string.len() - 1] != b'K' {
return SearchAllSuffixesResult::NoMatches;
}

let mut matching_suffixes: Vec<i64> = vec![];
let mut il_locations = vec![];
for (i, &character) in search_string.iter().enumerate() {
Expand All @@ -326,18 +330,11 @@ impl Searcher {
let il_locations_current_suffix = &il_locations[il_locations_start..];
let current_search_string_prefix = &search_string[..skip];
let current_search_string_suffix = &search_string[skip..];

eprintln!("skip: {}, current_search_string_prefix: {}, current_search_string_suffix: {}", skip, from_utf8(current_search_string_prefix).unwrap(), from_utf8(current_search_string_suffix).unwrap());

let search_bound_result = self.search_bounds(&search_string[skip..]);

eprintln!("search_bound_result: {:?}", search_bound_result);

// if the shorter part is matched, see if what goes before the matched suffix matches
// the unmatched part of the prefix
if let BoundSearchResult::SearchResult((min_bound, max_bound)) = search_bound_result {
eprintln!("min_bound: {}, max_bound: {}", min_bound, max_bound);

// try all the partially matched suffixes and store the matching suffixes in an
// array (stop when our max number of matches is reached)
let mut sa_index = min_bound;
Expand All @@ -363,22 +360,19 @@ impl Searcher {
equate_il
))
{
if !missed {
let is_tryptic_match = (
(suffix - skip == 0)
|| self.proteins.input_string[suffix - skip - 1] == b'R'
|| self.proteins.input_string[suffix - skip - 1] == b'K'
|| self.proteins.input_string[suffix - skip - 1] == SEPARATION_CHARACTER
if tryptic && (
(suffix - skip == 0)
|| self.proteins.input_string[suffix - skip - 1] == b'R'
|| self.proteins.input_string[suffix - skip - 1] == b'K'
|| self.proteins.input_string[suffix - skip - 1] == SEPARATION_CHARACTER
) && (
self.proteins.input_string[suffix - skip + search_string.len() + 1] != b'P'
|| self.proteins.input_string[suffix - skip + search_string.len() + 1] == SEPARATION_CHARACTER
|| self.proteins.input_string[suffix - skip + search_string.len() + 1] == TERMINATION_CHARACTER
);

if !is_tryptic_match {
sa_index += 1;
continue;
}
)
{
sa_index += 1;
continue;
}

matching_suffixes.push((suffix - skip) as i64);
Expand Down Expand Up @@ -573,16 +567,16 @@ mod tests {
let searcher = Searcher::new(sa, proteins, Box::new(suffix_index_to_protein));

// search suffix 'VAA'
let found_suffixes = searcher.search_matching_suffixes(&[b'V', b'A', b'A'], usize::MAX, false, true);
assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![7]));
let found_suffixes = searcher.search_matching_suffixes(&[b'V', b'A', b'A'], usize::MAX, false, false);
assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![7]));
let found_suffixes = searcher.search_matching_suffixes(&[b'V', b'A', b'A'], usize::MAX, false, true);
assert_eq!(found_suffixes, SearchAllSuffixesResult::NoMatches);

// search suffix 'AC'
let found_suffixes = searcher.search_matching_suffixes(&[b'A', b'C'], usize::MAX, false, true);
assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![11, 5]));
let found_suffixes = searcher.search_matching_suffixes(&[b'A', b'C'], usize::MAX, false, false);
assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![11]));
assert_eq!(found_suffixes, SearchAllSuffixesResult::SearchResult(vec![11, 5]));
let found_suffixes = searcher.search_matching_suffixes(&[b'A', b'C'], usize::MAX, false, true);
assert_eq!(found_suffixes, SearchAllSuffixesResult::NoMatches);
}

#[test]
Expand Down

0 comments on commit 74b4354

Please sign in to comment.