Skip to content

Commit

Permalink
improve checks
Browse files Browse the repository at this point in the history
  • Loading branch information
tibvdm committed Aug 23, 2024
1 parent 74b4354 commit 26bca34
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 34 deletions.
70 changes: 36 additions & 34 deletions sa-index/src/sa_searcher.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::{cmp::min, ops::Deref};
use sa_mappings::proteins::{Protein, Proteins, SEPARATION_CHARACTER, TERMINATION_CHARACTER};
use sa_mappings::proteins::{Protein, Proteins, SEPARATION_CHARACTER};

use crate::{
sa_searcher::BoundSearch::{Maximum, Minimum},
Expand Down Expand Up @@ -341,47 +341,49 @@ impl Searcher {
while sa_index < max_bound {
let suffix = self.sa.get(sa_index) as usize;

// filter away matches where I was wrongfully equalized to L, and check the
// unmatched prefix when I and L equalized, we only need to
// check the prefix, not the whole match, when the prefix is 0, we don't need to
// check at all
if suffix >= skip
&& ((skip == 0
|| Self::check_prefix(
current_search_string_prefix,
&self.proteins.input_string[suffix - skip..suffix],
equate_il
))
&& Self::check_suffix(
skip,
il_locations_current_suffix,
current_search_string_suffix,
&self.proteins.input_string[suffix..suffix + search_string.len() - skip],
equate_il
))
{
if tryptic && (
(suffix - skip == 0)
|| self.proteins.input_string[suffix - skip - 1] == b'R'
|| self.proteins.input_string[suffix - skip - 1] == b'K'
|| self.proteins.input_string[suffix - skip - 1] == SEPARATION_CHARACTER
) && (
self.proteins.input_string[suffix - skip + search_string.len() + 1] != b'P'
|| self.proteins.input_string[suffix - skip + search_string.len() + 1] == SEPARATION_CHARACTER
|| self.proteins.input_string[suffix - skip + search_string.len() + 1] == TERMINATION_CHARACTER
)
{
sa_index += 1;
continue;
}
// If the suffix is smaller than the skip factor, we can't check the prefix
let checkable_suffix = suffix >= skip;

// Check for trypticity if a tryptic search is performed
let is_tryptic = tryptic && (
self.proteins.input_string[suffix - skip - 1] == b'R' ||
self.proteins.input_string[suffix - skip - 1] == b'K' ||
self.proteins.input_string[suffix - skip - 1] == SEPARATION_CHARACTER
) && self.proteins.input_string[suffix - skip + search_string.len()] != b'P';

if !is_tryptic {
sa_index += 1;
continue;
}

// If the skip factor is 0, the entire search string should match.
let completely_matched = checkable_suffix && skip == 0;

// If the skip factor is not 0, the prefix should match the prefix of the search string.
let prefix_matched = completely_matched || Self::check_prefix(
current_search_string_prefix,
&self.proteins.input_string[suffix - skip..suffix],
equate_il
);

// If the prefix is matched, we can check the suffix.
let suffix_matched = prefix_matched && Self::check_suffix(
skip,
il_locations_current_suffix,
current_search_string_suffix,
&self.proteins.input_string[suffix..suffix + search_string.len() - skip],
equate_il
);

if suffix_matched {
matching_suffixes.push((suffix - skip) as i64);

// return if max number of matches is reached
if matching_suffixes.len() >= max_matches {
return SearchAllSuffixesResult::MaxMatches(matching_suffixes);
}
}

sa_index += 1;
}
}
Expand Down
2 changes: 2 additions & 0 deletions sa-mappings/src/proteins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ impl Proteins {
// because of the encoded functional annotations
let mut lines = ByteLines::new(BufReader::new(file));

input_string.push(SEPARATION_CHARACTER.into());

while let Some(Ok(line)) = lines.next() {
let mut fields = line.split(|b| *b == b'\t');

Expand Down

0 comments on commit 26bca34

Please sign in to comment.