From f5a127abaa532e58dc1b13c22e66547313e00ca2 Mon Sep 17 00:00:00 2001 From: ksahlin Date: Sun, 21 Apr 2024 20:11:45 +0200 Subject: [PATCH] fixes a bug introduced by partial hits: several full seeds can have the same partial base seed (identical query and ref coordinates). Such partial seeds got added to the same NAM and thus increased the score (through incrementing n_hits several times) --- src/nam.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/nam.cpp b/src/nam.cpp index a9826a2c..0e45f0ce 100644 --- a/src/nam.cpp +++ b/src/nam.cpp @@ -62,6 +62,11 @@ inline void add_to_hits_per_ref_partial( } } + bool operator==(const Hit& lhs, const Hit& rhs) + { + return (lhs.query_start == rhs.query_start) && (lhs.query_end == rhs.query_end) && (lhs.ref_start == rhs.ref_start) && (lhs.ref_end == rhs.ref_end); + } + void merge_hits_into_nams( robin_hood::unordered_map>& hits_per_ref, int k, @@ -80,7 +85,11 @@ void merge_hits_into_nams( std::vector open_nams; unsigned int prev_q_start = 0; + auto prev_hit = Hit{0,0,0,0}; for (auto &h : hits) { + if (prev_hit == h) { + continue; + } bool is_added = false; for (auto & o : open_nams) { @@ -152,6 +161,7 @@ void merge_hits_into_nams( open_nams.erase(std::remove_if(open_nams.begin(), open_nams.end(), predicate), open_nams.end()); prev_q_start = h.query_start; } + prev_hit = h; } // Add all current open_matches to final NAMs