Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
h-2 committed Nov 20, 2023
1 parent 4023a8d commit 25d6e3e
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 25 deletions.
58 changes: 34 additions & 24 deletions src/search_algo.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -872,6 +872,10 @@ inline void _writeRecord(TBlastRecord & record, TLocalHolder & lH)
record.matches.resize(lH.options.maxMatches);
}
lH.stats.hitsFinal += record.matches.size();
for (auto it = record.matches.begin(), it2 = std::next(record.matches.begin()); it2 != record.matches.end();
++it, ++it2)
if (it->_n_sId != it2->_n_sId)
++lH.stats.pairs;

// compute LCA
if (lH.options.computeLCA)
Expand Down Expand Up @@ -922,7 +926,8 @@ inline void _widenMatch(Match & m, TLocalHolder const & lH)
uint64_t band = _bandSize(lH.transQrySeqs[m.qryId].size());

// end on subject is beginning plus full query length plus band
m.subjEnd = std::min<size_t>(m.subjStart + lH.transQrySeqs[m.qryId].size() + band, lH.gH.transSbjSeqs[m.subjId].size());
m.subjEnd =
std::min<size_t>(m.subjStart + lH.transQrySeqs[m.qryId].size() + band, lH.gH.transSbjSeqs[m.subjId].size());

// account for band in subj start
m.subjStart = (band < m.subjStart) ? m.subjStart - band : 0;
Expand Down Expand Up @@ -1125,19 +1130,19 @@ inline void _performAlignment(TDepSetH & depSetH,
}

template <typename TLocalHolder>
inline void _widenAndPreprocessMatches(TLocalHolder & lH)
inline void _widenAndPreprocessMatches(std::span<Match> & matches, TLocalHolder & lH)
{
auto before = lH.matches.size();
auto before = matches.size();

for (Match & m : lH.matches)
for (Match & m : matches)
_widenMatch<TLocalHolder>(m, lH);

std::ranges::sort(lH.matches);
std::ranges::sort(matches);

if (lH.matches.size() > 1)
if (matches.size() > 1)
{
// pairwise merge from left to right
for (auto it = lH.matches.begin(); it < lH.matches.end() - 1; ++it)
for (auto it = matches.begin(); it < matches.end() - 1; ++it)
{
Match & l = *it;
Match & r = *(it + 1);
Expand All @@ -1149,7 +1154,7 @@ inline void _widenAndPreprocessMatches(TLocalHolder & lH)
}

// pairwise "swallow" from right to left
for (auto it = lH.matches.rbegin(); it < lH.matches.rend() - 1; ++it)
for (auto it = matches.rbegin(); it < matches.rend() - 1; ++it)
{
Match & l = *it;
Match & r = *(it + 1);
Expand All @@ -1159,14 +1164,14 @@ inline void _widenAndPreprocessMatches(TLocalHolder & lH)
}
}

auto const ret = std::ranges::unique(lH.matches);
lH.matches.erase(ret.begin(), ret.end());
lH.stats.hitsDuplicate += (before - lH.matches.size());
auto [new_end, old_end] = std::ranges::unique(matches); // move non-uniq to the end
matches = std::span<Match>{matches.begin(), new_end}; // "resize" of the span
lH.stats.hitsDuplicate += (before - matches.size());
}
}

template <typename TLocalHolder>
inline void iterateMatchesFullSimd(TLocalHolder & lH, bsDirection const dir = bsDirection::fwd)
inline void iterateMatchesFullSimd(std::span<Match> lambdaMatches, TLocalHolder & lH, bsDirection const dir)
{
using TGlobalHolder = typename TLocalHolder::TGlobalHolder;
using TBlastMatch = typename TLocalHolder::TBlastMatch;
Expand All @@ -1176,7 +1181,7 @@ inline void iterateMatchesFullSimd(TLocalHolder & lH, bsDirection const dir = bs
// statistics
#ifdef LAMBDA_MICRO_STATS
++lH.stats.numQueryWithExt;
lH.stats.numExtScore += seqan::length(lH.matches);
lH.stats.numExtScore += seqan::length(lambdaMatches);

double start = sysTime();
#endif
Expand All @@ -1186,19 +1191,12 @@ inline void iterateMatchesFullSimd(TLocalHolder & lH, bsDirection const dir = bs
seqan::StringSet<typename seqan::Source<typename TLocalHolder::TAlignRow1>::Type> depSetV;

// pre-sort and filter
_widenAndPreprocessMatches(lH);
_widenAndPreprocessMatches(lambdaMatches, lH);

// create blast matches from Lambda matches
std::list<TBlastMatch> blastMatches;
for (Match const & m : lH.matches)
for (Match const & m : lambdaMatches)
{
/* TODO we need to externalise this because right now we sort and merge hits that are not need in BS mode */
// In BS-mode, skip those results that have wrong orientation
if constexpr (TLocalHolder::TGlobalHolder::c_redAlph == AlphabetEnum::DNA3BS)
{
if ((dir == bsDirection::fwd && (m.subjId % 2)) || (dir == bsDirection::rev && !(m.subjId % 2)))
continue;
}
// create blastmatch in list without copy or move
blastMatches.emplace_back(lH.qryIds[m.qryId / TGlobalHolder::qryNumFrames],
const_gH.indexFile.ids[m.subjId / TGlobalHolder::sbjNumFrames]);
Expand Down Expand Up @@ -1362,12 +1360,24 @@ inline void writeRecords(TLocalHolder & lH)
template <typename TLocalHolder>
inline void iterateMatches(TLocalHolder & lH)
{
iterateMatchesFullSimd(lH, bsDirection::fwd);
if constexpr (TLocalHolder::TGlobalHolder::c_redAlph == AlphabetEnum::DNA3BS)
{
iterateMatchesFullSimd(lH, bsDirection::rev);
std::ranges::sort(lH.matches,
[](Match const & l, Match const & r) {
return std::tuple<bool, Match const &>{l.subjId % 2, l} <
std::tuple<bool, Match const &>{r.subjId % 2, r};
});

auto it = std::ranges::find_if(lH.matches, [](Match const & m) { return m.subjId % 2; });

iterateMatchesFullSimd(std::span{lH.matches.begin(), it}, lH, bsDirection::fwd);
iterateMatchesFullSimd(std::span{it, lH.matches.end()}, lH, bsDirection::rev);
lH.blastMatches.sort([](auto const & lhs, auto const & rhs) { return lhs._n_qId < rhs._n_qId; });
}
else
{
iterateMatchesFullSimd(lH.matches, lH, bsDirection::fwd);
}
}

//-----------------------------------------------------------------------
Expand Down
6 changes: 5 additions & 1 deletion src/search_datastructures.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ struct StatsHolder
// final
uint64_t hitsFinal;
uint64_t qrysWithHit;
uint64_t pairs;

#ifdef LAMBDA_MICRO_STATS
// times
Expand Down Expand Up @@ -149,6 +150,7 @@ struct StatsHolder

hitsFinal = 0;
qrysWithHit = 0;
pairs = 0;

#ifdef LAMBDA_MICRO_STATS
seedLengths.clear();
Expand Down Expand Up @@ -181,6 +183,7 @@ struct StatsHolder

hitsFinal += rhs.hitsFinal;
qrysWithHit += rhs.qrysWithHit;
pairs += rhs.pairs;

#ifdef LAMBDA_MICRO_STATS
seqan::append(seedLengths, rhs.seedLengths);
Expand Down Expand Up @@ -280,7 +283,8 @@ void printStats(StatsHolder const & stats, LambdaOptions const & options)
if (options.verbosity >= 1)
{
auto const w = seqan::_numberOfDigits(stats.hitsFinal);
std::cout << "Number of valid hits: " << std::setw(w) << stats.hitsFinal
std::cout << "Number of total hits: " << std::setw(w) << stats.hitsFinal
<< "\nNumber of Query-Subject pairs: " << std::setw(w) << stats.pairs
<< "\nNumber of Queries with at least one valid hit: " << std::setw(w) << stats.qrysWithHit << "\n";
}
}
Expand Down

0 comments on commit 25d6e3e

Please sign in to comment.