Skip to content

Commit

Permalink
using closed syncmers to get window guearnatee. Default k-s = 8 to ge…
Browse files Browse the repository at this point in the history
…t same expected sampling rate as open syncmers
  • Loading branch information
ksahlin committed Apr 19, 2024
1 parent a8bbb03 commit 1aba24a
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 16 deletions.
1 change: 0 additions & 1 deletion src/aln.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1167,7 +1167,6 @@ void align_or_map_single(
shuffle_top_nams(nams, random_engine);
statistics.tot_sort_nams += nam_sort_timer.duration();


Timer extend_timer;
size_t n_best = 0;
switch (map_param.output_format) {
Expand Down
8 changes: 7 additions & 1 deletion src/index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ void StrobemerIndex::read(const std::string& filename) {

/* Pick a suitable number of bits for indexing randstrobe start indices */
int StrobemerIndex::pick_bits(size_t size) const {
size_t estimated_number_of_randstrobes = size / (parameters.syncmer.k - parameters.syncmer.s + 1);
size_t estimated_number_of_randstrobes = 2*size / (parameters.syncmer.k - parameters.syncmer.s + 1);
// Two randstrobes per bucket on average
return std::clamp(static_cast<int>(log2(estimated_number_of_randstrobes)) - 1, 8, 31);
}
Expand Down Expand Up @@ -165,6 +165,12 @@ void StrobemerIndex::populate(float f, unsigned n_threads) {
assign_all_randstrobes(randstrobe_counts, n_threads);
stats.elapsed_generating_seeds = randstrobes_timer.duration();

// for (int i = 0; i < randstrobes.size()-1; ++i) {
// if ( ((randstrobes[i+1].position - randstrobes[i].position) > 7) && (randstrobes[i+1].reference_index() == randstrobes[i].reference_index()) ) {
// std::cout << randstrobes[i].position << " " << randstrobes[i+1].position << std::endl;
// }
// }

Timer sorting_timer;
logger.debug() << " Sorting ...\n";
task_thread_pool::task_thread_pool pool{n_threads};
Expand Down
16 changes: 8 additions & 8 deletions src/indexparameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ struct Profile {
static auto max{std::numeric_limits<int>::max()};

static std::vector<Profile> profiles = {
Profile{ 50, 70, 16, -4, -2, 0},
Profile{ 75, 90, 20, -4, -3, -1},
Profile{100, 110, 18, -4, 1, 3},
Profile{125, 135, 20, -4, 1, 4},
Profile{150, 175, 22, -4, 3, 5},
Profile{250, 275, 24, -4, 4, 12},
Profile{300, 375, 24, -4, 5, 13},
Profile{400, max, 25, -6, 7, 13},
Profile{ 50, 70, 16, -9, -2, 0},
Profile{ 75, 90, 20, -9, -3, -1},
Profile{100, 110, 18, -9, 1, 3},
Profile{125, 135, 20, -9, 1, 4},
Profile{150, 175, 22, -9, 3, 5},
Profile{250, 275, 24, -9, 4, 12},
Profile{300, 375, 24, -9, 5, 13},
Profile{400, max, 25, -13, 7, 13},
};

/* Create an IndexParameters instance based on a given read length.
Expand Down
8 changes: 4 additions & 4 deletions src/indexparameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ struct SyncmerParameters {
if (s > k) {
throw BadParameter("s is larger than k");
}
if ((k - s) % 2 != 0) {
throw BadParameter("(k - s) must be an even number to create canonical syncmers. Please set s to e.g. k-2, k-4, k-6, ...");
}
// if ((k - s) % 2 != 0) {
// throw BadParameter("(k - s) must be an even number to create canonical syncmers. Please set s to e.g. k-2, k-4, k-6, ...");
// }
}

bool operator==(const SyncmerParameters& other) const;
Expand Down Expand Up @@ -85,7 +85,7 @@ class IndexParameters {
IndexParameters(size_t canonical_read_length, int k, int s, int l, int u, int q, int max_dist, int aux_len)
: canonical_read_length(canonical_read_length)
, syncmer(k, s)
, randstrobe(l, u, q, max_dist, std::max(0, k / (k - s + 1) + l), k / (k - s + 1) + u, aux_len)
, randstrobe(l, u, q, max_dist, std::max(0, 2*k / (k - s + 1) + l), 2*k / (k - s + 1) + u, aux_len)
{
}

Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ void log_parameters(const IndexParameters& index_parameters, const MappingParame
<< "Maximum seed length: " << index_parameters.randstrobe.max_dist + index_parameters.syncmer.k << std::endl
<< "R: " << map_param.rescue_level << std::endl
<< "Expected [w_min, w_max] in #syncmers: [" << index_parameters.randstrobe.w_min << ", " << index_parameters.randstrobe.w_max << "]" << std::endl
<< "Expected [w_min, w_max] in #nucleotides: [" << (index_parameters.syncmer.k - index_parameters.syncmer.s + 1) * index_parameters.randstrobe.w_min << ", " << (index_parameters.syncmer.k - index_parameters.syncmer.s + 1) * index_parameters.randstrobe.w_max << "]" << std::endl
<< "Expected [w_min, w_max] in #nucleotides: [" << (index_parameters.syncmer.k - index_parameters.syncmer.s + 1)/2 * index_parameters.randstrobe.w_min << ", " << (index_parameters.syncmer.k - index_parameters.syncmer.s + 1)/2 * index_parameters.randstrobe.w_max << "]" << std::endl
<< "A: " << aln_params.match << std::endl
<< "B: " << aln_params.mismatch << std::endl
<< "O: " << aln_params.gap_open << std::endl
Expand Down
2 changes: 1 addition & 1 deletion src/randstrobes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ Syncmer SyncmerIterator::next() {
qs_min_pos = i - s + 1;
}
}
if (qs_min_pos == i - k + t) { // occurs at t:th position in k-mer
if ( (qs_min_pos == i + 1 - k) || (qs_min_pos == i + 1 - k + (k-s)) ) { // occurs at first or last position in k-mer, i.e., closed syncmer
uint64_t yk = std::min(xk[0], xk[1]);
auto syncmer = Syncmer{syncmer_kmer_hash(yk), i - k + 1};
i++;
Expand Down

0 comments on commit 1aba24a

Please sign in to comment.