Skip to content

Commit

Permalink
With --details, add X0:i tag (no. of identically scored best alignments)
Browse files Browse the repository at this point in the history
  • Loading branch information
marcelm committed Nov 27, 2023
1 parent 5a72d7c commit 604edca
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 30 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Strobealign Changelog

## development version

* If `--details` is used, output `X0:i` SAM tag with the number of
identically-scored best alignments

## v0.12.0 (2023-11-23)

* #293: Fix: When mapping single-end reads, many multimappers were previously
Expand Down
5 changes: 4 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ style guide, but new code should follow it.

## Detailed SAM output (`--details`)

When `--details` or `-d` is provided, the following additional SAM tags are output for
When `--details` is provided, the following additional SAM tags are output for
mapped reads.

`na`: Number of NAMs (seeds) found
Expand All @@ -77,3 +77,6 @@ mapped reads.
the expected region given its mate)
`al`: Number of times an attempt was made to extend a seed (by gapped or ungapped alignment)
`ga`: Number of times an attempt was made to extend a seed by gapped alignment
`X0`: Number of equally scored best alignments (greater 1 for multimappers).
For paired-end reads, the tag is output for both reads, but the value is
identical and is the number of equally scored best alignment *pairs*.
60 changes: 35 additions & 25 deletions src/aln.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ inline void align_single(
}
tries++;
}
details.best_alignments = alignments_with_best_score;
uint8_t mapq = (60.0 * (best_score - second_best_score) + best_score - 1) / best_score;
bool is_primary = true;
sam.add(best_alignment, record, read.rc, mapq, is_primary, details);
Expand Down Expand Up @@ -491,11 +492,12 @@ inline Alignment rescue_align(
}

/*
* Turn a vector of scored alignment pairs into one that
* is sorted by score (highest first) and contains only unique entries.
* Remove consecutive identical alignment pairs and leave only the first.
*/
void deduplicate_scored_pairs(std::vector<ScoredAlignmentPair>& pairs) {

if (pairs.size() < 2) {
return;
}
int prev_ref_start1 = pairs[0].alignment1.ref_start;
int prev_ref_start2 = pairs[0].alignment2.ref_start;
int prev_ref_id1 = pairs[0].alignment1.ref_id;
Expand Down Expand Up @@ -523,28 +525,24 @@ void deduplicate_scored_pairs(std::vector<ScoredAlignmentPair>& pairs) {
pairs.resize(j);
}


/*
* If there are multiple top-scoring alignments (all with the same score),
* pick one randomly and move it to the front.
* Count how many best alignments there are that all have the same score
*/
void pick_random_top_pair(
std::vector<ScoredAlignmentPair>& high_scores,
std::minstd_rand& random_engine
) {
size_t count_best_alignment_pairs(const std::vector<ScoredAlignmentPair>& pairs) {
if (pairs.empty()) {
return 0;
}
size_t i = 1;
for ( ; i < high_scores.size(); ++i) {
if (high_scores[i].score != high_scores[0].score) {
for ( ; i < pairs.size(); ++i) {
if (pairs[i].score != pairs[0].score) {
break;
}
}
if (i > 1) {
size_t random_index = std::uniform_int_distribution<>(0, i - 1)(random_engine);
if (random_index != 0) {
std::swap(high_scores[0], high_scores[random_index]);
}
}
return i;
}


/*
* Align a pair of reads for which only one has NAMs. For the other, rescue
* is attempted by aligning it locally.
Expand Down Expand Up @@ -598,7 +596,7 @@ std::vector<ScoredAlignmentPair> rescue_read(
}

void output_aligned_pairs(
std::vector<ScoredAlignmentPair>& high_scores,
const std::vector<ScoredAlignmentPair>& high_scores,
Sam& sam,
size_t max_secondary,
double secondary_dropoff,
Expand All @@ -608,17 +606,13 @@ void output_aligned_pairs(
const Read& read2,
float mu,
float sigma,
const std::array<Details, 2>& details,
std::minstd_rand& random_engine
const std::array<Details, 2>& details
) {

if (high_scores.empty()) {
sam.add_unmapped_pair(record1, record2);
return;
}
std::sort(high_scores.begin(), high_scores.end(), by_score<ScoredAlignmentPair>);
deduplicate_scored_pairs(high_scores);
pick_random_top_pair(high_scores, random_engine);

auto [mapq1, mapq2] = joint_mapq_from_high_scores(high_scores);
auto best_aln_pair = high_scores[0];
Expand Down Expand Up @@ -1036,9 +1030,26 @@ void align_or_map_paired(
uint8_t mapq1 = proper_pair_mapq(nams_pair[0]);
uint8_t mapq2 = proper_pair_mapq(nams_pair[1]);

details[0].best_alignments = 1;
details[1].best_alignments = 1;
bool is_primary = true;
sam.add_pair(alignment1, alignment2, record1, record2, read1.rc, read2.rc, mapq1, mapq2, is_proper, is_primary, details);
} else {
std::sort(alignment_pairs.begin(), alignment_pairs.end(), by_score<ScoredAlignmentPair>);
deduplicate_scored_pairs(alignment_pairs);

// If there are multiple top-scoring alignments (all with the same score),
// pick one randomly and move it to the front.
size_t i = count_best_alignment_pairs(alignment_pairs);
details[0].best_alignments = i;
details[1].best_alignments = i;
if (i > 1) {
size_t random_index = std::uniform_int_distribution<>(0, i - 1)(random_engine);
if (random_index != 0) {
std::swap(alignment_pairs[0], alignment_pairs[random_index]);
}
}

double secondary_dropoff = 2 * aligner.parameters.mismatch + aligner.parameters.gap_open;
output_aligned_pairs(
alignment_pairs,
Expand All @@ -1051,8 +1062,7 @@ void align_or_map_paired(
read2,
isize_est.mu,
isize_est.sigma,
details,
random_engine
details
);
}
}
Expand Down
9 changes: 5 additions & 4 deletions src/sam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,11 @@ void Sam::append_rg_and_newline() {

void Sam::append_details(const Details& details) {
std::stringstream s;
s << "\tna:i:" << details.nams;
s << "\tnr:i:" << details.nam_rescue;
s << "\tal:i:" << details.tried_alignment;
s << "\tga:i:" << details.gapped;
s << "\tna:i:" << details.nams
<< "\tnr:i:" << details.nam_rescue
<< "\tal:i:" << details.tried_alignment
<< "\tga:i:" << details.gapped
<< "\tX0:i:" << details.best_alignments;
sam_string.append(s.str());
}

Expand Down
2 changes: 2 additions & 0 deletions src/sam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ struct Details {
uint64_t mate_rescue{0}; // No. of times rescue by local alignment was attempted
uint64_t tried_alignment{0}; // No. of computed alignments (get_alignment or rescue_mate)
uint64_t gapped{0}; // No. of gapped alignments computed (in get_alignment)
uint64_t best_alignments{0}; // No. of best alignments with same score

Details& operator+=(const Details& other) {
nam_rescue = nam_rescue || other.nam_rescue;
Expand All @@ -59,6 +60,7 @@ struct Details {
mate_rescue += other.mate_rescue;
tried_alignment += other.tried_alignment;
gapped += other.gapped;
best_alignments += other.best_alignments;
return *this;
}
};
Expand Down

0 comments on commit 604edca

Please sign in to comment.