Skip to content

Commit

Permalink
[FEATURE] Add tandem_dup_count to Junction
Browse files Browse the repository at this point in the history
Signed-off-by: Lydia Buntrock <[email protected]>
  • Loading branch information
Irallia committed Aug 10, 2021
1 parent 863297c commit 48a7d05
Show file tree
Hide file tree
Showing 10 changed files with 278 additions and 190 deletions.
7 changes: 7 additions & 0 deletions include/structures/junction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class Junction
Breakend mate1{};
Breakend mate2{};
seqan3::dna5_vector inserted_sequence{};
int16_t tandem_dup_count{};
std::string read_name{};

public:
Expand All @@ -29,8 +30,10 @@ class Junction
Junction(Breakend the_mate1,
Breakend the_mate2,
auto const & the_inserted_sequence,
int16_t the_tandem_dup_count,
std::string the_read_name) : mate1{std::move(the_mate1)},
mate2{std::move(the_mate2)},
tandem_dup_count{the_tandem_dup_count},
read_name{std::move(the_read_name)}
{
if ((mate2.seq_name < mate1.seq_name) ||
Expand Down Expand Up @@ -60,6 +63,9 @@ class Junction
*/
seqan3::dna5_vector get_inserted_sequence() const;

//! \brief Returns the number of tandem copies of this junction.
int16_t get_tandem_dup_count() const;

//! \brief Returns the name of the read giving rise to this junction.
std::string get_read_name() const;
};
Expand All @@ -70,6 +76,7 @@ inline constexpr stream_t operator<<(stream_t && stream, Junction const & junc)
stream << junc.get_mate1() << '\t'
<< junc.get_mate2() << '\t'
<< junc.get_inserted_sequence().size() << '\t'
<< junc.get_tandem_dup_count() << '\t'
<< junc.get_read_name();
return stream;
}
Expand Down
3 changes: 3 additions & 0 deletions src/modules/sv_detection_methods/analyze_cigar_method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ void analyze_cigar(std::string const & read_name,
using seqan3::get;
int32_t length = get<0>(pair);
seqan3::cigar::operation operation = get<1>(pair);
int16_t tandem_dup_count = 0;
if (operation == 'M'_cigar_operation || operation == '='_cigar_operation || operation == 'X'_cigar_operation)
{
pos_ref += length;
Expand All @@ -40,6 +41,7 @@ void analyze_cigar(std::string const & read_name,
Junction new_junction{Breakend{chromosome, pos_ref - 1, strand::forward},
Breakend{chromosome, pos_ref, strand::forward},
inserted_bases,
tandem_dup_count,
read_name};
seqan3::debug_stream << "INS: " << new_junction << "\n";
junctions.push_back(std::move(new_junction));
Expand All @@ -54,6 +56,7 @@ void analyze_cigar(std::string const & read_name,
Junction new_junction{Breakend{chromosome, pos_ref - 1, strand::forward},
Breakend{chromosome, pos_ref + length, strand::forward},
""_dna5,
tandem_dup_count,
read_name};
seqan3::debug_stream << "DEL: " << new_junction << "\n";
junctions.push_back(std::move(new_junction));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
int32_t const min_length,
int32_t const max_overlap)
{
int16_t tandem_dup_count = 0;
for (size_t i = 1; i < aligned_segments.size(); i++)
{
AlignedSegment current = aligned_segments[i-1];
Expand Down Expand Up @@ -110,13 +111,13 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
if (distance_on_read < 0)
{
// No inserted sequence between overlapping alignment segments
junctions.emplace_back(mate1, mate2, ""_dna5, read_name);
junctions.emplace_back(mate1, mate2, ""_dna5, tandem_dup_count, read_name);
}
else
{
auto inserted_bases = query_sequence | seqan3::views::slice(current.get_query_end(),
next.get_query_start());
junctions.emplace_back(mate1, mate2, inserted_bases, read_name);
junctions.emplace_back(mate1, mate2, inserted_bases, tandem_dup_count, read_name);
}
seqan3::debug_stream << "BND: " << junctions.back() << "\n";
}
Expand Down
14 changes: 12 additions & 2 deletions src/structures/junction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ seqan3::dna5_vector Junction::get_inserted_sequence() const
return inserted_sequence;
}

int16_t Junction::get_tandem_dup_count() const
{
return tandem_dup_count;
}

std::string Junction::get_read_name() const
{
return read_name;
Expand All @@ -26,12 +31,17 @@ bool operator<(Junction const & lhs, Junction const & rhs)
? lhs.get_mate1() < rhs.get_mate1()
: lhs.get_mate2() != rhs.get_mate2()
? lhs.get_mate2() < rhs.get_mate2()
: lhs.get_inserted_sequence() < rhs.get_inserted_sequence();
: lhs.get_tandem_dup_count() != rhs.get_tandem_dup_count()
? lhs.get_tandem_dup_count() < rhs.get_tandem_dup_count()
: lhs.get_inserted_sequence() < rhs.get_inserted_sequence();
}

bool operator==(Junction const & lhs, Junction const & rhs)
{
return (lhs.get_mate1() == rhs.get_mate1()) && (lhs.get_mate2() == rhs.get_mate2()) && (lhs.get_inserted_sequence() == rhs.get_inserted_sequence());
return (lhs.get_mate1() == rhs.get_mate1()) &&
(lhs.get_mate2() == rhs.get_mate2()) &&
(lhs.get_inserted_sequence() == rhs.get_inserted_sequence() &&
(lhs.get_tandem_dup_count() == rhs.get_tandem_dup_count()));
}

bool operator!=(Junction const & lhs, Junction const & rhs)
Expand Down
Loading

0 comments on commit 48a7d05

Please sign in to comment.