From 48a7d05ef0a817d19307d628381367e5b8c1f664 Mon Sep 17 00:00:00 2001 From: Lydia Buntrock Date: Tue, 27 Jul 2021 15:55:16 +0200 Subject: [PATCH] [FEATURE] Add tandem_dup_count to Junction Signed-off-by: Lydia Buntrock --- include/structures/junction.hpp | 7 + .../analyze_cigar_method.cpp | 3 + .../analyze_split_read_method.cpp | 5 +- src/structures/junction.cpp | 14 +- test/api/clustering_test.cpp | 213 ++++++++++-------- test/api/detection_test.cpp | 103 ++++++--- test/api/input_file_test.cpp | 11 +- test/cli/iGenVar_cli_test.cpp | 24 +- test/data/datasources.cmake | 2 +- test/data/mini_example/output_err.txt | 86 +++---- 10 files changed, 278 insertions(+), 190 deletions(-) diff --git a/include/structures/junction.hpp b/include/structures/junction.hpp index 02f56929..f96b50ce 100644 --- a/include/structures/junction.hpp +++ b/include/structures/junction.hpp @@ -13,6 +13,7 @@ class Junction Breakend mate1{}; Breakend mate2{}; seqan3::dna5_vector inserted_sequence{}; + int16_t tandem_dup_count{}; std::string read_name{}; public: @@ -29,8 +30,10 @@ class Junction Junction(Breakend the_mate1, Breakend the_mate2, auto const & the_inserted_sequence, + int16_t the_tandem_dup_count, std::string the_read_name) : mate1{std::move(the_mate1)}, mate2{std::move(the_mate2)}, + tandem_dup_count{the_tandem_dup_count}, read_name{std::move(the_read_name)} { if ((mate2.seq_name < mate1.seq_name) || @@ -60,6 +63,9 @@ class Junction */ seqan3::dna5_vector get_inserted_sequence() const; + //! \brief Returns the number of tandem copies of this junction. + int16_t get_tandem_dup_count() const; + //! \brief Returns the name of the read giving rise to this junction. std::string get_read_name() const; }; @@ -70,6 +76,7 @@ inline constexpr stream_t operator<<(stream_t && stream, Junction const & junc) stream << junc.get_mate1() << '\t' << junc.get_mate2() << '\t' << junc.get_inserted_sequence().size() << '\t' + << junc.get_tandem_dup_count() << '\t' << junc.get_read_name(); return stream; } diff --git a/src/modules/sv_detection_methods/analyze_cigar_method.cpp b/src/modules/sv_detection_methods/analyze_cigar_method.cpp index ab7e26e5..8ed80e55 100644 --- a/src/modules/sv_detection_methods/analyze_cigar_method.cpp +++ b/src/modules/sv_detection_methods/analyze_cigar_method.cpp @@ -26,6 +26,7 @@ void analyze_cigar(std::string const & read_name, using seqan3::get; int32_t length = get<0>(pair); seqan3::cigar::operation operation = get<1>(pair); + int16_t tandem_dup_count = 0; if (operation == 'M'_cigar_operation || operation == '='_cigar_operation || operation == 'X'_cigar_operation) { pos_ref += length; @@ -40,6 +41,7 @@ void analyze_cigar(std::string const & read_name, Junction new_junction{Breakend{chromosome, pos_ref - 1, strand::forward}, Breakend{chromosome, pos_ref, strand::forward}, inserted_bases, + tandem_dup_count, read_name}; seqan3::debug_stream << "INS: " << new_junction << "\n"; junctions.push_back(std::move(new_junction)); @@ -54,6 +56,7 @@ void analyze_cigar(std::string const & read_name, Junction new_junction{Breakend{chromosome, pos_ref - 1, strand::forward}, Breakend{chromosome, pos_ref + length, strand::forward}, ""_dna5, + tandem_dup_count, read_name}; seqan3::debug_stream << "DEL: " << new_junction << "\n"; junctions.push_back(std::move(new_junction)); diff --git a/src/modules/sv_detection_methods/analyze_split_read_method.cpp b/src/modules/sv_detection_methods/analyze_split_read_method.cpp index 7ab8ed9c..8f6e9b33 100644 --- a/src/modules/sv_detection_methods/analyze_split_read_method.cpp +++ b/src/modules/sv_detection_methods/analyze_split_read_method.cpp @@ -59,6 +59,7 @@ void analyze_aligned_segments(std::vector const & aligned_segmen int32_t const min_length, int32_t const max_overlap) { + int16_t tandem_dup_count = 0; for (size_t i = 1; i < aligned_segments.size(); i++) { AlignedSegment current = aligned_segments[i-1]; @@ -110,13 +111,13 @@ void analyze_aligned_segments(std::vector const & aligned_segmen if (distance_on_read < 0) { // No inserted sequence between overlapping alignment segments - junctions.emplace_back(mate1, mate2, ""_dna5, read_name); + junctions.emplace_back(mate1, mate2, ""_dna5, tandem_dup_count, read_name); } else { auto inserted_bases = query_sequence | seqan3::views::slice(current.get_query_end(), next.get_query_start()); - junctions.emplace_back(mate1, mate2, inserted_bases, read_name); + junctions.emplace_back(mate1, mate2, inserted_bases, tandem_dup_count, read_name); } seqan3::debug_stream << "BND: " << junctions.back() << "\n"; } diff --git a/src/structures/junction.cpp b/src/structures/junction.cpp index 9392d75a..343ed24f 100644 --- a/src/structures/junction.cpp +++ b/src/structures/junction.cpp @@ -15,6 +15,11 @@ seqan3::dna5_vector Junction::get_inserted_sequence() const return inserted_sequence; } +int16_t Junction::get_tandem_dup_count() const +{ + return tandem_dup_count; +} + std::string Junction::get_read_name() const { return read_name; @@ -26,12 +31,17 @@ bool operator<(Junction const & lhs, Junction const & rhs) ? lhs.get_mate1() < rhs.get_mate1() : lhs.get_mate2() != rhs.get_mate2() ? lhs.get_mate2() < rhs.get_mate2() - : lhs.get_inserted_sequence() < rhs.get_inserted_sequence(); + : lhs.get_tandem_dup_count() != rhs.get_tandem_dup_count() + ? lhs.get_tandem_dup_count() < rhs.get_tandem_dup_count() + : lhs.get_inserted_sequence() < rhs.get_inserted_sequence(); } bool operator==(Junction const & lhs, Junction const & rhs) { - return (lhs.get_mate1() == rhs.get_mate1()) && (lhs.get_mate2() == rhs.get_mate2()) && (lhs.get_inserted_sequence() == rhs.get_inserted_sequence()); + return (lhs.get_mate1() == rhs.get_mate1()) && + (lhs.get_mate2() == rhs.get_mate2()) && + (lhs.get_inserted_sequence() == rhs.get_inserted_sequence() && + (lhs.get_tandem_dup_count() == rhs.get_tandem_dup_count())); } bool operator!=(Junction const & lhs, Junction const & rhs) diff --git a/test/api/clustering_test.cpp b/test/api/clustering_test.cpp index cc6391cd..29a210d8 100644 --- a/test/api/clustering_test.cpp +++ b/test/api/clustering_test.cpp @@ -14,6 +14,7 @@ int32_t const chrom1_position2 = 94734377; int32_t const chrom1_position3 = 112323345; std::string const chrom2 = "chr2"; int32_t const chrom2_position1 = 234432; +int16_t tandem_dup_count = 0; std::string const read_name_1 = "m2257/8161/CCS"; std::string const read_name_2 = "m41327/11677/CCS"; std::string const read_name_3 = "m21263/13017/CCS"; @@ -28,21 +29,21 @@ std::vector prepare_input_junctions() std::vector input_junctions { Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, - Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, read_name_1}, + Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, tandem_dup_count, read_name_1}, Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, - Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, read_name_2}, + Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_2}, Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, - Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, read_name_3}, + Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, tandem_dup_count, read_name_3}, Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, - Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, read_name_4}, + Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_4}, Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, - Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, read_name_5}, + Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_5}, Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, read_name_6}, + Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6}, Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, read_name_7}, + Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7}, Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, read_name_8} + Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} }; std::sort(input_junctions.begin(), input_junctions.end()); @@ -59,28 +60,36 @@ TEST(simple_clustering, all_separate) std::vector expected_clusters { Cluster{{ Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, - Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, read_name_1} + Breakend{chrom2, chrom2_position1 + 8, strand::forward}, + ""_dna5, tandem_dup_count, read_name_1} }}, Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, - Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, read_name_2} + Breakend{chrom2, chrom2_position1 - 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_2} }}, Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, - Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, read_name_3}, + Breakend{chrom2, chrom2_position1 + 1, strand::forward}, + ""_dna5, tandem_dup_count, read_name_3}, }}, Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, - Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, read_name_4}, + Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_4}, }}, Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, - Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, read_name_5}, + Breakend{chrom2, chrom2_position1 + 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_5}, }}, Cluster{{ Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, read_name_6} + Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_6} }}, Cluster{{ Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, read_name_7} + Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_7} }}, Cluster{{ Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, read_name_8} + Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_8} }} }; std::sort(expected_clusters.begin(), expected_clusters.end()); @@ -89,7 +98,9 @@ TEST(simple_clustering, all_separate) for (size_t cluster_index = 0; cluster_index < expected_clusters.size(); ++cluster_index) { - EXPECT_TRUE(expected_clusters[cluster_index] == resulting_clusters[cluster_index]) << "Cluster " << cluster_index << " unequal"; + EXPECT_TRUE(expected_clusters[cluster_index] == resulting_clusters[cluster_index]) << "Cluster " + << cluster_index + << " unequal"; } } @@ -98,9 +109,9 @@ TEST(simple_clustering, clustered) std::vector input_junctions { Junction{Breakend{chrom1, chrom1_position1, strand::forward}, - Breakend{chrom2, chrom2_position1, strand::forward}, ""_dna5, read_name_1}, + Breakend{chrom2, chrom2_position1, strand::forward}, ""_dna5, tandem_dup_count, read_name_1}, Junction{Breakend{chrom1, chrom1_position1, strand::forward}, - Breakend{chrom2, chrom2_position1, strand::forward}, ""_dna5, read_name_2} + Breakend{chrom2, chrom2_position1, strand::forward}, ""_dna5, tandem_dup_count, read_name_2} }; std::vector resulting_clusters{}; resulting_clusters = simple_clustering_method(input_junctions); @@ -108,10 +119,10 @@ TEST(simple_clustering, clustered) // Both junctions in the same cluster std::vector expected_clusters { - Cluster{{ Junction{Breakend{chrom1, chrom1_position1, strand::forward}, - Breakend{chrom2, chrom2_position1, strand::forward}, ""_dna5, read_name_1}, - Junction{Breakend{chrom1, chrom1_position1, strand::forward}, - Breakend{chrom2, chrom2_position1, strand::forward}, ""_dna5, read_name_2} + Cluster{{Junction{Breakend{chrom1, chrom1_position1, strand::forward}, + Breakend{chrom2, chrom2_position1, strand::forward}, ""_dna5, tandem_dup_count, read_name_1}, + Junction{Breakend{chrom1, chrom1_position1, strand::forward}, + Breakend{chrom2, chrom2_position1, strand::forward}, ""_dna5, tandem_dup_count, read_name_2} }} }; std::sort(expected_clusters.begin(), expected_clusters.end()); @@ -120,7 +131,9 @@ TEST(simple_clustering, clustered) for (size_t cluster_index = 0; cluster_index < expected_clusters.size(); ++cluster_index) { - EXPECT_TRUE(expected_clusters[cluster_index] == resulting_clusters[cluster_index]) << "Cluster " << cluster_index << " unequal"; + EXPECT_TRUE(expected_clusters[cluster_index] == resulting_clusters[cluster_index]) << "Cluster " + << cluster_index + << " unequal"; } } @@ -150,27 +163,27 @@ TEST(hierarchical_clustering, partitioning) { { Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, - Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, read_name_1}, //cluster 1 + Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, tandem_dup_count, read_name_1}, //cluster 1 Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, - Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, read_name_2}, //cluster 1 + Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_2}, //cluster 1 Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, - Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, read_name_3}, //cluster 1 + Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, tandem_dup_count, read_name_3}, //cluster 1 }, { Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, - Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, read_name_4}, //cluster 2 + Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_4}, //cluster 2 }, { Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, - Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, read_name_5}, //cluster 3 + Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_5}, //cluster 3 }, { Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, read_name_6}, //cluster 4 + Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6}, //cluster 4 Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, read_name_7}, //cluster 4 + Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7}, //cluster 4 Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, read_name_8} //cluster 4 + Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} //cluster 4 } }; @@ -186,7 +199,6 @@ TEST(hierarchical_clustering, partitioning) } } - TEST(hierarchical_clustering, strict_clustering) { std::vector input_junctions = prepare_input_junctions(); @@ -195,29 +207,29 @@ TEST(hierarchical_clustering, strict_clustering) // Each junction in separate cluster std::vector expected_clusters { - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, - Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, read_name_1} + Cluster{{Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, + Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, tandem_dup_count, read_name_1} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, - Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, read_name_2} + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, + Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_2} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, - Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, read_name_3}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, + Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, tandem_dup_count, read_name_3}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, - Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, read_name_4}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, + Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_4}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, - Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, read_name_5}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, + Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_5}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, read_name_6} + Cluster{{Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, + Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, read_name_7} + Cluster{{Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, + Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, read_name_8} + Cluster{{Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, + Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} }} }; std::sort(expected_clusters.begin(), expected_clusters.end()); @@ -242,7 +254,6 @@ TEST(hierarchical_clustering, strict_clustering) } } - TEST(hierarchical_clustering, clustering_10) { std::vector input_junctions = prepare_input_junctions(); @@ -260,28 +271,28 @@ TEST(hierarchical_clustering, clustering_10) // Only junctions from reads 7 and 8 have a distance < 10 and cluster together std::vector expected_clusters { - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, - Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, read_name_1} + Cluster{{Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, + Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, tandem_dup_count, read_name_1} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, - Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, read_name_2} + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, + Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_2} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, - Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, read_name_3}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, + Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, tandem_dup_count, read_name_3}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, - Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, read_name_4}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, + Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_4}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, - Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, read_name_5}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, + Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_5}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, read_name_6} + Cluster{{Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, + Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, read_name_7}, - Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, read_name_8} + Cluster{{Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, + Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7}, + Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, + Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} }} }; std::sort(expected_clusters.begin(), expected_clusters.end()); @@ -325,25 +336,25 @@ TEST(hierarchical_clustering, clustering_15) std::vector expected_clusters { Cluster{{ Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, - Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, read_name_1} + Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, tandem_dup_count, read_name_1} }}, Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, - Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, read_name_2}, + Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_2}, Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, - Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, read_name_3}, + Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, tandem_dup_count, read_name_3}, }}, Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, - Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, read_name_4}, + Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_4}, }}, Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, - Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, read_name_5}, + Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_5}, }}, Cluster{{ Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, read_name_6}, + Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6}, Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, read_name_7}, + Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7}, Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, read_name_8} + Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} }} }; std::sort(expected_clusters.begin(), expected_clusters.end()); @@ -368,7 +379,6 @@ TEST(hierarchical_clustering, clustering_15) } } - TEST(hierarchical_clustering, clustering_25) { std::vector input_junctions = prepare_input_junctions(); @@ -386,25 +396,33 @@ TEST(hierarchical_clustering, clustering_25) // Junctions from reads 6-8 and 1-3 have a distance < 25 and cluster together std::vector expected_clusters { - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, - Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, read_name_1}, - Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, - Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, read_name_2}, - Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, - Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, read_name_3}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, + Breakend{chrom2, chrom2_position1 + 8, strand::forward}, + ""_dna5, tandem_dup_count, read_name_1}, + Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, + Breakend{chrom2, chrom2_position1 - 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_2}, + Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, + Breakend{chrom2, chrom2_position1 + 1, strand::forward}, + ""_dna5, tandem_dup_count, read_name_3}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, - Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, read_name_4}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, + Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_4}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, - Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, read_name_5}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, + Breakend{chrom2, chrom2_position1 + 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_5}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, read_name_6}, - Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, read_name_7}, - Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, read_name_8} + Cluster{{Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, + Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_6}, + Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, + Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_7}, + Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, + Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_8} }} }; std::sort(expected_clusters.begin(), expected_clusters.end()); @@ -434,17 +452,14 @@ TEST(hierarchical_clustering, subsampling) std::vector input_junctions; for (int32_t i = 0; i < 300; ++i) { - input_junctions.emplace_back(Breakend{chrom1, - chrom1_position1 + i, - strand::forward}, - Breakend{chrom2, - chrom2_position1 + i, - strand::forward}, + input_junctions.emplace_back(Breakend{chrom1, chrom1_position1 + i, strand::forward}, + Breakend{chrom2, chrom2_position1 + i, strand::forward}, ""_dna5, + tandem_dup_count, read_name_1); } std::sort(input_junctions.begin(), input_junctions.end()); - + testing::internal::CaptureStderr(); std::vector clusters = hierarchical_clustering_method(input_junctions, 0); @@ -454,7 +469,7 @@ TEST(hierarchical_clustering, subsampling) num_junctions += cluster.get_cluster_size(); } EXPECT_EQ(num_junctions, 200); - + std::string const expected_err { "A partition exceeds the maximum size (300>200) and has to be subsampled. " diff --git a/test/api/detection_test.cpp b/test/api/detection_test.cpp index 1b356c15..406bad41 100644 --- a/test/api/detection_test.cpp +++ b/test/api/detection_test.cpp @@ -11,7 +11,7 @@ using seqan3::operator""_dna5; /* -------- detection methods tests -------- */ -// TODO (irallia): implement test cases +// TODO (irallia): implement test cases <- (23.7.21, irallia) which cases are done / still open? TEST(junction_detection, cigar_string_simple_del) { @@ -41,7 +41,11 @@ TEST(junction_detection, cigar_string_simple_del) Breakend new_breakend_1 {chromosome, 15, strand::forward}; Breakend new_breakend_2 {chromosome, 22, strand::forward}; - std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, ""_dna5, read_name}}; + std::vector junctions_expected_res{Junction{new_breakend_1, + new_breakend_2, + ""_dna5, + 0, + read_name}}; ASSERT_EQ(junctions_expected_res.size(), junctions_res.size()); @@ -71,7 +75,11 @@ TEST(junction_detection, cigar_string_del_padding) Breakend new_breakend_1 {chromosome, 15, strand::forward}; Breakend new_breakend_2 {chromosome, 22, strand::forward}; - std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, ""_dna5, read_name}}; + std::vector junctions_expected_res{Junction{new_breakend_1, + new_breakend_2, + ""_dna5, + 0, + read_name}}; ASSERT_EQ(junctions_expected_res.size(), junctions_res.size()); @@ -99,7 +107,11 @@ TEST(junction_detection, cigar_string_simple_ins) Breakend new_breakend_1 {chromosome, 9, strand::forward}; Breakend new_breakend_2 {chromosome, 10, strand::forward}; - std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, "ATTTCG"_dna5, read_name}}; + std::vector junctions_expected_res{Junction{new_breakend_1, + new_breakend_2, + "ATTTCG"_dna5, + 0, + read_name}}; ASSERT_EQ(junctions_expected_res.size(), junctions_res.size()); @@ -127,7 +139,11 @@ TEST(junction_detection, cigar_string_ins_hardclip) Breakend new_breakend_1 {chromosome, 9, strand::forward}; Breakend new_breakend_2 {chromosome, 10, strand::forward}; - std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, "GATCGA"_dna5, read_name}}; + std::vector junctions_expected_res{Junction{new_breakend_1, + new_breakend_2, + "GATCGA"_dna5, + 0, + read_name}}; ASSERT_EQ(junctions_expected_res.size(), junctions_res.size()); @@ -268,20 +284,31 @@ TEST(junction_detection, analyze_aligned_segments) Breakend new_breakend_10 {"chr1", 150, strand::forward}; Breakend new_breakend_11 {"chr1", 155, strand::forward}; Breakend new_breakend_12 {"chr1", 156, strand::forward}; - std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, ""_dna5, read_name}, //translocation - Junction{new_breakend_3, new_breakend_4, ""_dna5, read_name}, //translocation - Junction{new_breakend_5, new_breakend_6, ""_dna5, read_name}, //inversion - Junction{new_breakend_7, new_breakend_8, ""_dna5, read_name}, //inversion - Junction{new_breakend_9, new_breakend_10, ""_dna5, read_name}, //deletion - Junction{new_breakend_11, - new_breakend_12, - "GCGATACGCGTCGCAACTACGACGCGCATCAGCAGGCGAC"_dna5, read_name}}; //insertion + std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, + ""_dna5, + 0, read_name}, // translocation + Junction{new_breakend_3, new_breakend_4, + ""_dna5, + 0, read_name}, // translocation + Junction{new_breakend_5, new_breakend_6, + ""_dna5, + 0, read_name}, // inversion + Junction{new_breakend_7, new_breakend_8, + ""_dna5, + 0, read_name}, // inversion + Junction{new_breakend_9, new_breakend_10, + ""_dna5, + 0, read_name}, // deletion + Junction{new_breakend_11, new_breakend_12, + "GCGATACGCGTCGCAACTACGACGCGCATCAGCAGGCGAC"_dna5, + 0, read_name}}; // insertion ASSERT_EQ(junctions_expected_res.size(), junctions_res.size()); for (size_t i = 0; i < junctions_expected_res.size(); ++i) { - EXPECT_EQ(junctions_expected_res[i].get_read_name(), junctions_res[i].get_read_name()) << "Read names of junction " << i << " unequal"; + EXPECT_EQ(junctions_expected_res[i].get_read_name(), + junctions_res[i].get_read_name()) << "Read names of junction " << i << " unequal"; EXPECT_TRUE(junctions_expected_res[i] == junctions_res[i]) << "Junction " << i << " unequal\nMate 1 equal: " << (junctions_expected_res[i].get_mate1() == junctions_res[i].get_mate1()) << "\nMate 2 equal: " @@ -310,17 +337,23 @@ TEST(junction_detection, analyze_aligned_segments) Breakend new_breakend_11 {"chr1", 155, strand::forward}; Breakend new_breakend_12 {"chr1", 156, strand::forward}; // The inversion and deletion are smaller than 20 bp and therefore not returned - std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, ""_dna5, read_name}, //translocation - Junction{new_breakend_3, new_breakend_4, ""_dna5, read_name}, //translocation + std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, + ""_dna5, + 0, read_name}, // translocation + Junction{new_breakend_3, new_breakend_4, + ""_dna5, + 0, read_name}, // translocation Junction{new_breakend_11, new_breakend_12, - "GCGATACGCGTCGCAACTACGACGCGCATCAGCAGGCGAC"_dna5, read_name}}; //insertion + "GCGATACGCGTCGCAACTACGACGCGCATCAGCAGGCGAC"_dna5, + 0, read_name}}; // insertion ASSERT_EQ(junctions_expected_res.size(), junctions_res.size()); for (size_t i = 0; i < junctions_expected_res.size(); ++i) { - EXPECT_EQ(junctions_expected_res[i].get_read_name(), junctions_res[i].get_read_name()) << "Read names of junction " << i << " unequal"; + EXPECT_EQ(junctions_expected_res[i].get_read_name(), + junctions_res[i].get_read_name()) << "Read names of junction " << i << " unequal"; EXPECT_TRUE(junctions_expected_res[i] == junctions_res[i]) << "Junction " << i << " unequal\nMate 1 equal: " << (junctions_expected_res[i].get_mate1() == junctions_res[i].get_mate1()) << "\nMate 2 equal: " @@ -351,13 +384,16 @@ TEST(junction_detection, overlapping_segments) // Deletion from two overlapping alignment segments (overlap of 5bp) Breakend new_breakend_1 {"chr1", 119, strand::forward}; Breakend new_breakend_2 {"chr1", 205, strand::forward}; - std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, ""_dna5, read_name}}; + std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, + ""_dna5, + 0, read_name}}; ASSERT_EQ(junctions_expected_res.size(), junctions_res.size()); for (size_t i = 0; i < junctions_expected_res.size(); ++i) { - EXPECT_EQ(junctions_expected_res[i].get_read_name(), junctions_res[i].get_read_name()) << "Read names of junction " << i << " unequal"; + EXPECT_EQ(junctions_expected_res[i].get_read_name(), + junctions_res[i].get_read_name()) << "Read names of junction " << i << " unequal"; EXPECT_TRUE(junctions_expected_res[i] == junctions_res[i]) << "Junction " << i << " unequal\nMate 1 equal: " << (junctions_expected_res[i].get_mate1() == junctions_res[i].get_mate1()) << "\nMate 2 equal: " @@ -414,20 +450,33 @@ TEST(junction_detection, analyze_sa_tag) Breakend new_breakend_10 {"chr1", 150, strand::forward}; Breakend new_breakend_11 {"chr1", 155, strand::forward}; Breakend new_breakend_12 {"chr1", 156, strand::forward}; - std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, ""_dna5, read_name}, //translocation - Junction{new_breakend_3, new_breakend_4, ""_dna5, read_name}, //translocation - Junction{new_breakend_5, new_breakend_6, ""_dna5, read_name}, //inversion - Junction{new_breakend_7, new_breakend_8, ""_dna5, read_name}, //inversion - Junction{new_breakend_9, new_breakend_10, ""_dna5, read_name}, //deletion + std::vector junctions_expected_res{Junction{new_breakend_1, new_breakend_2, + ""_dna5, + 0, read_name}, // translocation + Junction{new_breakend_3, new_breakend_4, + ""_dna5, + 0, read_name}, // translocation + Junction{new_breakend_5, new_breakend_6, + ""_dna5, + 0, read_name}, // inversion + Junction{new_breakend_7, new_breakend_8, + ""_dna5, + 0, read_name}, // inversion + Junction{new_breakend_9, new_breakend_10, + ""_dna5, + 0, read_name}, // deletion Junction{new_breakend_11, new_breakend_12, - "GCGATACGCGTCGCAACTACGACGCGCATCAGCAGGCGAC"_dna5, read_name}}; //insertion + "GCGATACGCGTCGCAACTACGACGCGCATCAGCAGGCGAC"_dna5, + 0, read_name // insertion + }}; ASSERT_EQ(junctions_expected_res.size(), junctions_res.size()); for (size_t i = 0; i < junctions_expected_res.size(); ++i) { - EXPECT_EQ(junctions_expected_res[i].get_read_name(), junctions_res[i].get_read_name()) << "Read names of junction " << i << " unequal"; + EXPECT_EQ(junctions_expected_res[i].get_read_name(), + junctions_res[i].get_read_name()) << "Read names of junction " << i << " unequal"; EXPECT_TRUE(junctions_expected_res[i] == junctions_res[i]) << "Junction " << i << " unequal\nMate 1 equal: " << (junctions_expected_res[i].get_mate1() == junctions_res[i].get_mate1()) << "\nMate 2 equal: " diff --git a/test/api/input_file_test.cpp b/test/api/input_file_test.cpp index 9416eb47..9a65609e 100644 --- a/test/api/input_file_test.cpp +++ b/test/api/input_file_test.cpp @@ -164,16 +164,19 @@ TEST(input_file, detect_junctions_in_long_reads_sam_file) Breakend new_breakend_7 {chromosome_2, pos_ref_5, strand::forward}; Breakend new_breakend_8 {chromosome_1, pos_ref_6, strand::forward}; + int16_t tandem_dup_count = 0; + std::string const read_name_1 = "m2257/8161/CCS"; std::string const read_name_2 = "m41327/11677/CCS"; std::string const read_name_3 = "m21263/13017/CCS"; std::string const read_name_4 = "m38637/7161/CCS"; std::vector junctions_expected_res - { Junction{new_breakend_1, new_breakend_2, insertion_sequence_1, read_name_1}, - Junction{new_breakend_5, new_breakend_6, "TA"_dna5, read_name_2}, - Junction{new_breakend_7, new_breakend_8, ""_dna5, read_name_3}, - Junction{new_breakend_7, new_breakend_8, ""_dna5, read_name_4} + { + Junction{new_breakend_1, new_breakend_2, insertion_sequence_1, tandem_dup_count, read_name_1}, + Junction{new_breakend_5, new_breakend_6, "TA"_dna5, tandem_dup_count, read_name_2}, + Junction{new_breakend_7, new_breakend_8, ""_dna5, tandem_dup_count, read_name_3}, + Junction{new_breakend_7, new_breakend_8, ""_dna5, tandem_dup_count, read_name_4} }; ASSERT_EQ(junctions_expected_res.size(), junctions_res.size()); diff --git a/test/cli/iGenVar_cli_test.cpp b/test/cli/iGenVar_cli_test.cpp index 9836aabf..5944e834 100644 --- a/test/cli/iGenVar_cli_test.cpp +++ b/test/cli/iGenVar_cli_test.cpp @@ -154,10 +154,10 @@ std::string expected_res_empty std::string expected_err_default_no_err { "Detect junctions in long reads...\n" - "INS: chr21\t41972615\tForward\tchr21\t41972616\tForward\t1681\tm2257/8161/CCS\n" - "BND: chr21\t41972615\tReverse\tchr22\t17458415\tReverse\t2\tm41327/11677/CCS\n" - "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\tm21263/13017/CCS\n" - "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\tm38637/7161/CCS\n" + "INS: chr21\t41972615\tForward\tchr21\t41972616\tForward\t1681\t0\tm2257/8161/CCS\n" + "BND: chr21\t41972615\tReverse\tchr22\t17458415\tReverse\t2\t0\tm41327/11677/CCS\n" + "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm21263/13017/CCS\n" + "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm38637/7161/CCS\n" "Start clustering...\n" "Done with clustering. Found 2 junction clusters.\n" "No refinement was selected.\n" @@ -343,13 +343,13 @@ TEST_F(iGenVar_cli_test, with_default_arguments) std::string expected_err { "Detect junctions in long reads...\n" - "INS: chr21\t41972615\tForward\tchr21\t41972616\tForward\t1681\tm2257/8161/CCS\n" + "INS: chr21\t41972615\tForward\tchr21\t41972616\tForward\t1681\t0\tm2257/8161/CCS\n" "The read depth method for long reads is not yet implemented.\n" - "BND: chr21\t41972615\tReverse\tchr22\t17458415\tReverse\t2\tm41327/11677/CCS\n" + "BND: chr21\t41972615\tReverse\tchr22\t17458415\tReverse\t2\t0\tm41327/11677/CCS\n" "The read depth method for long reads is not yet implemented.\n" - "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\tm21263/13017/CCS\n" + "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm21263/13017/CCS\n" "The read depth method for long reads is not yet implemented.\n" - "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\tm38637/7161/CCS\n" + "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm38637/7161/CCS\n" "The read depth method for long reads is not yet implemented.\n" "Start clustering...\n" "Done with clustering. Found 2 junction clusters.\n" @@ -434,10 +434,10 @@ TEST_F(iGenVar_cli_test, test_direct_methods_input) std::string expected_err { "Detect junctions in long reads...\n" - "INS: chr21\t41972615\tForward\tchr21\t41972616\tForward\t1681\tm2257/8161/CCS\n" - "BND: chr21\t41972615\tReverse\tchr22\t17458415\tReverse\t2\tm41327/11677/CCS\n" - "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\tm21263/13017/CCS\n" - "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\tm38637/7161/CCS\n" + "INS: chr21\t41972615\tForward\tchr21\t41972616\tForward\t1681\t0\tm2257/8161/CCS\n" + "BND: chr21\t41972615\tReverse\tchr22\t17458415\tReverse\t2\t0\tm41327/11677/CCS\n" + "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm21263/13017/CCS\n" + "BND: chr21\t41972616\tReverse\tchr22\t17458416\tReverse\t0\t0\tm38637/7161/CCS\n" "Start clustering...\n" "Done with clustering. Found 3 junction clusters.\n" "No refinement was selected.\n" diff --git a/test/data/datasources.cmake b/test/data/datasources.cmake index 059d9c4c..6128de0b 100644 --- a/test/data/datasources.cmake +++ b/test/data/datasources.cmake @@ -24,7 +24,7 @@ declare_datasource (FILE single_end_mini_example.sam # copies file to /data/output_err.txt declare_datasource (FILE output_err.txt URL ${CMAKE_SOURCE_DIR}/test/data/mini_example/output_err.txt - URL_HASH SHA256=5f2c89eb3aa4838e7c4ff9b2d08d0d715064f5e42e6851ecaddef4156e8e4655) + URL_HASH SHA256=5e9c724487b23794ad9b3b4758d5a157f7a1791014e78be0b437932c68fbcd41) # copies file to /data/output_res.txt declare_datasource (FILE output_res.txt diff --git a/test/data/mini_example/output_err.txt b/test/data/mini_example/output_err.txt index 13533384..57bd49f6 100644 --- a/test/data/mini_example/output_err.txt +++ b/test/data/mini_example/output_err.txt @@ -1,47 +1,47 @@ Detect junctions in long reads... -DEL: chr1 56 Forward chr1 70 Forward 0 read010 -DEL: chr1 56 Forward chr1 70 Forward 0 read011 -DEL: chr1 56 Forward chr1 70 Forward 0 read012 -DEL: chr1 56 Forward chr1 70 Forward 0 read013 -DEL: chr1 56 Forward chr1 70 Forward 0 read014 -DEL: chr1 56 Forward chr1 70 Forward 0 read015 -DEL: chr1 56 Forward chr1 70 Forward 0 read016 -DEL: chr1 56 Forward chr1 70 Forward 0 read017 -DEL: chr1 56 Forward chr1 70 Forward 0 read018 -BND: chr1 109 Reverse chr1 124 Reverse 0 read021 -INS: chr1 124 Forward chr1 125 Forward 15 read023 -INS: chr1 124 Forward chr1 125 Forward 15 read024 -INS: chr1 124 Forward chr1 125 Forward 15 read025 -BND: chr1 96 Forward chr1 125 Forward 0 read027 -BND: chr1 180 Reverse chr1 187 Reverse 0 read029 -BND: chr1 180 Reverse chr1 187 Reverse 0 read030 -INS: chr1 179 Forward chr1 180 Forward 8 read031 -BND: chr1 180 Reverse chr1 187 Reverse 0 read033 -BND: chr1 180 Reverse chr1 187 Reverse 0 read034 -DEL: chr1 265 Forward chr1 286 Forward 0 read037 -DEL: chr1 265 Forward chr1 286 Forward 0 read038 -BND: chr1 265 Forward chr1 286 Forward 0 read039 -BND: chr1 282 Reverse chr1 298 Reverse 0 read039 -BND: chr1 266 Reverse chr1 285 Reverse 0 read039 -BND: chr1 265 Forward chr1 286 Forward 0 read040 -BND: chr1 282 Reverse chr1 298 Reverse 0 read040 -BND: chr1 266 Reverse chr1 285 Reverse 0 read040 -DEL: chr1 281 Forward chr1 299 Forward 0 read041 -BND: chr1 282 Reverse chr1 298 Reverse 0 read041 -BND: chr1 266 Reverse chr1 285 Reverse 0 read041 -DEL: chr1 335 Forward chr1 350 Forward 0 read042 -DEL: chr1 335 Forward chr1 350 Forward 0 read043 -DEL: chr1 335 Forward chr1 350 Forward 0 read044 -DEL: chr1 335 Forward chr1 350 Forward 0 read045 -INS: chr1 367 Forward chr1 368 Forward 11 read046 -INS: chr1 367 Forward chr1 368 Forward 11 read047 -INS: chr1 367 Forward chr1 368 Forward 11 read048 -INS: chr1 367 Forward chr1 368 Forward 11 read049 -INS: chr1 367 Forward chr1 368 Forward 11 read050 -DEL: chr1 383 Forward chr1 395 Forward 0 read050 -BND: chr1 10 Reverse chr1 470 Reverse 0 read051 -BND: chr1 10 Reverse chr1 470 Reverse 0 read052 -BND: chr1 33 Forward chr1 471 Forward 0 read052 +DEL: chr1 56 Forward chr1 70 Forward 0 0 read010 +DEL: chr1 56 Forward chr1 70 Forward 0 0 read011 +DEL: chr1 56 Forward chr1 70 Forward 0 0 read012 +DEL: chr1 56 Forward chr1 70 Forward 0 0 read013 +DEL: chr1 56 Forward chr1 70 Forward 0 0 read014 +DEL: chr1 56 Forward chr1 70 Forward 0 0 read015 +DEL: chr1 56 Forward chr1 70 Forward 0 0 read016 +DEL: chr1 56 Forward chr1 70 Forward 0 0 read017 +DEL: chr1 56 Forward chr1 70 Forward 0 0 read018 +BND: chr1 109 Reverse chr1 124 Reverse 0 0 read021 +INS: chr1 124 Forward chr1 125 Forward 15 0 read023 +INS: chr1 124 Forward chr1 125 Forward 15 0 read024 +INS: chr1 124 Forward chr1 125 Forward 15 0 read025 +BND: chr1 96 Forward chr1 125 Forward 0 0 read027 +BND: chr1 180 Reverse chr1 187 Reverse 0 0 read029 +BND: chr1 180 Reverse chr1 187 Reverse 0 0 read030 +INS: chr1 179 Forward chr1 180 Forward 8 0 read031 +BND: chr1 180 Reverse chr1 187 Reverse 0 0 read033 +BND: chr1 180 Reverse chr1 187 Reverse 0 0 read034 +DEL: chr1 265 Forward chr1 286 Forward 0 0 read037 +DEL: chr1 265 Forward chr1 286 Forward 0 0 read038 +BND: chr1 265 Forward chr1 286 Forward 0 0 read039 +BND: chr1 282 Reverse chr1 298 Reverse 0 0 read039 +BND: chr1 266 Reverse chr1 285 Reverse 0 0 read039 +BND: chr1 265 Forward chr1 286 Forward 0 0 read040 +BND: chr1 282 Reverse chr1 298 Reverse 0 0 read040 +BND: chr1 266 Reverse chr1 285 Reverse 0 0 read040 +DEL: chr1 281 Forward chr1 299 Forward 0 0 read041 +BND: chr1 282 Reverse chr1 298 Reverse 0 0 read041 +BND: chr1 266 Reverse chr1 285 Reverse 0 0 read041 +DEL: chr1 335 Forward chr1 350 Forward 0 0 read042 +DEL: chr1 335 Forward chr1 350 Forward 0 0 read043 +DEL: chr1 335 Forward chr1 350 Forward 0 0 read044 +DEL: chr1 335 Forward chr1 350 Forward 0 0 read045 +INS: chr1 367 Forward chr1 368 Forward 11 0 read046 +INS: chr1 367 Forward chr1 368 Forward 11 0 read047 +INS: chr1 367 Forward chr1 368 Forward 11 0 read048 +INS: chr1 367 Forward chr1 368 Forward 11 0 read049 +INS: chr1 367 Forward chr1 368 Forward 11 0 read050 +DEL: chr1 383 Forward chr1 395 Forward 0 0 read050 +BND: chr1 10 Reverse chr1 470 Reverse 0 0 read051 +BND: chr1 10 Reverse chr1 470 Reverse 0 0 read052 +BND: chr1 33 Forward chr1 471 Forward 0 0 read052 Start clustering... Done with clustering. Found 15 junction clusters. No refinement was selected.