diff --git a/doc/plots/CallerComparisonPlot.pdf b/doc/plots/CallerComparisonPlot.pdf index e1b5af82..29163558 100644 Binary files a/doc/plots/CallerComparisonPlot.pdf and b/doc/plots/CallerComparisonPlot.pdf differ diff --git a/doc/plots/hierarchical_clustering_cutoff.results.all.png b/doc/plots/hierarchical_clustering_cutoff.results.all.png index dbaee493..ad39ad4c 100755 Binary files a/doc/plots/hierarchical_clustering_cutoff.results.all.png and b/doc/plots/hierarchical_clustering_cutoff.results.all.png differ diff --git a/doc/plots/max_overlap.results.all.png b/doc/plots/max_overlap.results.all.png index 2460b093..bd5d005b 100755 Binary files a/doc/plots/max_overlap.results.all.png and b/doc/plots/max_overlap.results.all.png differ diff --git a/doc/plots/max_tol_deleted_length.results.all.png b/doc/plots/max_tol_deleted_length.results.all.png new file mode 100644 index 00000000..1992c58e Binary files /dev/null and b/doc/plots/max_tol_deleted_length.results.all.png differ diff --git a/doc/plots/max_tol_inserted_length.results.all.png b/doc/plots/max_tol_inserted_length.results.all.png index d409c632..cf6781f2 100755 Binary files a/doc/plots/max_tol_inserted_length.results.all.png and b/doc/plots/max_tol_inserted_length.results.all.png differ diff --git a/doc/plots/max_var_length.results.all.png b/doc/plots/max_var_length.results.all.png index bcfcffe3..5cde3901 100755 Binary files a/doc/plots/max_var_length.results.all.png and b/doc/plots/max_var_length.results.all.png differ diff --git a/doc/plots/min_var_length.results.all.png b/doc/plots/min_var_length.results.all.png index e61ec750..c7a3e27c 100755 Binary files a/doc/plots/min_var_length.results.all.png and b/doc/plots/min_var_length.results.all.png differ diff --git a/doc/plots/partition_max_distance.results.all.png b/doc/plots/partition_max_distance.results.all.png new file mode 100644 index 00000000..dda1f29f Binary files /dev/null and b/doc/plots/partition_max_distance.results.all.png differ diff --git a/include/iGenVar.hpp b/include/iGenVar.hpp index 1de60e88..c5d1352d 100644 --- a/include/iGenVar.hpp +++ b/include/iGenVar.hpp @@ -27,11 +27,13 @@ struct cmd_arguments // SV specifications: /* -k */ int32_t min_var_length = 30; /* -l */ int32_t max_var_length = 100000; - /* -m */ int32_t max_tol_inserted_length = 5; + /* -m */ int32_t max_tol_inserted_length = 50; + /* -e */ int32_t max_tol_deleted_length = 50; /* -n */ int32_t max_overlap = 10; /* -q */ int32_t min_qual = 1; // Clustering specifications: - /* -w */ double hierarchical_clustering_cutoff = 100; + /* -p */ int32_t partition_max_distance = 1000; + /* -w */ double hierarchical_clustering_cutoff = 0.5; /* x? */ // Refinement specifications: /* y, z? */ diff --git a/include/modules/clustering/hierarchical_clustering_method.hpp b/include/modules/clustering/hierarchical_clustering_method.hpp index c06efc7d..9df2f7ce 100644 --- a/include/modules/clustering/hierarchical_clustering_method.hpp +++ b/include/modules/clustering/hierarchical_clustering_method.hpp @@ -6,13 +6,18 @@ * The returned partitions contain junctions meeting the following criteria: * a) all junctions in a partition connect the same reference sequences, * b) all junctions in a partition have the same orientations, and - * c) the distance between corresponding mates of two neighboring junctions is at most 50bp. + * c) the distance between corresponding mates of two neighboring junctions is + * smaller or equal than the given partition_max_distance. * The junctions in each of the returned partitions are sorted even though * the partitions themselves are not returned in a particular order. * * \param[in] junctions - a vector of junctions (needs to be sorted) + * \param[in] partition_max_distance - maximum distance between junctions in the same partition + * + * \returns Returns unordered partitions of sorted junctions. */ -std::vector> partition_junctions(std::vector const & junctions); +std::vector> partition_junctions(std::vector const & junctions, + int32_t const partition_max_distance); /*! \brief Sub-partition an existing partition based on the second mate of each junction. * The junctions in each of the returned sub-partitions are sorted even though @@ -20,8 +25,12 @@ std::vector> partition_junctions(std::vector con * * \param[in] partition - a partition (i.e. a vector) of junctions (needs to be sorted * by their second mates) + * \param[in] partition_max_distance - maximum distance between junctions in the same partition + * + * \returns Returns unordered sub-partitions of sorted junctions based on the second mate of each junction. */ -std::vector> split_partition_based_on_mate2(std::vector const & partition); +std::vector> split_partition_based_on_mate2(std::vector const & partition, + int32_t const partition_max_distance); /*! \brief Compute the distance between two junctions. * For two junctions that connect the same reference sequences and have the same @@ -31,16 +40,23 @@ std::vector> split_partition_based_on_mate2(std::vector hierarchical_clustering_method(std::vector const & junctions, double clustering_cutoff); +std::vector hierarchical_clustering_method(std::vector const & junctions, + int32_t const partition_max_distance, + double clustering_cutoff); diff --git a/src/iGenVar.cpp b/src/iGenVar.cpp index ade13429..a434f624 100644 --- a/src/iGenVar.cpp +++ b/src/iGenVar.cpp @@ -94,6 +94,10 @@ void initialize_argument_parser(seqan3::argument_parser & parser, cmd_arguments "Specify what should be the longest tolerated inserted sequence at sites of non-INS SVs. " "This value needs to be non-negative.", seqan3::option_spec::advanced); + parser.add_option(args.max_tol_deleted_length, 'e', "max_tol_deleted_length", + "Specify what should be the longest tolerated deleted sequence at sites of non-DEL SVs. " + "This value needs to be non-negative.", + seqan3::option_spec::advanced); parser.add_option(args.max_overlap, 'n', "max_overlap", "Specify the maximum allowed overlap between two alignment segments. " "This value needs to be non-negative.", @@ -104,6 +108,10 @@ void initialize_argument_parser(seqan3::argument_parser & parser, cmd_arguments seqan3::option_spec::advanced); // Options - Clustering specifications: + parser.add_option(args.partition_max_distance, 'p', "partition_max_distance", + "Specify the maximum distance in bp between members of the same partition." + "This value needs to be non-negative.", + seqan3::option_spec::advanced); parser.add_option(args.hierarchical_clustering_cutoff, 'w', "hierarchical_clustering_cutoff", "Specify the distance cutoff for the hierarchical clustering. " "This value needs to be non-negative.", @@ -155,7 +163,9 @@ void detect_variants_in_alignment_file(cmd_arguments const & args) clusters = simple_clustering_method(junctions); break; case 1: // hierarchical clustering - clusters = hierarchical_clustering_method(junctions, args.hierarchical_clustering_cutoff); + clusters = hierarchical_clustering_method(junctions, + args.partition_max_distance, + args.hierarchical_clustering_cutoff); break; case 2: // self-balancing_binary_tree, seqan3::debug_stream << "The self-balancing binary tree clustering method is not yet implemented\n"; diff --git a/src/modules/clustering/hierarchical_clustering_method.cpp b/src/modules/clustering/hierarchical_clustering_method.cpp index d25a1a55..5cb2ce5f 100644 --- a/src/modules/clustering/hierarchical_clustering_method.cpp +++ b/src/modules/clustering/hierarchical_clustering_method.cpp @@ -8,7 +8,8 @@ #include "fastcluster.h" // for hclust_fast -std::vector> partition_junctions(std::vector const & junctions) +std::vector> partition_junctions(std::vector const & junctions, + int32_t const partition_max_distance) { // Partition based on mate 1 std::vector current_partition{}; @@ -25,13 +26,14 @@ std::vector> partition_junctions(std::vector con { if (junction.get_mate1().seq_name != current_partition.back().get_mate1().seq_name || junction.get_mate1().orientation != current_partition.back().get_mate1().orientation || - abs(junction.get_mate1().position - current_partition.back().get_mate1().position) > 50) + std::abs(junction.get_mate1().position - current_partition.back().get_mate1().position) + > partition_max_distance) { // Partition based on mate 2 std::sort(current_partition.begin(), current_partition.end(), [](Junction const & a, Junction const & b) { return a.get_mate2() < b.get_mate2(); }); - current_partition_splitted = split_partition_based_on_mate2(current_partition); + current_partition_splitted = split_partition_based_on_mate2(current_partition, partition_max_distance); for (std::vector partition : current_partition_splitted) { final_partitions.push_back(partition); @@ -46,7 +48,7 @@ std::vector> partition_junctions(std::vector con std::sort(current_partition.begin(), current_partition.end(), [](Junction a, Junction b) { return a.get_mate2() < b.get_mate2(); }); - current_partition_splitted = split_partition_based_on_mate2(current_partition); + current_partition_splitted = split_partition_based_on_mate2(current_partition, partition_max_distance); for (std::vector partition : current_partition_splitted) { final_partitions.push_back(partition); @@ -55,7 +57,8 @@ std::vector> partition_junctions(std::vector con return final_partitions; } -std::vector> split_partition_based_on_mate2(std::vector const & partition) +std::vector> split_partition_based_on_mate2(std::vector const & partition, + int32_t const partition_max_distance) { std::vector current_partition{}; std::vector> splitted_partition{}; @@ -70,7 +73,8 @@ std::vector> split_partition_based_on_mate2(std::vector 50) + std::abs(junction.get_mate2().position - current_partition.back().get_mate2().position) + > partition_max_distance) { std::sort(current_partition.begin(), current_partition.end()); splitted_partition.push_back(current_partition); @@ -87,25 +91,54 @@ std::vector> split_partition_based_on_mate2(std::vectorB (2bp inserted) - // Junction 2 with mates C and D: C------>D (5bp inserted) - // Distance = 1 (distance A-C) + 2 (distance B-D) + 0 (no tandem duplication) + 3 (absolute insertion size difference) - return (std::abs(lhs.get_mate1().position - rhs.get_mate1().position) + - std::abs(lhs.get_mate2().position - rhs.get_mate2().position) + - std::abs((int)(lhs.get_tandem_dup_count() - rhs.get_tandem_dup_count())) + - std::abs((int)(lhs.get_inserted_sequence().size() - rhs.get_inserted_sequence().size()))); + // lhs and rhs are intra-chromosomal adjacencies + if (lhs.get_mate1().seq_name == lhs.get_mate2().seq_name) + { + // the directed size is the directed distance between both mates + // the directed size is positive for insertions and negative for deletions + int32_t lhs_directed_size = lhs.get_inserted_sequence().size() + + lhs.get_mate1().position - + lhs.get_mate2().position; + int32_t rhs_directed_size = rhs.get_inserted_sequence().size() + + rhs.get_mate1().position - + rhs.get_mate2().position; + // lhs and rhs have the same type (either deletion/inversion or insertion) + if ((lhs_directed_size < 0 && rhs_directed_size < 0) || + (lhs_directed_size > 0 && rhs_directed_size > 0)) + { + double position_distance = std::abs(lhs.get_mate1().position - rhs.get_mate1().position) / 1000.0; + // TODO (irallia 01.09.2021): std::abs((int)(lhs.get_tandem_dup_count() - rhs.get_tandem_dup_count())) + double size_distance = ((double)(std::max(std::abs(lhs_directed_size), std::abs(rhs_directed_size))) / + (double)(std::min(std::abs(lhs_directed_size), std::abs(rhs_directed_size)))) - 1.0; + return position_distance + size_distance; + } + // lhs and rhs have different types + else + { + return std::numeric_limits::max(); + } + } + // lhs and rhs are inter-chromosomal adjacencies + else + { + double position_distance1 = std::abs(lhs.get_mate1().position - rhs.get_mate1().position) / 1000.0; + double position_distance2 = std::abs(lhs.get_mate2().position - rhs.get_mate2().position) / 1000.0; + // TODO (irallia 01.09.2021): std::abs((int)(lhs.get_tandem_dup_count() - rhs.get_tandem_dup_count())) + double size_distance = std::abs((double)(lhs.get_inserted_sequence().size() - rhs.get_inserted_sequence().size())) / 1000.0; + return position_distance1 + position_distance2 + size_distance; + } } else { - return std::numeric_limits::max(); + return std::numeric_limits::max(); } } @@ -118,9 +151,11 @@ inline std::vector subsample_partition(std::vector const & p return subsample; } -std::vector hierarchical_clustering_method(std::vector const & junctions, double clustering_cutoff) +std::vector hierarchical_clustering_method(std::vector const & junctions, + int32_t const partition_max_distance, + double clustering_cutoff) { - auto partitions = partition_junctions(junctions); + auto partitions = partition_junctions(junctions, partition_max_distance); std::vector clusters{}; // Set the maximum partition size that is still feasible to cluster in reasonable time // A trade-off between reducing runtime and keeping as many junctions as possible has to be made diff --git a/src/variant_detection/variant_detection.cpp b/src/variant_detection/variant_detection.cpp index 5b72cbef..74dd9986 100644 --- a/src/variant_detection/variant_detection.cpp +++ b/src/variant_detection/variant_detection.cpp @@ -90,11 +90,13 @@ void detect_junctions_in_short_reads_sam_file(std::vector & junctions, break; } } - - num_good++; - if (num_good % 1000 == 0) + if (gVerbose) { - seqan3::debug_stream << num_good << " good alignments from short read file." << std::endl; + ++num_good; + if (num_good % 100000 == 0) + { + seqan3::debug_stream << num_good << " good alignments from short read file." << std::endl; + } } } } @@ -174,10 +176,13 @@ void detect_junctions_in_long_reads_sam_file(std::vector & junctions, } } - num_good++; - if (num_good % 1000 == 0) + if (gVerbose) { - seqan3::debug_stream << num_good << " good alignments from long read file." << std::endl; + ++num_good; + if (num_good % 100000 == 0) + { + seqan3::debug_stream << num_good << " good alignments from long read file." << std::endl; + } } } } diff --git a/src/variant_detection/variant_output.cpp b/src/variant_detection/variant_output.cpp index 8c2f6569..ea5f538b 100644 --- a/src/variant_detection/variant_output.cpp +++ b/src/variant_detection/variant_output.cpp @@ -32,10 +32,11 @@ void find_and_output_variants(std::map & references_length size_t insert_size = clusters[i].get_average_inserted_sequence_size(); if (mate1.orientation == strand::forward) { - int32_t distance = mate2_pos - mate1_pos; - //Deletion - if (distance >= args.min_var_length && - distance <= args.max_var_length && + int32_t distance = mate2_pos - mate1_pos - 1; + int32_t sv_length = insert_size - distance; + // Deletion (sv_length is negative) + if (-sv_length >= args.min_var_length && + -sv_length <= args.max_var_length && insert_size <= args.max_tol_inserted_length) { variant_record tmp{}; @@ -45,15 +46,16 @@ void find_and_output_variants(std::map & references_length tmp.add_info("SVTYPE", "DEL"); // Increment position by 1 because VCF is 1-based tmp.set_pos(mate1_pos + 1); - tmp.add_info("SVLEN", std::to_string(-distance + 1)); + tmp.add_info("SVLEN", std::to_string(sv_length)); // Increment end by 1 because VCF is 1-based // Decrement end by 1 because deletion ends one base before mate2 begins tmp.add_info("END", std::to_string(mate2_pos)); tmp.print(out_stream); } - //Insertion - else if (distance == 1 && - insert_size >= args.min_var_length) + // Insertion (sv_length is positive) + else if (sv_length >= args.min_var_length && + sv_length <= args.max_var_length && + distance <= args.max_tol_deleted_length) { variant_record tmp{}; tmp.set_chrom(mate1.seq_name); @@ -62,7 +64,7 @@ void find_and_output_variants(std::map & references_length tmp.add_info("SVTYPE", "INS"); // Increment position by 1 because VCF is 1-based tmp.set_pos(mate1_pos + 1); - tmp.add_info("SVLEN", std::to_string(insert_size)); + tmp.add_info("SVLEN", std::to_string(sv_length)); // Increment end by 1 because VCF is 1-based tmp.add_info("END", std::to_string(mate1_pos + 1)); tmp.print(out_stream); diff --git a/test/api/clustering_test.cpp b/test/api/clustering_test.cpp index 19ca5ff3..c98dde17 100644 --- a/test/api/clustering_test.cpp +++ b/test/api/clustering_test.cpp @@ -12,9 +12,9 @@ using seqan3::operator""_dna5; std::string const chrom1 = "chr1"; int32_t const chrom1_position1 = 12323443; int32_t const chrom1_position2 = 94734377; -int32_t const chrom1_position3 = 112323345; std::string const chrom2 = "chr2"; int32_t const chrom2_position1 = 234432; +int32_t const chrom2_position2 = 112323345; size_t tandem_dup_count = 0; std::string const read_name_1 = "m2257/8161/CCS"; std::string const read_name_2 = "m41327/11677/CCS"; @@ -25,6 +25,8 @@ std::string const read_name_6 = "m1245/5634/CCS"; std::string const read_name_7 = "m8765/9765/CCS"; std::string const read_name_8 = "m13456/11102/CCS"; +constexpr int32_t default_partition_max_distance = 1000; + std::vector prepare_input_junctions() { std::vector input_junctions @@ -37,14 +39,14 @@ std::vector prepare_input_junctions() Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, tandem_dup_count, read_name_3}, Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_4}, - Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, + Junction{Breakend{chrom1, chrom1_position1 + 1245, strand::forward}, Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_5}, Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6}, + Breakend{chrom2, chrom2_position2 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6}, Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7}, + Breakend{chrom2, chrom2_position2 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7}, Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} + Breakend{chrom2, chrom2_position2 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} }; std::sort(input_junctions.begin(), input_junctions.end()); @@ -60,37 +62,37 @@ TEST(simple_clustering, all_separate) // Each junction in separate cluster std::vector expected_clusters { - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, - Breakend{chrom2, chrom2_position1 + 8, strand::forward}, - ""_dna5, tandem_dup_count, read_name_1} + Cluster{{Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, + Breakend{chrom2, chrom2_position1 + 8, strand::forward}, + ""_dna5, tandem_dup_count, read_name_1} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, - Breakend{chrom2, chrom2_position1 - 3, strand::forward}, - ""_dna5, tandem_dup_count, read_name_2} + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, + Breakend{chrom2, chrom2_position1 - 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_2} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, - Breakend{chrom2, chrom2_position1 + 1, strand::forward}, - ""_dna5, tandem_dup_count, read_name_3}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, + Breakend{chrom2, chrom2_position1 + 1, strand::forward}, + ""_dna5, tandem_dup_count, read_name_3}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, - Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, - ""_dna5, tandem_dup_count, read_name_4}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, + Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_4}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, - Breakend{chrom2, chrom2_position1 + 3, strand::forward}, - ""_dna5, tandem_dup_count, read_name_5}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 1245, strand::forward}, + Breakend{chrom2, chrom2_position1 + 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_5}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, - ""_dna5, tandem_dup_count, read_name_6} + Cluster{{Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, + Breakend{chrom2, chrom2_position2 + 8, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_6} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, - ""_dna5, tandem_dup_count, read_name_7} + Cluster{{Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, + Breakend{chrom2, chrom2_position2 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_7} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, - ""_dna5, tandem_dup_count, read_name_8} + Cluster{{Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, + Breakend{chrom2, chrom2_position2 + 2, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_8} }} }; std::sort(expected_clusters.begin(), expected_clusters.end()); @@ -158,7 +160,7 @@ TEST(simple_clustering, empty_junction_vector) TEST(hierarchical_clustering, partitioning) { std::vector input_junctions = prepare_input_junctions(); - std::vector> partitions = partition_junctions(input_junctions); + std::vector> partitions = partition_junctions(input_junctions, default_partition_max_distance); std::vector> expected_partitions { @@ -175,16 +177,16 @@ TEST(hierarchical_clustering, partitioning) Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_4}, //cluster 2 }, { - Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, + Junction{Breakend{chrom1, chrom1_position1 + 1245, strand::forward}, Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_5}, //cluster 3 }, { Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6}, //cluster 4 + Breakend{chrom2, chrom2_position2 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6}, //cluster 4 Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7}, //cluster 4 + Breakend{chrom2, chrom2_position2 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7}, //cluster 4 Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} //cluster 4 + Breakend{chrom2, chrom2_position2 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} //cluster 4 } }; @@ -203,34 +205,42 @@ TEST(hierarchical_clustering, partitioning) TEST(hierarchical_clustering, strict_clustering) { std::vector input_junctions = prepare_input_junctions(); - std::vector clusters = hierarchical_clustering_method(input_junctions, 0); + std::vector clusters = hierarchical_clustering_method(input_junctions, default_partition_max_distance, 0); // Each junction in separate cluster std::vector expected_clusters { Cluster{{Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, - Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, tandem_dup_count, read_name_1} + Breakend{chrom2, chrom2_position1 + 8, strand::forward}, + ""_dna5, tandem_dup_count, read_name_1} }}, Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, - Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_2} + Breakend{chrom2, chrom2_position1 - 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_2} }}, Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, - Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, tandem_dup_count, read_name_3}, + Breakend{chrom2, chrom2_position1 + 1, strand::forward}, + ""_dna5, tandem_dup_count, read_name_3}, }}, Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, - Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_4}, + Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_4}, }}, - Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, - Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_5}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 1245, strand::forward}, + Breakend{chrom2, chrom2_position1 + 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_5}, }}, Cluster{{Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6} + Breakend{chrom2, chrom2_position2 + 8, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_6} }}, Cluster{{Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7} + Breakend{chrom2, chrom2_position2 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_7} }}, Cluster{{Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} + Breakend{chrom2, chrom2_position2 + 2, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_8} }} }; std::sort(expected_clusters.begin(), expected_clusters.end()); @@ -258,9 +268,9 @@ TEST(hierarchical_clustering, strict_clustering) TEST(hierarchical_clustering, clustering_10) { std::vector input_junctions = prepare_input_junctions(); - std::vector clusters = hierarchical_clustering_method(input_junctions, 10); + std::vector clusters = hierarchical_clustering_method(input_junctions, default_partition_max_distance, 0.01); - // Distance matrix for junctions from reads 1-3 and 6-8 + // Position distance matrix for junctions from reads 1-3 and 6-8 (size distance is 0 for all pairs) // 1 2 3 // 1 18 21 // 2 11 @@ -269,31 +279,39 @@ TEST(hierarchical_clustering, clustering_10) // 6 14 14 // 7 6 - // Only junctions from reads 7 and 8 have a distance < 10 and cluster together + // Only junctions from reads 7 and 8 have a distance < 0.01 and cluster together std::vector expected_clusters { Cluster{{Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, - Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, tandem_dup_count, read_name_1} + Breakend{chrom2, chrom2_position1 + 8, strand::forward}, + ""_dna5, tandem_dup_count, read_name_1} }}, Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, - Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_2} + Breakend{chrom2, chrom2_position1 - 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_2} }}, Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, - Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, tandem_dup_count, read_name_3}, + Breakend{chrom2, chrom2_position1 + 1, strand::forward}, + ""_dna5, tandem_dup_count, read_name_3}, }}, Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, - Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_4}, + Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_4}, }}, - Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, - Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_5}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 1245, strand::forward}, + Breakend{chrom2, chrom2_position1 + 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_5}, }}, Cluster{{Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6} + Breakend{chrom2, chrom2_position2 + 8, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_6} }}, Cluster{{Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7}, + Breakend{chrom2, chrom2_position2 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_7}, Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} + Breakend{chrom2, chrom2_position2 + 2, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_8} }} }; std::sort(expected_clusters.begin(), expected_clusters.end()); @@ -321,9 +339,9 @@ TEST(hierarchical_clustering, clustering_10) TEST(hierarchical_clustering, clustering_15) { std::vector input_junctions = prepare_input_junctions(); - std::vector clusters = hierarchical_clustering_method(input_junctions, 15); + std::vector clusters = hierarchical_clustering_method(input_junctions, default_partition_max_distance, 0.015); - // Distance matrix for junctions from reads 1-3 and 6-8 + // Position distance matrix for junctions from reads 1-3 and 6-8 (size distance is 0 for all pairs) // 1 2 3 // 1 18 21 // 2 11 @@ -332,29 +350,37 @@ TEST(hierarchical_clustering, clustering_15) // 6 14 14 // 7 6 - // Junctions from reads 6-8 and 2-3 have a distance < 15 and cluster together + // Junctions from reads 6-8 and 2-3 have a distance < 0.015 and cluster together std::vector expected_clusters { - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, - Breakend{chrom2, chrom2_position1 + 8, strand::forward}, ""_dna5, tandem_dup_count, read_name_1} + Cluster{{Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, + Breakend{chrom2, chrom2_position1 + 8, strand::forward}, + ""_dna5, tandem_dup_count, read_name_1} }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, - Breakend{chrom2, chrom2_position1 - 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_2}, - Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, - Breakend{chrom2, chrom2_position1 + 1, strand::forward}, ""_dna5, tandem_dup_count, read_name_3}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 2, strand::forward}, + Breakend{chrom2, chrom2_position1 - 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_2}, + Junction{Breakend{chrom1, chrom1_position1 + 9, strand::forward}, + Breakend{chrom2, chrom2_position1 + 1, strand::forward}, + ""_dna5, tandem_dup_count, read_name_3}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, - Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_4}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 5, strand::forward}, + Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_4}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, - Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_5}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 1245, strand::forward}, + Breakend{chrom2, chrom2_position1 + 3, strand::forward}, + ""_dna5, tandem_dup_count, read_name_5}, }}, - Cluster{{ Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6}, - Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7}, - Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} + Cluster{{Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, + Breakend{chrom2, chrom2_position2 + 8, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_6}, + Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, + Breakend{chrom2, chrom2_position2 - 1, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_7}, + Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, + Breakend{chrom2, chrom2_position2 + 2, strand::reverse}, + ""_dna5, tandem_dup_count, read_name_8} }} }; std::sort(expected_clusters.begin(), expected_clusters.end()); @@ -382,9 +408,9 @@ TEST(hierarchical_clustering, clustering_15) TEST(hierarchical_clustering, clustering_25) { std::vector input_junctions = prepare_input_junctions(); - std::vector clusters = hierarchical_clustering_method(input_junctions, 25); + std::vector clusters = hierarchical_clustering_method(input_junctions, default_partition_max_distance, 0.025); - // Distance matrix for junctions from reads 1-3 and 6-8 + // Position distance matrix for junctions from reads 1-3 and 6-8 (size distance is 0 for all pairs) // 1 2 3 // 1 18 21 // 2 11 @@ -393,7 +419,7 @@ TEST(hierarchical_clustering, clustering_25) // 6 14 14 // 7 6 - // Junctions from reads 6-8 and 1-3 have a distance < 25 and cluster together + // Junctions from reads 6-8 and 1-3 have a distance < 0.025 and cluster together std::vector expected_clusters { Cluster{{Junction{Breakend{chrom1, chrom1_position1 - 5, strand::forward}, @@ -410,18 +436,18 @@ TEST(hierarchical_clustering, clustering_25) Breakend{chrom2, chrom2_position1 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_4}, }}, - Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 92, strand::forward}, + Cluster{{Junction{Breakend{chrom1, chrom1_position1 + 1245, strand::forward}, Breakend{chrom2, chrom2_position1 + 3, strand::forward}, ""_dna5, tandem_dup_count, read_name_5}, }}, Cluster{{Junction{Breakend{chrom1, chrom1_position2 - 2, strand::forward}, - Breakend{chrom2, chrom1_position3 + 8, strand::reverse}, + Breakend{chrom2, chrom2_position2 + 8, strand::reverse}, ""_dna5, tandem_dup_count, read_name_6}, Junction{Breakend{chrom1, chrom1_position2 + 3, strand::forward}, - Breakend{chrom2, chrom1_position3 - 1, strand::reverse}, + Breakend{chrom2, chrom2_position2 - 1, strand::reverse}, ""_dna5, tandem_dup_count, read_name_7}, Junction{Breakend{chrom1, chrom1_position2 + 6, strand::forward}, - Breakend{chrom2, chrom1_position3 + 2, strand::reverse}, + Breakend{chrom2, chrom2_position2 + 2, strand::reverse}, ""_dna5, tandem_dup_count, read_name_8} }} }; @@ -463,7 +489,7 @@ TEST(hierarchical_clustering, subsampling) std::sort(input_junctions.begin(), input_junctions.end()); testing::internal::CaptureStderr(); - std::vector clusters = hierarchical_clustering_method(input_junctions, 0); + std::vector clusters = hierarchical_clustering_method(input_junctions, default_partition_max_distance, 0); size_t num_junctions = 0; for (Cluster const & cluster : clusters) @@ -528,19 +554,21 @@ TEST(hierarchical_clustering, cluster_tandem_dup_count) << " unequal"; } - resulting_clusters = hierarchical_clustering_method(input_junctions, 0); // clustering_cutoff = 0 + std::vector expected_clusters_2 { input_junctions }; + + resulting_clusters = hierarchical_clustering_method(input_junctions, default_partition_max_distance, 0); // clustering_cutoff = 0 ASSERT_EQ(5, resulting_clusters.size()); - resulting_clusters = hierarchical_clustering_method(input_junctions, 1); // clustering_cutoff = 1 - ASSERT_EQ(expected_clusters.size(), resulting_clusters.size()); + resulting_clusters = hierarchical_clustering_method(input_junctions, default_partition_max_distance, 1); // clustering_cutoff = 1 + ASSERT_EQ(expected_clusters_2.size(), resulting_clusters.size()); - for (size_t cluster_index = 0; cluster_index < expected_clusters.size(); ++cluster_index) + for (size_t cluster_index = 0; cluster_index < expected_clusters_2.size(); ++cluster_index) { - EXPECT_TRUE(expected_clusters[cluster_index] == resulting_clusters[cluster_index]) << "Cluster " - << cluster_index - << " unequal"; + EXPECT_TRUE(expected_clusters_2[cluster_index] == resulting_clusters[cluster_index]) << "Cluster " + << cluster_index + << " unequal"; } - resulting_clusters = hierarchical_clustering_method(input_junctions, 10); // clustering_cutoff = 10 (default value) + resulting_clusters = hierarchical_clustering_method(input_junctions, default_partition_max_distance, 10); // clustering_cutoff = 10 (default value) ASSERT_EQ(1, resulting_clusters.size()); } diff --git a/test/api/detection_test.cpp b/test/api/detection_test.cpp index 406bad41..8973a0bc 100644 --- a/test/api/detection_test.cpp +++ b/test/api/detection_test.cpp @@ -431,10 +431,12 @@ TEST(junction_detection, analyze_sa_tag) sVirl_refinement_method, 10, // min_var_length, 1000000, // default max_var_length, - 5, // default max_tol_inserted_length, + 50, // default max_tol_inserted_length, + 50, // default max_tol_deleted_length, 0, // max_overlap, 1, // default min_qual, - 10}; // default hierarchical_clustering_cutoff + 1000, // default partition_max_distance + 0.5}; // default hierarchical_clustering_cutoff analyze_sa_tag(read_name, flag, chromosome, pos, mapq, test_cigar, seq, sa_tag, args, junctions_res); diff --git a/test/api/input_file_test.cpp b/test/api/input_file_test.cpp index 72ae6eb6..829a3c11 100644 --- a/test/api/input_file_test.cpp +++ b/test/api/input_file_test.cpp @@ -16,10 +16,12 @@ constexpr int16_t default_threads = 1; std::vector const default_methods{cigar_string, split_read, read_pairs, read_depth}; constexpr int32_t default_min_length = 30; constexpr int32_t default_max_var_length = 1000000; -constexpr int32_t default_max_tol_inserted_length = 5; +constexpr int32_t default_max_tol_inserted_length = 50; +constexpr int32_t default_max_tol_deleted_length = 50; constexpr int32_t default_max_overlap = 10; constexpr int32_t default_min_qual = 1; -constexpr double default_hierarchical_clustering_cutoff = 10; +constexpr int32_t default_partition_max_distance = 1000; +constexpr double default_hierarchical_clustering_cutoff = 0.5; // Explanation for the strings: // chr21\t41972615\tForward\tchr21\t41972616\tForward\t1\t1681 @@ -67,8 +69,10 @@ TEST(input_file, detect_junctions_in_short_read_sam_file) default_min_length, default_max_var_length, default_max_tol_inserted_length, + default_max_tol_deleted_length, default_max_overlap, default_min_qual, + default_partition_max_distance, default_hierarchical_clustering_cutoff}; detect_junctions_in_short_reads_sam_file(junctions_res, references_lengths, args); @@ -107,8 +111,10 @@ TEST(input_file, detect_junctions_in_long_reads_sam_file) default_min_length, default_max_var_length, default_max_tol_inserted_length, + default_max_tol_deleted_length, default_max_overlap, default_min_qual, + default_partition_max_distance, default_hierarchical_clustering_cutoff}; detect_junctions_in_long_reads_sam_file(junctions_res, references_lengths, args); @@ -221,8 +227,10 @@ TEST(input_file, long_read_sam_file_unsorted) default_min_length, default_max_var_length, default_max_tol_inserted_length, + default_max_tol_deleted_length, default_max_overlap, default_min_qual, + default_partition_max_distance, default_hierarchical_clustering_cutoff}; EXPECT_THROW(detect_junctions_in_long_reads_sam_file(junctions_res, references_lengths, @@ -298,8 +306,10 @@ TEST(input_file, short_and_long_read_sam_file_with_different_references_lengths) default_min_length, default_max_var_length, default_max_tol_inserted_length, + default_max_tol_deleted_length, default_max_overlap, default_min_qual, + default_partition_max_distance, default_hierarchical_clustering_cutoff}; EXPECT_NO_THROW(detect_junctions_in_short_reads_sam_file(junctions_res, references_lengths, args)); EXPECT_NO_THROW(detect_junctions_in_long_reads_sam_file(junctions_res, references_lengths, args)); diff --git a/test/benchmark/parameter_benchmarks/Snakefile b/test/benchmark/parameter_benchmarks/Snakefile index e1555a0b..b862b84a 100644 --- a/test/benchmark/parameter_benchmarks/Snakefile +++ b/test/benchmark/parameter_benchmarks/Snakefile @@ -6,8 +6,12 @@ rule all: parameter_name="max_var_length"), expand("results/parameter_benchmarks/plots/{parameter_name}.results.all.png", parameter_name="max_tol_inserted_length"), + expand("results/parameter_benchmarks/plots/{parameter_name}.results.all.png", + parameter_name="max_tol_deleted_length"), expand("results/parameter_benchmarks/plots/{parameter_name}.results.all.png", parameter_name="max_overlap"), + expand("results/parameter_benchmarks/plots/{parameter_name}.results.all.png", + parameter_name="partition_max_distance"), expand("results/parameter_benchmarks/plots/{parameter_name}.results.all.png", parameter_name="hierarchical_clustering_cutoff") @@ -84,25 +88,34 @@ rule cat_truvari_results_full: parameter_value=[1000, 10000, 100000, 1000000], # default: 100.000 min_qual=range(1, 51)), input3 = expand("results/parameter_benchmarks/truvari/max_tol_inserted_length/{parameter_value}_min_qual_{min_qual}/pr_rec.txt", - parameter_value=[1, 10, 100, 1000], # default: 5 + parameter_value=[1, 10, 100, 1000], # default: 50 min_qual=range(1, 51)), - input4 = expand("results/parameter_benchmarks/truvari/max_overlap/{parameter_value}_min_qual_{min_qual}/pr_rec.txt", + input4 = expand("results/parameter_benchmarks/truvari/max_tol_deleted_length/{parameter_value}_min_qual_{min_qual}/pr_rec.txt", + parameter_value=[1, 10, 100, 1000], # default: 50 + min_qual=range(1, 51)), + input5 = expand("results/parameter_benchmarks/truvari/max_overlap/{parameter_value}_min_qual_{min_qual}/pr_rec.txt", parameter_value=[1, 10, 100, 1000], # default: 10 min_qual=range(1, 51)), - input5 = expand("results/parameter_benchmarks/truvari/hierarchical_clustering_cutoff/{parameter_value}_min_qual_{min_qual}/pr_rec.txt", - parameter_value= [20, 50, 100, 1000, 10000], # default: 50 + input6 = expand("results/parameter_benchmarks/truvari/partition_max_distance/{parameter_value}_min_qual_{min_qual}/pr_rec.txt", + parameter_value=[10, 100, 1000, 10000], # default: 1000 + min_qual=range(1, 51)), + input7 = expand("results/parameter_benchmarks/truvari/hierarchical_clustering_cutoff/{parameter_value}_min_qual_{min_qual}/pr_rec.txt", + parameter_value= [0.2, 0.5, 1, 10], # default: 0.5 min_qual=range(1, 51)) output: summary1 = "results/parameter_benchmarks/truvari/min_var_length/all_results.txt", summary2 = "results/parameter_benchmarks/truvari/max_var_length/all_results.txt", summary3 = "results/parameter_benchmarks/truvari/max_tol_inserted_length/all_results.txt", - summary4 = "results/parameter_benchmarks/truvari/max_overlap/all_results.txt", - summary5 = "results/parameter_benchmarks/truvari/hierarchical_clustering_cutoff/all_results.txt" + summary4 = "results/parameter_benchmarks/truvari/max_tol_deleted_length/all_results.txt", + summary5 = "results/parameter_benchmarks/truvari/max_overlap/all_results.txt", + summary6 = "results/parameter_benchmarks/truvari/partition_max_distance/all_results.txt", + summary7 = "results/parameter_benchmarks/truvari/hierarchical_clustering_cutoff/all_results.txt" shell: """ cat {input.input1} > {output.summary1} && cat {input.input2} > {output.summary2} && \ cat {input.input3} > {output.summary3} && cat {input.input4} > {output.summary4} && \ - cat {input.input5} > {output.summary5} + cat {input.input5} > {output.summary5} && cat {input.input6} > {output.summary6} && \ + cat {input.input7} > {output.summary7} """ rule plot_pr_all_results: diff --git a/test/benchmark/parameter_benchmarks/plot_all.R b/test/benchmark/parameter_benchmarks/plot_all.R index 08553efa..e3c7d4c4 100644 --- a/test/benchmark/parameter_benchmarks/plot_all.R +++ b/test/benchmark/parameter_benchmarks/plot_all.R @@ -12,10 +12,14 @@ if (parameter_name == "min_var_length") { parameter_values = c(1000,10000,100000,1000000) } else if (parameter_name == "max_tol_inserted_length") { parameter_values = c(1,10,100,1000) +} else if (parameter_name == "max_tol_deleted_length") { + parameter_values = c(1,10,100,1000) } else if (parameter_name == "max_overlap") { parameter_values = c(1,10,100,1000) +} else if (parameter_name == "partition_max_distance") { + parameter_values = c(10,100,1000,10000) } else { # hierarchical_clustering_cutoff - parameter_values = c(20,50,100,1000,10000) + parameter_values = c(0.2,0.5,1,10) } res <- read_tsv(args[1], col_names = c(parameter_name, "min_qual", "metric", "value")) diff --git a/test/cli/iGenVar_cli_test.cpp b/test/cli/iGenVar_cli_test.cpp index aabfddf3..26414597 100644 --- a/test/cli/iGenVar_cli_test.cpp +++ b/test/cli/iGenVar_cli_test.cpp @@ -109,7 +109,11 @@ std::string const help_page_advanced " -m, --max_tol_inserted_length (signed 32 bit integer)\n" " Specify what should be the longest tolerated inserted sequence at\n" " sites of non-INS SVs. This value needs to be non-negative. Default:\n" - " 5.\n" + " 50.\n" + " -e, --max_tol_deleted_length (signed 32 bit integer)\n" + " Specify what should be the longest tolerated deleted sequence at\n" + " sites of non-DEL SVs. This value needs to be non-negative. Default:\n" + " 50.\n" " -n, --max_overlap (signed 32 bit integer)\n" " Specify the maximum allowed overlap between two alignment segments.\n" " This value needs to be non-negative. Default: 10.\n" @@ -117,9 +121,12 @@ std::string const help_page_advanced " Specify the minimum quality (amount of supporting reads) of a\n" " structural variant to be reported in the vcf output file. This value\n" " needs to be non-negative. Default: 1.\n" + " -p, --partition_max_distance (signed 32 bit integer)\n" + " Specify the maximum distance in bp between members of the same\n" + " partition.This value needs to be non-negative. Default: 1000.\n" " -w, --hierarchical_clustering_cutoff (double)\n" " Specify the distance cutoff for the hierarchical clustering. This\n" - " value needs to be non-negative. Default: 100.\n" + " value needs to be non-negative. Default: 0.5.\n" }; // std::string expected_res_default @@ -535,9 +542,9 @@ TEST_F(iGenVar_cli_test, dataset_single_end_mini_example) cli_test_result result = execute_app("iGenVar", "-j", data("single_end_mini_example.sam"), "--verbose", - "--method cigar_string --method split_read " - "--min_var_length 8 --max_var_length 400 " - "--hierarchical_clustering_cutoff 20"); + "--method cigar_string --method split_read", + "--min_var_length 8 --max_var_length 400", + "--hierarchical_clustering_cutoff 0.1"); // Check the output of junctions: seqan3::debug_stream << "Check the output of junctions... " << '\n';