From 95e3421ad5f39af183f8c89f32495384f2a48ded Mon Sep 17 00:00:00 2001 From: marehr Date: Thu, 3 Sep 2020 12:19:50 +0200 Subject: [PATCH 01/16] [INFRA] seqan3/std/* header files MUST NOT include any seqan3 header --- test/header/CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/header/CMakeLists.txt b/test/header/CMakeLists.txt index 3504aec5e8..de9df058b2 100644 --- a/test/header/CMakeLists.txt +++ b/test/header/CMakeLists.txt @@ -82,6 +82,15 @@ TEST(${header_test_name_safe}) {}") #endif") endif () + # seqan3/std/* must not include platform.hpp (and therefore any other seqan3 header) + # See https://github.com/seqan/product_backlog/issues/135 + if (header MATCHES "seqan3/std/") + file (APPEND "${header_target_source}" " +#ifdef SEQAN3_DOXYGEN_ONLY +#error \"The standard header '${header}' file MUST NOT include any other seqan3 header (except for seqan3/contrib)\" +#endif") + endif () + # test whether seqan3 has the visibility bug on lower gcc versions # https://github.com/seqan/seqan3/issues/1317 file (APPEND "${header_target_source}" " From 758fd29f9dd3a363696f4c19ca66ca3776b1f864 Mon Sep 17 00:00:00 2001 From: rrahn Date: Tue, 8 Sep 2020 13:55:59 +0200 Subject: [PATCH 02/16] [MISC] Update the simd benchmark to use correct alignment output configuration. --- .../alignment/global_affine_alignment_simd_benchmark.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp b/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp index 5c1d2454a7..24b9201d5d 100644 --- a/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp +++ b/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp @@ -78,6 +78,7 @@ void seqan3_affine_dna4_accelerated(benchmark::State & state, align_configs_t && BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, simd_with_score, seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, + seqan3::align_cfg::output_score, seqan3::align_cfg::vectorised) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); @@ -85,6 +86,8 @@ BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, simd_with_end_position, seqan3::align_cfg::result{seqan3::with_end_positions, seqan3::using_score_type}, + seqan3::align_cfg::output_score, + seqan3::align_cfg::output_end_position, seqan3::align_cfg::vectorised) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); @@ -92,6 +95,7 @@ BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, simd_parallel_with_score, seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, + seqan3::align_cfg::output_score, seqan3::align_cfg::vectorised, seqan3::align_cfg::parallel{get_number_of_threads()}) ->UseRealTime() @@ -100,6 +104,8 @@ BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, simd_parallel_with_end_position, seqan3::align_cfg::result{seqan3::with_end_positions, seqan3::using_score_type}, + seqan3::align_cfg::output_score, + seqan3::align_cfg::output_end_position, seqan3::align_cfg::vectorised, seqan3::align_cfg::parallel{get_number_of_threads()}) ->UseRealTime() From 80b2dc48b9f87ed3b9edf6e45f52e8032ab31e57 Mon Sep 17 00:00:00 2001 From: rrahn Date: Wed, 9 Sep 2020 12:55:38 +0200 Subject: [PATCH 03/16] [TEST] Add test for aa27 in global affine alignment. --- test/unit/alignment/pairwise/CMakeLists.txt | 1 + .../fixture/global_affine_unbanded.hpp | 103 ++++++++++++++++++ .../global_affine_unbanded_aa27_test.cpp | 21 ++++ 3 files changed, 125 insertions(+) create mode 100644 test/unit/alignment/pairwise/global_affine_unbanded_aa27_test.cpp diff --git a/test/unit/alignment/pairwise/CMakeLists.txt b/test/unit/alignment/pairwise/CMakeLists.txt index a19b8163b0..7151e4fb29 100644 --- a/test/unit/alignment/pairwise/CMakeLists.txt +++ b/test/unit/alignment/pairwise/CMakeLists.txt @@ -4,6 +4,7 @@ seqan3_test(alignment_result_test.cpp) seqan3_test(align_result_selector_test.cpp) seqan3_test(alignment_configurator_test.cpp) seqan3_test(global_affine_banded_test.cpp) +seqan3_test(global_affine_unbanded_aa27_test.cpp) seqan3_test(global_affine_unbanded_callback_test.cpp) seqan3_test(global_affine_unbanded_collection_callback_test.cpp) seqan3_test(global_affine_unbanded_collection_simd_test.cpp) diff --git a/test/unit/alignment/pairwise/fixture/global_affine_unbanded.hpp b/test/unit/alignment/pairwise/fixture/global_affine_unbanded.hpp index ff59078cc3..16f14db2ce 100644 --- a/test/unit/alignment/pairwise/fixture/global_affine_unbanded.hpp +++ b/test/unit/alignment/pairwise/fixture/global_affine_unbanded.hpp @@ -401,4 +401,107 @@ static auto dna4_match_4_mismatch_5_gap_1_open_10_both_empty = []() }; }(); +// ---------------------------------------------------------------------------- +// alignment fixtures using amino acid alphabet +// ---------------------------------------------------------------------------- + +inline constexpr auto config_blosum62_scheme = + seqan3::align_cfg::scoring_scheme{seqan3::aminoacid_scoring_scheme{seqan3::aminoacid_similarity_matrix::BLOSUM62}}; + +static auto aa27_blosum62_gap_1_open_10 = [] () +{ + using seqan3::operator""_aa27; + return seqan3::test::alignment::fixture::alignment_fixture + { + "FNQSAEYPDISHCGVMQLKWRATLGT"_aa27, + "EIKSDVLLHRWSMKNPGNILMIDVGMQVAESYFAT"_aa27, + align_config | config_blosum62_scheme, + -26, + "--------FNQSAEYP-DISHCGVMQLKWRATLGT", + "EIKSDVLLHRWSMKNPGNILMIDVGMQVAESYFAT", + seqan3::alignment_coordinate{seqan3::detail::column_index_type{0u}, seqan3::detail::row_index_type{0u}}, + seqan3::alignment_coordinate{seqan3::detail::column_index_type{26u}, seqan3::detail::row_index_type{35u}}, + std::vector + { + // e ,F ,N ,Q ,S ,A ,E ,Y ,P ,D ,I ,S ,H ,C ,G ,V ,M ,Q ,L ,K ,W ,R ,A ,T ,L ,G ,T , + /*e*/ 0 ,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31,-32,-33,-34,-35,-36, + /*E*/ -11,-3 ,-11,-10,-13,-15,-10,-18,-18,-16,-22,-20,-21,-25,-25,-26,-27,-24,-30,-27,-32,-30,-32,-33,-36,-36,-36, + /*I*/ -12,-11,-6 ,-14,-12,-14,-18,-11,-21,-21,-12,-23,-23,-22,-26,-22,-25,-29,-22,-31,-30,-33,-31,-33,-31,-37,-37, + /*K*/ -13,-15,-11,-5 ,-14,-13,-13,-19,-12,-21,-22,-12,-23,-24,-24,-26,-23,-24,-29,-17,-28,-28,-30,-31,-32,-33,-34, + /*S*/ -14,-15,-14,-11,-1 ,-12,-13,-14,-15,-12,-17,-18,-13,-20,-21,-22,-23,-23,-25,-26,-20,-28,-27,-29,-31,-32,-32, + /*D*/ -15,-17,-14,-14,-11,-3 ,-10,-15,-15,-9 ,-15,-17,-19,-16,-21,-23,-24,-23,-26,-26,-28,-22,-30,-28,-32,-32,-33, + /*V*/ -16,-16,-20,-16,-13,-11,-5 ,-11,-17,-18,-6 ,-17,-18,-19,-19,-17,-22,-23,-22,-25,-26,-27,-22,-29,-27,-31,-32, + /*L*/ -17,-16,-19,-19,-14,-14,-14,-6 ,-14,-18,-16,-8 ,-19,-19,-21,-18,-15,-24,-19,-24,-27,-28,-28,-23,-25,-31,-32, + /*L*/ -18,-17,-19,-20,-15,-15,-17,-15,-9 ,-18,-16,-18,-11,-20,-23,-20,-16,-17,-20,-21,-26,-29,-29,-29,-19,-29,-31, + /*H*/ -19,-19,-16,-19,-16,-17,-15,-15,-17,-10,-19,-17,-10,-14,-22,-23,-22,-16,-20,-21,-23,-26,-30,-31,-30,-21,-31, + /*R*/ -20,-22,-19,-15,-17,-17,-17,-17,-17,-19,-13,-20,-17,-13,-16,-25,-24,-21,-18,-18,-24,-18,-27,-30,-31,-32,-22, + /*W*/ -21,-19,-25,-21,-18,-19,-20,-15,-21,-21,-21,-16,-22,-19,-15,-19,-26,-26,-23,-21,-7 ,-18,-19,-20,-21,-22,-23, + /*S*/ -22,-23,-18,-24,-17,-17,-19,-21,-16,-21,-22,-17,-17,-23,-19,-17,-20,-26,-28,-23,-18,-8 ,-17,-18,-21,-21,-21, + /*M*/ -23,-22,-25,-18,-20,-18,-19,-20,-23,-19,-20,-23,-19,-18,-26,-18,-12,-20,-24,-25,-19,-19,-9 ,-18,-16,-22,-22, + /*K*/ -24,-26,-22,-24,-18,-21,-17,-21,-21,-24,-22,-20,-24,-22,-20,-28,-19,-11,-22,-19,-20,-17,-20,-10,-20,-18,-23, + /*N*/ -25,-27,-20,-22,-22,-20,-21,-19,-23,-20,-25,-21,-19,-27,-22,-23,-24,-19,-14,-22,-21,-20,-19,-20,-13,-20,-18, + /*P*/ -26,-28,-29,-21,-23,-23,-21,-24,-12,-23,-23,-25,-23,-22,-28,-24,-25,-23,-22,-15,-22,-22,-21,-20,-23,-15,-21, + /*G*/ -27,-29,-28,-29,-21,-23,-25,-24,-23,-13,-24,-23,-26,-26,-16,-27,-26,-24,-26,-24,-17,-23,-22,-23,-24,-17,-17, + /*N*/ -28,-30,-23,-28,-25,-23,-23,-27,-24,-22,-16,-23,-22,-29,-26,-19,-27,-25,-27,-26,-24,-17,-24,-22,-26,-24,-17, + /*I*/ -29,-28,-33,-26,-26,-26,-26,-24,-25,-25,-18,-18,-26,-23,-28,-23,-18,-26,-23,-28,-25,-25,-18,-25,-20,-28,-25, + /*L*/ -30,-29,-31,-32,-27,-27,-29,-27,-26,-26,-23,-20,-21,-27,-27,-27,-21,-20,-22,-25,-26,-26,-26,-19,-21,-24,-29, + /*M*/ -31,-30,-31,-31,-28,-28,-29,-30,-27,-27,-25,-24,-22,-22,-30,-26,-22,-21,-18,-23,-26,-27,-27,-27,-17,-24,-25, + /*I*/ -32,-31,-33,-34,-29,-29,-31,-30,-28,-28,-23,-27,-27,-23,-26,-27,-25,-25,-19,-21,-26,-28,-28,-28,-25,-21,-25, + /*D*/ -33,-35,-30,-33,-30,-31,-27,-32,-29,-22,-31,-23,-28,-30,-24,-29,-30,-25,-29,-20,-25,-28,-29,-29,-29,-26,-22, + /*V*/ -34,-34,-38,-32,-31,-30,-33,-28,-30,-30,-19,-30,-26,-29,-33,-20,-28,-31,-24,-31,-23,-28,-28,-29,-28,-32,-26, + /*G*/ -35,-37,-34,-37,-32,-31,-32,-34,-30,-31,-30,-19,-30,-29,-23,-31,-23,-30,-32,-26,-31,-25,-28,-30,-31,-22,-33, + /*M*/ -36,-35,-39,-34,-33,-33,-33,-33,-32,-32,-30,-30,-21,-31,-32,-22,-26,-23,-28,-33,-27,-32,-26,-29,-28,-33,-23, + /*Q*/ -37,-39,-35,-34,-34,-34,-31,-34,-33,-32,-32,-30,-30,-24,-33,-33,-22,-21,-25,-27,-33,-26,-33,-27,-31,-30,-34, + /*V*/ -38,-38,-42,-37,-35,-34,-36,-32,-34,-34,-29,-32,-33,-31,-27,-29,-32,-24,-20,-27,-30,-33,-26,-33,-26,-34,-30, + /*A*/ -39,-40,-40,-41,-36,-31,-35,-38,-33,-35,-34,-28,-34,-33,-31,-27,-30,-33,-25,-21,-30,-31,-29,-26,-34,-26,-34, + /*E*/ -40,-42,-40,-38,-37,-37,-26,-37,-36,-31,-35,-34,-28,-37,-35,-33,-29,-28,-32,-24,-24,-30,-32,-30,-29,-36,-27, + /*S*/ -41,-42,-41,-40,-34,-36,-37,-28,-37,-36,-33,-31,-35,-29,-37,-37,-34,-29,-30,-32,-27,-25,-29,-31,-32,-29,-35, + /*Y*/ -42,-38,-44,-42,-39,-36,-38,-30,-31,-38,-37,-35,-29,-37,-32,-38,-37,-35,-30,-32,-30,-29,-27,-31,-32,-35,-31, + /*F*/ -43,-36,-41,-45,-40,-41,-39,-35,-34,-34,-38,-37,-36,-31,-40,-33,-38,-37,-35,-33,-31,-33,-31,-29,-31,-35,-37, + /*A*/ -44,-45,-38,-42,-41,-36,-40,-41,-36,-36,-35,-37,-39,-36,-31,-40,-34,-38,-36,-36,-36,-32,-29,-31,-30,-31,-35, + /*T*/ -45,-46,-45,-39,-41,-41,-37,-42,-41,-37,-37,-34,-39,-40,-38,-31,-40,-35,-37,-37,-38,-37,-32,-24,-32,-32,-26 + }, + std::vector + { + // e ,F ,N ,Q ,S ,A ,E ,Y ,P ,D ,I ,S ,H ,C ,G ,V ,M ,Q ,L ,K ,W ,R ,A ,T ,L ,G ,T , + /*e*/ N ,L ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l , + /*E*/ U ,DUL,DUL,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,l ,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl, + /*I*/ u ,DUL,DUL,DUL,DUl,DUl,DUl,Dul,DUl,DUl,Dul,L ,DUl,Dul,l ,Dul,Dul,l ,Dul,l ,Dul,l ,DUl,DUl,Dul,l ,DUl, + /*K*/ u ,DuL,DUl,DuL,DUL,DUl,Dul,l ,Dul,l ,l ,Dul,L ,l ,Dul,l ,DUl,Dul,l ,Dul,L ,Dul,l ,l ,l ,Dul,l , + /*S*/ u ,DuL,Dul,DUL,DuL,L ,Dul,l ,l ,Dul,l ,DUl,Dul,l ,l ,l ,l ,DUl,l ,l ,DUl,l ,DUl,DUl,l ,DUl,DUl, + /*D*/ u ,DuL,Dul,DuL,DUL,DUL,DuL,l ,Dul,DUl,Dul,Dul,DUl,DUl,DUl,l ,l ,DUl,l ,Dul,l ,DUl,DUl,DUl,l ,DUl,DUl, + /*V*/ u ,DuL,DuL,Dul,uL ,DUL,DUL,DuL,Dul,DUl,Dul,DuL,l ,l ,DUl,Dul,Dul,l ,Dul,l ,l ,l ,Dul,l ,Dul,l ,Dul, + /*L*/ u ,DuL,DuL,ul ,ul ,DuL,DUL,DUL,DuL,l ,DUl,Dul,L ,Dul,l ,DUl,DUl,DUl,DUl,Dul,Dul,Dul,DUl,Dul,DUl,DUl,DUl, + /*L*/ u ,DuL,DuL,ul ,ul ,DuL,DuL,DUl,DUL,DuL,Dul,DUl,Dul,DuL,Dul,Dul,DUl,Dul,DUl,Dul,Dul,Dul,Dul,DUl,DUl,DUL,l , + /*H*/ u ,DuL,DuL,DuL,ul ,DuL,Dul,DuL,DUL,Dul,uL ,Dul,DUl,DuL,Dul,l ,Dul,DUl,Dul,DUl,Dul,Dul,l ,Dul,Ul ,DUl,DUL, + /*R*/ u ,DuL,Dul,DuL,uL ,Dul,Dul,DuL,DuL,DUL,Dul,DuL,DUl,DUl,DuL,Dul,Dul,DUl,DUl,DUl,DUL,Dul,DuL,l ,ul ,DUl,DUl, + /*W*/ u ,DuL,uL ,Dul,Dul,uL ,Dul,Dul,DuL,Dul,ul ,Dul,DuL,DUl,DUl,DuL,Dul,Dul,DUl,DUl,Dul,L ,l ,l ,l ,l ,l , + /*S*/ u ,DuL,Dul,uL ,Dul,DuL,DuL,ul ,Dul,DuL,ul ,Dul,DuL,DuL,DUl,DUl,DuL,Dul,Dul,Dul,Ul ,DUL,DUL,DUl,l ,DUl,Dul, + /*M*/ u ,DuL,DuL,Dul,uL ,Dul,DuL,Dul,Dul,Dul,DuL,Dul,Dul,DuL,DuL,DUl,DuL,DuL,Dul,l ,ul ,DUl,DUl,DUL,DUl,l ,DUl, + /*K*/ u ,DuL,Dul,DuL,Dul,DuL,Dul,DuL,Dul,Dul,Dul,Dul,DuL,Dul,Dul,DuL,DUl,DuL,DuL,Dul,ul ,Dul,DUl,DUl,DUL,Dul,Dul, + /*N*/ u ,DuL,Dul,DuL,ul ,Dul,DuL,Dul,DuL,Dul,uL ,Dul,DuL,DuL,Dul,Dul,ul ,DUl,DuL,DUL,ul ,Dul,Dul,DUl,Dul,DUL,Dul, + /*P*/ u ,uL ,Dul,Dul,DuL,Dul,Dul,DuL,Dul,L ,Dul,l ,Dul,Dul,l ,Dul,Dul,ul ,DUl,Dul,uL ,ul ,Dul,Dul,DUl,Dul,DUL, + /*G*/ u ,DuL,Dul,uL ,Dul,DuL,Dul,Dul,Ul ,DuL,L ,Dul,l ,Dul,Dul,L ,ul ,ul ,ul ,DUl,Dul,uL ,Dul,Dul,Dul,DUl,DuL, + /*N*/ u ,DuL,Dul,DuL,ul ,Dul,DuL,DuL,ul ,DUL,DuL,DuL,Dul,Dul,DUl,Dul,uL ,ul ,Dul,Dul,ul ,Dul,uL ,Dul,Dul,Dul,DUl, + /*I*/ u ,DuL,DuL,Dul,uL ,DuL,DuL,DuL,uL ,ul ,DUL,DuL,DuL,Dul,ul ,DUl,Dul,uL ,Dul,ul ,ul ,ul ,Dul,DuL,Dul,ul ,DUl, + /*L*/ u ,DuL,DuL,ul ,ul ,DuL,DuL,Dul,uL ,uL ,DuL,DUL,DuL,Dul,Dul,Dul,Dul,DuL,DuL,Dul,ul ,ul ,Dul,Dul,DuL,Dul,Dul, + /*M*/ u ,DuL,DuL,Dul,uL ,DuL,DuL,Dul,ul ,uL ,DuL,DuL,DuL,DuL,DuL,Dul,Dul,DuL,DuL,DuL,Dul,Dul,Dul,Dul,Dul,DuL,Dul, + /*I*/ u ,DuL,DuL,Dul,ul ,DuL,DuL,Dul,uL ,uL ,DuL,DuL,Dul,DUl,DuL,Dul,Dul,DuL,DUL,DuL,Dul,ul ,Dul,Dul,DUl,Dul,DuL, + /*D*/ u ,DuL,Dul,DuL,ul ,DuL,Dul,uL ,ul ,Dul,DuL,Dul,DuL,Dul,Dul,DuL,Dul,Dul,DuL,Dul,DuL,Dul,ul ,Dul,ul ,Dul,Dul, + /*V*/ u ,DuL,DuL,Dul,uL ,DuL,DuL,Dul,uL ,ul ,Dul,L ,Dul,Dul,Dul,Dul,DuL,ul ,Dul,DUl,Dul,DuL,Dul,Dul,Dul,Dul,Dul, + /*G*/ u ,DuL,Dul,uL ,Dul,DuL,DuL,ul ,Dul,DuL,Ul ,DuL,L ,Dul,Dul,Ul ,Dul,DuL,ul ,Dul,ul ,Dul,DuL,Dul,ul ,Dul,L , + /*M*/ u ,DuL,DuL,Dul,uL ,DuL,DuL,DuL,uL ,uL ,DuL,UL ,DuL,DuL,DUl,Dul,DUL,Dul,DuL,Dul,Dul,Dul,Dul,DuL,Dul,Ul ,Dul, + /*Q*/ u ,DuL,Dul,DuL,DuL,DuL,DuL,DuL,ul ,Dul,uL ,DuL,DUL,DuL,DuL,ul ,Dul,DuL,DuL,Dul,ul ,Dul,Dul,Dul,Dul,Dul,DUl, + /*V*/ u ,DuL,DuL,Dul,uL ,DuL,DuL,Dul,uL ,ul ,Dul,uL ,Dul,DUl,DuL,DuL,DUl,DUl,DuL,DuL,Dul,l ,Dul,Dul,Dul,Dul,Dul, + /*A*/ u ,DuL,Dul,uL ,Dul,DuL,DuL,Dul,Dul,uL ,ul ,DuL,DuL,Dul,Dul,Dul,DuL,Dul,DUl,DuL,DuL,Dul,Dul,Dul,Dul,Dul,DuL, + /*E*/ u ,DuL,Dul,DuL,uL ,DuL,DuL,DuL,ul ,Dul,ul ,Dul,Dul,uL ,Dul,Dul,Dul,DuL,uL ,DUl,DuL,DuL,Dul,Dul,Dul,Dul,Dul, + /*S*/ u ,DuL,Dul,DuL,DuL,DuL,DUl,Dul,uL ,Dul,Dul,Dul,DuL,Dul,DuL,Dul,Dul,Dul,DuL,Dul,DUl,DuL,DuL,Dul,Dul,Dul,Dul, + /*Y*/ u ,DuL,DuL,Dul,ul ,DuL,DuL,DUl,DuL,ul ,Dul,Dul,Dul,DuL,Dul,Dul,ul ,Dul,Dul,DuL,Dul,DUL,DuL,DuL,Dul,Dul,Dul, + /*F*/ u ,DuL,DuL,ul ,ul ,Dul,Dul,DuL,DuL,DuL,DuL,ul ,Dul,DuL,DuL,Dul,Dul,ul ,Dul,DuL,DuL,DuL,DUl,DuL,DuL,Dul,Dul, + /*A*/ u ,DuL,Dul,DuL,ul ,Dul,uL ,Dul,Dul,DuL,DuL,DuL,Dul,Dul,DuL,DuL,Dul,ul ,ul ,Dul,Dul,DuL,DuL,DuL,Dul,DuL,Dul, + /*T*/ u ,DuL,Dul,DuL,DuL,Dul,Dul,DuL,ul ,Dul,DuL,DuL,DuL,Dul,DUl,Dul,uL ,Dul,ul ,Dul,Dul,Dul,Dul,DuL,DuL,Dul,Dul + } + }; +}(); + } // namespace seqan3::test::alignment::fixture::global::affine::unbanded diff --git a/test/unit/alignment/pairwise/global_affine_unbanded_aa27_test.cpp b/test/unit/alignment/pairwise/global_affine_unbanded_aa27_test.cpp new file mode 100644 index 0000000000..7b7ca52837 --- /dev/null +++ b/test/unit/alignment/pairwise/global_affine_unbanded_aa27_test.cpp @@ -0,0 +1,21 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +#include + +#include + +#include "fixture/global_affine_unbanded.hpp" +#include "pairwise_alignment_single_test_template.hpp" + +using pairwise_global_affine_unbanded_testing_types = ::testing::Types< + pairwise_alignment_fixture<&seqan3::test::alignment::fixture::global::affine::unbanded::aa27_blosum62_gap_1_open_10> + >; + +INSTANTIATE_TYPED_TEST_SUITE_P(pairwise_global_affine_unbanded_aa27, + pairwise_alignment_test, + pairwise_global_affine_unbanded_testing_types, ); From 0afca7e353b5bc72b22fa220f2b55be7f76f452a Mon Sep 17 00:00:00 2001 From: rrahn Date: Wed, 9 Sep 2020 12:57:21 +0200 Subject: [PATCH 04/16] [FEATURE] Differentiate between simple and matrix simd score. Selects the correct simd scoring scheme based on the given matrix type. For the moment to test and implement the featurs of protein simd alignment we can use this simple static differentiation. Later this will be replaced by a dynamic dispatching mechanism. --- .../pairwise/alignment_configurator.hpp | 82 ++++++++++++++----- test/unit/alignment/pairwise/CMakeLists.txt | 1 + ...ine_unbanded_collection_simd_aa27_test.cpp | 39 +++++++++ 3 files changed, 103 insertions(+), 19 deletions(-) create mode 100644 test/unit/alignment/pairwise/global_affine_unbanded_collection_simd_aa27_test.cpp diff --git a/include/seqan3/alignment/pairwise/alignment_configurator.hpp b/include/seqan3/alignment/pairwise/alignment_configurator.hpp index e29924afd5..ae66735838 100644 --- a/include/seqan3/alignment/pairwise/alignment_configurator.hpp +++ b/include/seqan3/alignment/pairwise/alignment_configurator.hpp @@ -48,7 +48,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -546,22 +548,50 @@ struct alignment_configurator policy_affine_gap_recursion>; using result_builder_policy_t = policy_alignment_result_builder; - using alignment_method_t = typename std::conditional_t; + // ---------------------------------------------------------------------------- + // Configure scoring scheme policy + // ---------------------------------------------------------------------------- + + using alignment_method_t = std::conditional_t; using score_t = typename traits_t::score_type; - using alignment_scoring_scheme_t = - lazy_conditional_t, - typename traits_t::scoring_scheme_type>; + using scoring_scheme_t = typename traits_t::scoring_scheme_type; + constexpr bool is_aminoacid_scheme = is_type_specialisation_of_v; + + using simple_simd_scheme_t = lazy_conditional_t, + void>; + using matrix_simd_scheme_t = lazy_conditional_t, + void>; + + using alignment_scoring_scheme_t = std::conditional_t, + scoring_scheme_t>; using scoring_scheme_policy_t = policy_scoring_scheme; + + // ---------------------------------------------------------------------------- + // Configure alignment matrix policy + // ---------------------------------------------------------------------------- + using alignment_matrix_policy_t = policy_alignment_matrix>; + // ---------------------------------------------------------------------------- + // Configure alignment algorithm + // ---------------------------------------------------------------------------- + using algorithm_t = select_alignment_algorithm_t; - using alignment_scoring_scheme_t = - lazy_conditional_t>, - typename traits_t::scoring_scheme_type>; + using scoring_scheme_t = typename traits_t::scoring_scheme_type; + constexpr bool is_aminoacid_scheme = is_type_specialisation_of_v; + using alignment_type_t = typename std::conditional_t; + + using simple_simd_scheme_t = lazy_conditional_t, + void>; + using matrix_simd_scheme_t = lazy_conditional_t, + void>; + + using alignment_scoring_scheme_t = std::conditional_t, + scoring_scheme_t>; using scoring_scheme_policy_t = deferred_crtp_base; return configure_free_ends(cfg); diff --git a/test/unit/alignment/pairwise/CMakeLists.txt b/test/unit/alignment/pairwise/CMakeLists.txt index 7151e4fb29..21ac7b410e 100644 --- a/test/unit/alignment/pairwise/CMakeLists.txt +++ b/test/unit/alignment/pairwise/CMakeLists.txt @@ -7,6 +7,7 @@ seqan3_test(global_affine_banded_test.cpp) seqan3_test(global_affine_unbanded_aa27_test.cpp) seqan3_test(global_affine_unbanded_callback_test.cpp) seqan3_test(global_affine_unbanded_collection_callback_test.cpp) +seqan3_test(global_affine_unbanded_collection_simd_aa27_test.cpp) seqan3_test(global_affine_unbanded_collection_simd_test.cpp) seqan3_test(global_affine_unbanded_collection_test.cpp) seqan3_test(global_affine_unbanded_test.cpp) diff --git a/test/unit/alignment/pairwise/global_affine_unbanded_collection_simd_aa27_test.cpp b/test/unit/alignment/pairwise/global_affine_unbanded_collection_simd_aa27_test.cpp new file mode 100644 index 0000000000..b5d60aabf9 --- /dev/null +++ b/test/unit/alignment/pairwise/global_affine_unbanded_collection_simd_aa27_test.cpp @@ -0,0 +1,39 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +#include + +#include + +#include + +#include "fixture/global_affine_unbanded.hpp" +#include "pairwise_alignment_collection_test_template.hpp" + +namespace seqan3::test::alignment::collection::simd::global::affine::unbanded +{ + +static auto aa27_all_same = []() +{ + auto base_fixture = fixture::global::affine::unbanded::aa27_blosum62_gap_1_open_10; + using fixture_t = decltype(base_fixture); + + std::vector data{}; + for (size_t i = 0; i < 100; ++i) + data.push_back(base_fixture); + + return alignment_fixture_collection{base_fixture.config | seqan3::align_cfg::vectorised, data}; +}(); +} // namespace seqan3::test::alignment::collection::simd::global::affine::unbanded + +using pairwise_collection_simd_global_affine_unbanded_testing_types = ::testing::Types< + pairwise_alignment_fixture<&seqan3::test::alignment::collection::simd::global::affine::unbanded::aa27_all_same> + >; + +INSTANTIATE_TYPED_TEST_SUITE_P(pairwise_collection_simd_global_affine_unbanded_aa27, + pairwise_alignment_collection_test, + pairwise_collection_simd_global_affine_unbanded_testing_types, ); From e0472faa25abc73825eec3fd6a76550c1a081c32 Mon Sep 17 00:00:00 2001 From: rrahn Date: Wed, 9 Sep 2020 12:57:47 +0200 Subject: [PATCH 05/16] [TEST] Add initial microbenchmark for simd protein alignment. --- test/performance/alignment/CMakeLists.txt | 1 + ...ffine_alignment_protein_simd_benchmark.cpp | 142 ++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 test/performance/alignment/global_affine_alignment_protein_simd_benchmark.cpp diff --git a/test/performance/alignment/CMakeLists.txt b/test/performance/alignment/CMakeLists.txt index 27a54f41f0..3694b82d4c 100644 --- a/test/performance/alignment/CMakeLists.txt +++ b/test/performance/alignment/CMakeLists.txt @@ -1,5 +1,6 @@ seqan3_benchmark(global_affine_alignment_benchmark.cpp) seqan3_benchmark(global_affine_alignment_parallel_benchmark.cpp) +seqan3_benchmark(global_affine_alignment_protein_simd_benchmark.cpp) seqan3_benchmark(global_affine_alignment_simd_benchmark.cpp) seqan3_benchmark(local_affine_alignment_benchmark.cpp) seqan3_benchmark(edit_distance_unbanded_benchmark.cpp) diff --git a/test/performance/alignment/global_affine_alignment_protein_simd_benchmark.cpp b/test/performance/alignment/global_affine_alignment_protein_simd_benchmark.cpp new file mode 100644 index 0000000000..718f8a8db0 --- /dev/null +++ b/test/performance/alignment/global_affine_alignment_protein_simd_benchmark.cpp @@ -0,0 +1,142 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef SEQAN3_HAS_SEQAN2 + #include + #include +#endif + +constexpr auto aa_score_scheme = seqan3::aminoacid_scoring_scheme{seqan3::aminoacid_similarity_matrix::BLOSUM62}; +constexpr auto affine_cfg = seqan3::align_cfg::method_global{} | + seqan3::align_cfg::gap{seqan3::gap_scheme{seqan3::gap_score{-1}, + seqan3::gap_open_score{-10}}} | + seqan3::align_cfg::scoring_scheme{aa_score_scheme}; + +// Globally defined constants to ensure same test data. +inline constexpr size_t sequence_length = 150; +inline constexpr size_t set_size = 1024; + +// Range to test for sequence length variance +inline constexpr size_t deviation_begin = 0; +inline constexpr size_t deviation_end = 64; +inline constexpr size_t deviation_step = 8; + +// We don't know if the system supports hyper-threading so we use only half the threads so that the +// simd benchmark is likely to run on physical cores only. +uint32_t get_number_of_threads() +{ + uint32_t thread_count = std::thread::hardware_concurrency(); + return (thread_count == 1) ? thread_count : thread_count >> 1; +} + +// ============================================================================ +// affine; score; aa; collection +// ============================================================================ + +template +void seqan3_affine_aa_accelerated(benchmark::State & state, align_configs_t && ...configs) +{ + size_t sequence_length_variance = state.range(0); + auto data = seqan3::test::generate_sequence_pairs(sequence_length, + set_size, + sequence_length_variance); + + int64_t total = 0; + auto accelerate_config = (affine_cfg | ... | configs); + for (auto _ : state) + { + for (auto && res : seqan3::align_pairwise(data, accelerate_config)) + total += res.score(); + } + + state.counters["cells"] = seqan3::test::pairwise_cell_updates(data, affine_cfg); + state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); + state.counters["total"] = total; +} + +BENCHMARK_CAPTURE(seqan3_affine_aa_accelerated, + simd_with_score, + seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, + seqan3::align_cfg::output_score, + seqan3::align_cfg::vectorised) + ->UseRealTime() + ->DenseRange(deviation_begin, deviation_end, deviation_step); + +BENCHMARK_CAPTURE(seqan3_affine_aa_accelerated, + simd_parallel_with_score, + seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, + seqan3::align_cfg::output_score, + seqan3::align_cfg::vectorised, + seqan3::align_cfg::parallel{get_number_of_threads()}) + ->UseRealTime() + ->DenseRange(deviation_begin, deviation_end, deviation_step); + +#ifdef SEQAN3_HAS_SEQAN2 + +template +void seqan2_affine_aa_accelerated(benchmark::State & state, args_t && ...args) +{ + std::tuple captured_args{args...}; + + using scoring_scheme_t = seqan::Score>; + + size_t sequence_length_variance = state.range(0); + auto [vec1, vec2] = seqan3::test::generate_sequence_pairs_seqan2(sequence_length, + set_size, + sequence_length_variance); + auto exec = std::get<0>(captured_args); + setNumThreads(exec, std::get<1>(captured_args)); + + int64_t total = 0; + for (auto _ : state) + { + // In SeqAn2 the gap open contains already the gap extension costs, that's why we use -11 here. + auto res = seqan::globalAlignmentScore(exec, vec1, vec2, scoring_scheme_t{-1, -11}); + total = std::accumulate(seqan::begin(res), seqan::end(res), total); + } + + state.counters["cells"] = seqan3::test::pairwise_cell_updates(seqan3::views::zip(vec1, vec2), affine_cfg); + state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); + state.counters["total"] = total; +} + +// Note SeqAn2 has no parallel interface yet for computing the traceback as well. +BENCHMARK_CAPTURE(seqan2_affine_aa_accelerated, + simd_with_score, + seqan::ExecutionPolicy{}, + 1) + ->UseRealTime() + ->DenseRange(deviation_begin, deviation_end, deviation_step); + +BENCHMARK_CAPTURE(seqan2_affine_aa_accelerated, + simd_parallel_with_score, + seqan::ExecutionPolicy{}, + get_number_of_threads()) + ->UseRealTime() + ->DenseRange(deviation_begin, deviation_end, deviation_step); +#endif // SEQAN3_HAS_SEQAN2 + +// ============================================================================ +// instantiate tests +// ============================================================================ + +BENCHMARK_MAIN(); From 98f9ae1f62e4d4e04a3aac36dfb9bf36bf51a44e Mon Sep 17 00:00:00 2001 From: rrahn Date: Wed, 9 Sep 2020 14:01:19 +0200 Subject: [PATCH 06/16] [MISC] Refactor global alignment simd benchmark. Makes a test template for the alignment benchmark so we can reuse it for different benchmark settings. --- ...global_affine_alignment_simd_benchmark.cpp | 129 ++++++------------ ...fine_alignment_simd_benchmark_template.hpp | 100 ++++++++++++++ 2 files changed, 139 insertions(+), 90 deletions(-) create mode 100644 test/performance/alignment/global_affine_alignment_simd_benchmark_template.hpp diff --git a/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp b/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp index 5c1d2454a7..8782a57b0e 100644 --- a/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp +++ b/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp @@ -7,24 +7,16 @@ #include -#include -#include -#include +#include "global_affine_alignment_simd_benchmark_template.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include +// Range to test for sequence length variance +inline constexpr size_t deviation_begin = 0; +inline constexpr size_t deviation_end = 64; +inline constexpr size_t deviation_step = 8; -#ifdef SEQAN3_HAS_SEQAN2 - #include - #include -#endif +// ---------------------------------------------------------------------------- +// SeqAn3 +// ---------------------------------------------------------------------------- constexpr auto nt_score_scheme = seqan3::nucleotide_scoring_scheme{seqan3::match_score{4}, seqan3::mismatch_score{-5}}; @@ -33,73 +25,45 @@ constexpr auto affine_cfg = seqan3::align_cfg::method_global{} | seqan3::gap_open_score{-10}}} | seqan3::align_cfg::scoring_scheme{nt_score_scheme}; -// Globally defined constants to ensure same test data. -inline constexpr size_t sequence_length = 150; -inline constexpr size_t set_size = 1024; - -// Range to test for sequence length variance -inline constexpr size_t deviation_begin = 0; -inline constexpr size_t deviation_end = 64; -inline constexpr size_t deviation_step = 8; - -//We don't know if the system supports hyper-threading so we use only half the threads so that the -// simd benchmark is likely to run on physical cores only. -uint32_t get_number_of_threads() -{ - uint32_t thread_count = std::thread::hardware_concurrency(); - return (thread_count == 1) ? thread_count : thread_count >> 1; -} - -// ============================================================================ -// affine; score; dna4; collection -// ============================================================================ - -template -void seqan3_affine_dna4_accelerated(benchmark::State & state, align_configs_t && ...configs) -{ - size_t sequence_length_variance = state.range(0); - auto data = seqan3::test::generate_sequence_pairs(sequence_length, - set_size, - sequence_length_variance); - - int64_t total = 0; - auto accelerate_config = (affine_cfg | ... | configs); - for (auto _ : state) - { - for (auto && res : seqan3::align_pairwise(data, accelerate_config)) - total += res.score(); - } - - state.counters["cells"] = seqan3::test::pairwise_cell_updates(data, affine_cfg); - state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); - state.counters["total"] = total; -} - -BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan3_affine_accelerated, simd_with_score, + seqan3::dna4{}, + affine_cfg, seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, + seqan3::align_cfg::output_score, seqan3::align_cfg::vectorised) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); -BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan3_affine_accelerated, simd_with_end_position, + seqan3::dna4{}, + affine_cfg, seqan3::align_cfg::result{seqan3::with_end_positions, seqan3::using_score_type}, + seqan3::align_cfg::output_score, + seqan3::align_cfg::output_end_position, seqan3::align_cfg::vectorised) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); -BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan3_affine_accelerated, simd_parallel_with_score, + seqan3::dna4{}, + affine_cfg, seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, seqan3::align_cfg::vectorised, + seqan3::align_cfg::output_score, seqan3::align_cfg::parallel{get_number_of_threads()}) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); -BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan3_affine_accelerated, simd_parallel_with_end_position, + seqan3::dna4{}, + affine_cfg, seqan3::align_cfg::result{seqan3::with_end_positions, seqan3::using_score_type}, + seqan3::align_cfg::output_score, + seqan3::align_cfg::output_end_position, seqan3::align_cfg::vectorised, seqan3::align_cfg::parallel{get_number_of_threads()}) ->UseRealTime() @@ -107,43 +71,28 @@ BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, #ifdef SEQAN3_HAS_SEQAN2 -template -void seqan2_affine_dna4_accelerated(benchmark::State & state, args_t && ...args) -{ - std::tuple captured_args{args...}; - - size_t sequence_length_variance = state.range(0); - auto [vec1, vec2] = seqan3::test::generate_sequence_pairs_seqan2(sequence_length, - set_size, - sequence_length_variance); - auto exec = std::get<0>(captured_args); - setNumThreads(exec, std::get<1>(captured_args)); - - int64_t total = 0; - for (auto _ : state) - { - // In SeqAn2 the gap open contains already the gap extension costs, that's why we use -11 here. - auto res = seqan::globalAlignmentScore(exec, vec1, vec2, seqan::Score{4, -5, -1, -11}); - total = std::accumulate(seqan::begin(res), seqan::end(res), total); - } - - state.counters["cells"] = seqan3::test::pairwise_cell_updates(seqan3::views::zip(vec1, vec2), affine_cfg); - state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); - state.counters["total"] = total; -} +// ---------------------------------------------------------------------------- +// SeqAn2 +// ---------------------------------------------------------------------------- // Note SeqAn2 has no parallel interface yet for computing the traceback as well. -BENCHMARK_CAPTURE(seqan2_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan2_affine_accelerated, simd_with_score, + seqan::Dna{}, + seqan::Score{4, -5, -1, -11}, seqan::ExecutionPolicy{}, - 1) + 1, + affine_cfg) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); -BENCHMARK_CAPTURE(seqan2_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan2_affine_accelerated, simd_parallel_with_score, + seqan::Dna{}, + seqan::Score{4, -5, -1, -11}, seqan::ExecutionPolicy{}, - get_number_of_threads()) + get_number_of_threads(), + affine_cfg) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); #endif // SEQAN3_HAS_SEQAN2 diff --git a/test/performance/alignment/global_affine_alignment_simd_benchmark_template.hpp b/test/performance/alignment/global_affine_alignment_simd_benchmark_template.hpp new file mode 100644 index 0000000000..d53bca51d2 --- /dev/null +++ b/test/performance/alignment/global_affine_alignment_simd_benchmark_template.hpp @@ -0,0 +1,100 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef SEQAN3_HAS_SEQAN2 + #include + #include +#endif + +// Globally defined constants to ensure same test data. +inline constexpr size_t sequence_length = 150; +inline constexpr size_t set_size = 1024; + +// We don't know if the system supports hyper-threading so we use only half the threads so that the +// simd benchmark is likely to run on physical cores only. +uint32_t get_number_of_threads() +{ + uint32_t thread_count = std::thread::hardware_concurrency(); + return (thread_count == 1) ? thread_count : thread_count >> 1; +} + +// ---------------------------------------------------------------------------- +// seqan3 pairwise alignment +// ---------------------------------------------------------------------------- + +template +void seqan3_affine_accelerated(benchmark::State & state, alphabet_t, align_configs_t && ...configs) +{ + size_t sequence_length_variance = state.range(0); + auto data = seqan3::test::generate_sequence_pairs(sequence_length, + set_size, + sequence_length_variance); + + int64_t total = 0; + auto accelerate_config = (configs | ...); + for (auto _ : state) + { + for (auto && res : seqan3::align_pairwise(data, accelerate_config)) + total += res.score(); + } + + state.counters["cells"] = seqan3::test::pairwise_cell_updates(data, accelerate_config); + state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); + state.counters["total"] = total; +} + +#ifdef SEQAN3_HAS_SEQAN2 + +// ---------------------------------------------------------------------------- +// seqan2 pairwise alignment +// ---------------------------------------------------------------------------- + +template +void seqan2_affine_accelerated(benchmark::State & state, alphabet_t, args_t && ...args) +{ + std::tuple captured_args{args...}; + + size_t sequence_length_variance = state.range(0); + auto [vec1, vec2] = seqan3::test::generate_sequence_pairs_seqan2(sequence_length, + set_size, + sequence_length_variance); + + auto scoring_scheme = std::get<0>(captured_args); + auto exec = std::get<1>(captured_args); + setNumThreads(exec, std::get<2>(captured_args)); + + int64_t total = 0; + for (auto _ : state) + { + // In SeqAn2 the gap open contains already the gap extension costs, that's why we use -11 here. + auto res = seqan::globalAlignmentScore(exec, vec1, vec2, scoring_scheme); + total = std::accumulate(seqan::begin(res), seqan::end(res), total); + } + + state.counters["cells"] = seqan3::test::pairwise_cell_updates(seqan3::views::zip(vec1, vec2), + std::get<3>(captured_args)); + state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); + state.counters["total"] = total; +} + +#endif // SEQAN3_HAS_SEQAN2 From 96af05c63ffac863c4fc07feeff5c0a0457654ee Mon Sep 17 00:00:00 2001 From: rrahn Date: Wed, 9 Sep 2020 14:21:55 +0200 Subject: [PATCH 07/16] [TEST] Move the simd protein benchmark to the new test template. --- ...ffine_alignment_protein_simd_benchmark.cpp | 116 +++++------------- 1 file changed, 28 insertions(+), 88 deletions(-) diff --git a/test/performance/alignment/global_affine_alignment_protein_simd_benchmark.cpp b/test/performance/alignment/global_affine_alignment_protein_simd_benchmark.cpp index 718f8a8db0..e5ea8c06b4 100644 --- a/test/performance/alignment/global_affine_alignment_protein_simd_benchmark.cpp +++ b/test/performance/alignment/global_affine_alignment_protein_simd_benchmark.cpp @@ -7,23 +7,16 @@ #include -#include -#include -#include +#include "global_affine_alignment_simd_benchmark_template.hpp" -#include -#include -#include -#include -#include -#include -#include -#include +// Range to test for sequence length variance +inline constexpr size_t deviation_begin = 0; +inline constexpr size_t deviation_end = 64; +inline constexpr size_t deviation_step = 8; -#ifdef SEQAN3_HAS_SEQAN2 - #include - #include -#endif +// ---------------------------------------------------------------------------- +// SeqAn3 +// ---------------------------------------------------------------------------- constexpr auto aa_score_scheme = seqan3::aminoacid_scoring_scheme{seqan3::aminoacid_similarity_matrix::BLOSUM62}; constexpr auto affine_cfg = seqan3::align_cfg::method_global{} | @@ -31,58 +24,20 @@ constexpr auto affine_cfg = seqan3::align_cfg::method_global{} | seqan3::gap_open_score{-10}}} | seqan3::align_cfg::scoring_scheme{aa_score_scheme}; -// Globally defined constants to ensure same test data. -inline constexpr size_t sequence_length = 150; -inline constexpr size_t set_size = 1024; - -// Range to test for sequence length variance -inline constexpr size_t deviation_begin = 0; -inline constexpr size_t deviation_end = 64; -inline constexpr size_t deviation_step = 8; - -// We don't know if the system supports hyper-threading so we use only half the threads so that the -// simd benchmark is likely to run on physical cores only. -uint32_t get_number_of_threads() -{ - uint32_t thread_count = std::thread::hardware_concurrency(); - return (thread_count == 1) ? thread_count : thread_count >> 1; -} - -// ============================================================================ -// affine; score; aa; collection -// ============================================================================ - -template -void seqan3_affine_aa_accelerated(benchmark::State & state, align_configs_t && ...configs) -{ - size_t sequence_length_variance = state.range(0); - auto data = seqan3::test::generate_sequence_pairs(sequence_length, - set_size, - sequence_length_variance); - - int64_t total = 0; - auto accelerate_config = (affine_cfg | ... | configs); - for (auto _ : state) - { - for (auto && res : seqan3::align_pairwise(data, accelerate_config)) - total += res.score(); - } - - state.counters["cells"] = seqan3::test::pairwise_cell_updates(data, affine_cfg); - state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); - state.counters["total"] = total; -} - -BENCHMARK_CAPTURE(seqan3_affine_aa_accelerated, +BENCHMARK_CAPTURE(seqan3_affine_accelerated, simd_with_score, + seqan3::aa27{}, + affine_cfg, seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, seqan3::align_cfg::output_score, seqan3::align_cfg::vectorised) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); -BENCHMARK_CAPTURE(seqan3_affine_aa_accelerated, +BENCHMARK_CAPTURE(seqan3_affine_accelerated, simd_parallel_with_score, + seqan3::aa27{}, + affine_cfg, seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, seqan3::align_cfg::output_score, seqan3::align_cfg::vectorised, @@ -92,45 +47,30 @@ BENCHMARK_CAPTURE(seqan3_affine_aa_accelerated, #ifdef SEQAN3_HAS_SEQAN2 -template -void seqan2_affine_aa_accelerated(benchmark::State & state, args_t && ...args) -{ - std::tuple captured_args{args...}; - - using scoring_scheme_t = seqan::Score>; - - size_t sequence_length_variance = state.range(0); - auto [vec1, vec2] = seqan3::test::generate_sequence_pairs_seqan2(sequence_length, - set_size, - sequence_length_variance); - auto exec = std::get<0>(captured_args); - setNumThreads(exec, std::get<1>(captured_args)); - - int64_t total = 0; - for (auto _ : state) - { - // In SeqAn2 the gap open contains already the gap extension costs, that's why we use -11 here. - auto res = seqan::globalAlignmentScore(exec, vec1, vec2, scoring_scheme_t{-1, -11}); - total = std::accumulate(seqan::begin(res), seqan::end(res), total); - } +// ---------------------------------------------------------------------------- +// SeqAn2 +// ---------------------------------------------------------------------------- - state.counters["cells"] = seqan3::test::pairwise_cell_updates(seqan3::views::zip(vec1, vec2), affine_cfg); - state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); - state.counters["total"] = total; -} +using scoring_scheme_t = seqan::Score>; // Note SeqAn2 has no parallel interface yet for computing the traceback as well. -BENCHMARK_CAPTURE(seqan2_affine_aa_accelerated, +BENCHMARK_CAPTURE(seqan2_affine_accelerated, simd_with_score, + seqan::AminoAcid{}, + scoring_scheme_t{-1, -11}, seqan::ExecutionPolicy{}, - 1) + 1, + affine_cfg) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); -BENCHMARK_CAPTURE(seqan2_affine_aa_accelerated, +BENCHMARK_CAPTURE(seqan2_affine_accelerated, simd_parallel_with_score, + seqan::AminoAcid{}, + scoring_scheme_t{-1, -11}, seqan::ExecutionPolicy{}, - get_number_of_threads()) + get_number_of_threads(), + affine_cfg) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); #endif // SEQAN3_HAS_SEQAN2 From 4eec66a153d0afd5f70c5528cf3bd961e75418e0 Mon Sep 17 00:00:00 2001 From: rrahn Date: Tue, 8 Sep 2020 09:23:56 +0200 Subject: [PATCH 08/16] [FIX] Track correct coordinate in banded alignment. When tracking the last cell of the banded column computation the matrix iterator was referring to the wrong alignment cell. --- .../pairwise/alignment_configurator.hpp | 2 +- .../pairwise_alignment_algorithm_banded.hpp | 51 ++++++++++++++++--- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/include/seqan3/alignment/pairwise/alignment_configurator.hpp b/include/seqan3/alignment/pairwise/alignment_configurator.hpp index e29924afd5..d72fdac1ec 100644 --- a/include/seqan3/alignment/pairwise/alignment_configurator.hpp +++ b/include/seqan3/alignment/pairwise/alignment_configurator.hpp @@ -523,7 +523,7 @@ struct alignment_configurator if constexpr (traits_t::is_local || // it is a local alignment, traits_t::is_debug || // it runs in debug mode, (traits_t::compute_begin_positions || traits_t::compute_sequence_alignment) || // it computes more than the end position. - (traits_t::is_banded && traits_t::compute_end_positions) || // banded + (traits_t::is_banded && traits_t::compute_begin_positions) || // banded (traits_t::is_vectorised && traits_t::is_banded) || // it is vectorised and banded, (traits_t::is_vectorised && traits_t::compute_end_positions)) // simd and more than the score. { diff --git a/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp b/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp index 4c51aea1ba..235cac3999 100644 --- a/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp +++ b/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp @@ -228,7 +228,7 @@ class pairwise_alignment_algorithm_banded : } /*!\brief Computes a column of the band that does not start in the first row of the alignment matrix. - * \tparam alignment_column_t The type of the alignment column; must model std::ranges::input_range. + * \tparam alignment_column_t The type of the alignment column; must model std::ranges::forward_range. * \tparam cell_index_column_t The type of the indexed column; must model std::ranges::input_range. * \tparam sequence1_value_t The value type of sequence1; must model seqan3::semialphabet. * \tparam sequence2_t The type of the second sequence; must model std::ranges::input_range. @@ -245,8 +245,40 @@ class pairwise_alignment_algorithm_banded : * phase the first cell of the column is computed and in the iteration phase all remaining cells are computed. * In the final phase the last cell is possibly evaluated for a new alignment optimum. * Note that the length of `sequence2` determines the size of the column. + * + * ### Implementation with a score matrix using linear memory + * + * When the score matrix uses only linear memory, i.e. only one column is stored, the algorithm reuses the cells of + * the same column to compute the current one. This means, before the current cell is updated its value is + * cached and used as the previous diagonal value when computing the next cell below. + * In the banded case, however, the position of the referenced cell is shifted by one and needs a different + * handling. + * + * ``` + * 0 1 2 3 4 5 6 + * _____________ + * 0 |0|0|0|\ | + * 1 |1|1|1|0|\ | + * 2 |\|2|2|1|0|\ | + * 3 | \|3|2|1|0|\| + * 4 | \|3|2|1|0| + * 5 | \|3|2|1| + * ––––––––––––– + * + * ``` + * The picture above depicts the banded matrix with the indices of the column. As long as the band touches + * the first row (column 0 - 2), the indices of the actual stored column refer to the same position as the + * previous column. Hence, to compute these columns the regular algorithm + * seqan3::detail::pairwise_alignment_algorithm::compute_column can be used. + * Starting at column 3 the first cell of the band moves downwards, causing a shift in the position of the previous + * cell. In this state, the value of the current cell represents the previous diagonal value before it is updated. + * To read the previous horizontal value the next cell below has to be dereferenced. + * Accordingly, two iterators are used to point to the respective cells in the matrix. The first one points to the + * current cell (the one that is written to) and the second points to the next cell (the one where the + * horizontal and vertical scores are read from). After computing the last cell of the column the value of the + * current iterator can be used to track the score of the cell. */ - template @@ -259,13 +291,15 @@ class pairwise_alignment_algorithm_banded : // Initial phase: prepare column and initialise first cell // --------------------------------------------------------------------- - auto alignment_column_it = alignment_column.begin(); + auto current_alignment_column_it = alignment_column.begin(); auto cell_index_column_it = cell_index_column.begin(); - auto cell = *alignment_column_it; + // Points to the last valid cell in the column. + decltype(current_alignment_column_it) next_alignment_column_it{current_alignment_column_it}; + auto cell = *current_alignment_column_it; cell = this->track_cell( this->initialise_band_first_cell(cell.best_score(), - *++alignment_column_it, + *++next_alignment_column_it, this->scoring_scheme.score(sequence1_value, *std::ranges::begin(sequence2))), *cell_index_column_it); @@ -276,10 +310,11 @@ class pairwise_alignment_algorithm_banded : for (auto && sequence2_value : sequence2 | views::drop(1)) { - auto cell = *alignment_column_it; + current_alignment_column_it = next_alignment_column_it; + auto cell = *current_alignment_column_it; cell = this->track_cell( this->compute_inner_cell(cell.best_score(), - *++alignment_column_it, + *++next_alignment_column_it, this->scoring_scheme.score(sequence1_value, sequence2_value)), *++cell_index_column_it); } @@ -288,7 +323,7 @@ class pairwise_alignment_algorithm_banded : // Final phase: track last cell // --------------------------------------------------------------------- - this->track_last_row_cell(*alignment_column_it, *cell_index_column_it); + this->track_last_row_cell(*current_alignment_column_it, *cell_index_column_it); } }; From 13f00dda6445d999ee2d65a4ab5fc3be6ba53cf2 Mon Sep 17 00:00:00 2001 From: rrahn Date: Tue, 8 Sep 2020 15:01:28 +0200 Subject: [PATCH 09/16] [MISC] Refactor semi-global banded alignment test configs. --- .../fixture/semi_global_affine_banded.hpp | 66 ++++++++++--------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp b/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp index f56d126c92..d6de9bc3e3 100644 --- a/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp +++ b/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp @@ -27,27 +27,39 @@ using seqan3::operator""_dna4; namespace seqan3::test::alignment::fixture::semi_global::affine::banded { -inline constexpr auto align_config = seqan3::align_cfg::gap{seqan3::gap_scheme{seqan3::gap_score{-1}, - seqan3::gap_open_score{-10}}} | - seqan3::align_cfg::band_fixed_size{seqan3::align_cfg::lower_diagonal{-4}, - seqan3::align_cfg::upper_diagonal{8}}; - -inline constexpr auto align_config_semi_seq1 = align_config | - seqan3::align_cfg::method_global{ - seqan3::align_cfg::free_end_gaps_sequence1_leading{true}, - seqan3::align_cfg::free_end_gaps_sequence2_leading{false}, - seqan3::align_cfg::free_end_gaps_sequence1_trailing{true}, - seqan3::align_cfg::free_end_gaps_sequence2_trailing{false} - } | - seqan3::align_cfg::aligned_ends{seqan3::free_ends_first}; -inline constexpr auto align_config_semi_seq2 = align_config | - seqan3::align_cfg::method_global{ - seqan3::align_cfg::free_end_gaps_sequence1_leading{false}, - seqan3::align_cfg::free_end_gaps_sequence2_leading{true}, - seqan3::align_cfg::free_end_gaps_sequence1_trailing{false}, - seqan3::align_cfg::free_end_gaps_sequence2_trailing{true} - } | - seqan3::align_cfg::aligned_ends{seqan3::free_ends_second}; +inline constexpr auto config_band_m4_8 = seqan3::align_cfg::band_fixed_size{seqan3::align_cfg::lower_diagonal{-4}, + seqan3::align_cfg::upper_diagonal{8}}; + +inline constexpr auto config_gap = seqan3::align_cfg::gap{seqan3::gap_scheme{seqan3::gap_score{-1}, + seqan3::gap_open_score{-10}}}; + +inline constexpr auto config_semi_seq1 = seqan3::align_cfg::method_global{ + seqan3::align_cfg::free_end_gaps_sequence1_leading{true}, + seqan3::align_cfg::free_end_gaps_sequence2_leading{false}, + seqan3::align_cfg::free_end_gaps_sequence1_trailing{true}, + seqan3::align_cfg::free_end_gaps_sequence2_trailing{false} + } | seqan3::align_cfg::aligned_ends{seqan3::free_ends_first}; + +inline constexpr auto config_semi_seq2 = seqan3::align_cfg::method_global{ + seqan3::align_cfg::free_end_gaps_sequence1_leading{false}, + seqan3::align_cfg::free_end_gaps_sequence2_leading{true}, + seqan3::align_cfg::free_end_gaps_sequence1_trailing{false}, + seqan3::align_cfg::free_end_gaps_sequence2_trailing{true} + } | seqan3::align_cfg::aligned_ends{seqan3::free_ends_second}; + +// free gaps for the second leading and first trailing gaps. +inline constexpr auto config_free_gaps_sl_ft = seqan3::align_cfg::method_global{ + seqan3::align_cfg::free_end_gaps_sequence1_leading{false}, + seqan3::align_cfg::free_end_gaps_sequence2_leading{true}, + seqan3::align_cfg::free_end_gaps_sequence1_trailing{true}, + seqan3::align_cfg::free_end_gaps_sequence2_trailing{false} + } | seqan3::align_cfg::aligned_ends{ + seqan3::end_gaps{seqan3::front_end_second{std::true_type{}}, + seqan3::back_end_first{std::true_type{}}}}; + +inline constexpr auto config_scoring_m4_mm5 = seqan3::align_cfg::scoring_scheme{ + seqan3::nucleotide_scoring_scheme{seqan3::match_score{4}, + seqan3::mismatch_score{-5}}}; static auto dna4_01_semi_first = []() { @@ -55,9 +67,7 @@ static auto dna4_01_semi_first = []() { "TTTTTACGTATGTCCCCC"_dna4, "ACGTAAAACGT"_dna4, - align_config_semi_seq1 - | seqan3::align_cfg::scoring_scheme{seqan3::nucleotide_scoring_scheme{seqan3::match_score{4}, - seqan3::mismatch_score{-5}}}, + config_semi_seq1 | config_gap | config_band_m4_8 | config_scoring_m4_mm5, 10, "ACGT---ATGT", "ACGTAAAACGT", @@ -124,9 +134,7 @@ static auto dna4_03_semi_second = []() { "TTTTTACGTATGTCCCCC"_dna4, "ACGTAAAACGT"_dna4, - align_config_semi_seq2 - | seqan3::align_cfg::scoring_scheme{seqan3::nucleotide_scoring_scheme{seqan3::match_score{4}, - seqan3::mismatch_score{-5}}}, + config_semi_seq2 | config_gap | config_band_m4_8 | config_scoring_m4_mm5, -19, "TTTTTACGTATGTCCCCC", "GTAAAACGT---------", @@ -173,9 +181,7 @@ static auto dna4_04_semi_second = []() { "ACGTAAAACGT"_dna4, "TTTTTACGTATGTCCCCC"_dna4, - align_config_semi_seq2 - | seqan3::align_cfg::scoring_scheme{seqan3::nucleotide_scoring_scheme{seqan3::match_score{4}, - seqan3::mismatch_score{-5}}}, + config_semi_seq2 | config_gap | config_band_m4_8 | config_scoring_m4_mm5, -5, "ACGTAAAACGT", "------TACGT", From c392f451f61fcd3898ae2807236213e2666679ae Mon Sep 17 00:00:00 2001 From: rrahn Date: Tue, 8 Sep 2020 15:08:32 +0200 Subject: [PATCH 10/16] [FIX] Do not track last cell in column if not in last row of matrix. Before, the algorithm always tracked the last cell of the current column irrespective of its position within the global matrix. This means, that the optimum could point to a cell that does not represent the full sequence and hence would not be a valid semi-gobal alignment. --- .../pairwise/alignment_configurator.hpp | 6 +- .../pairwise_alignment_algorithm_banded.hpp | 4 + .../detail/policy_optimum_tracker.hpp | 96 +++++++++++++++++++ .../fixture/semi_global_affine_banded.hpp | 47 +++++++++ .../semi_global_affine_banded_test.cpp | 3 +- 5 files changed, 154 insertions(+), 2 deletions(-) diff --git a/include/seqan3/alignment/pairwise/alignment_configurator.hpp b/include/seqan3/alignment/pairwise/alignment_configurator.hpp index d72fdac1ec..a591da61b1 100644 --- a/include/seqan3/alignment/pairwise/alignment_configurator.hpp +++ b/include/seqan3/alignment/pairwise/alignment_configurator.hpp @@ -536,10 +536,14 @@ struct alignment_configurator } else // Use new alignment algorithm implementation. { + using scalar_optimum_updater_t = std::conditional_t; + using optimum_tracker_policy_t = lazy_conditional_t, - lazy>; + lazy>; using gap_cost_policy_t = std::conditional_t, diff --git a/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp b/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp index 235cac3999..fbcb326170 100644 --- a/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp +++ b/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp @@ -88,6 +88,10 @@ class pairwise_alignment_algorithm_banded : size_t const sequence1_size = std::ranges::distance(get<0>(sequence_pair)); size_t const sequence2_size = std::ranges::distance(get<1>(sequence_pair)); + // Initialise the cell updater with the dimensions of the regular matrix. + this->compare_and_set_optimum.set_target_indices(row_index_type{sequence2_size}, + column_index_type{sequence1_size}); + auto && [alignment_matrix, index_matrix] = this->acquire_matrices(sequence1_size, sequence2_size, this->lowest_viable_score()); diff --git a/include/seqan3/alignment/pairwise/detail/policy_optimum_tracker.hpp b/include/seqan3/alignment/pairwise/detail/policy_optimum_tracker.hpp index 953a56fd1b..99255a4b7a 100644 --- a/include/seqan3/alignment/pairwise/detail/policy_optimum_tracker.hpp +++ b/include/seqan3/alignment/pairwise/detail/policy_optimum_tracker.hpp @@ -32,6 +32,8 @@ namespace seqan3::detail * * Updates the current alignment optimum with the new score and the respective coordinate if the new score * compares greater or equal to the score of the current optimum. + * + * \see seqan3::detail::max_score_banded_updater */ struct max_score_updater { @@ -66,6 +68,100 @@ struct max_score_updater } }; +/*!\brief A function object that compares and possibly updates the alignment optimum with the current cell. + * \ingroup pairwise_alignment + * + * \details + * + * This special function object is used for global alignments including free end-gaps. + * It updates the current alignment optimum with the new score and the respective coordinate if the new score + * is greater or equal to the score of the current optimum and the cell is either in the target row or column. + * For the banded alignment this additional check is needed because the band might not reach the last row of the matrix + * for all columns. + * Thus, the last cell of a column will only be checked if the corresponding coordinate points to the last row of the + * matrix. + * + * ### Example + * + * Consider the following banded matrix: + * + * ``` + * 0 1 + * 012345678901 + * ____________ + * 0 | \ | + * 1 | \ | + * 2 |\ \ | + * 3 | \ \ | + * 4 | \ *| + * 5 | \ *| + * 6 | ********| + * –––––––––––– + * ``` + * + * When computing the columns from index 0 to 3 the last row of the matrix is not covered by the band. + * But the algorithm that computes the column does not know if the last computed cell of it is in fact also the last + * cell of the matrix (row index 6). Thus, by comparing the cell coordinate, which corresponds to the absolute + * matrix coordinate, with the initialised target row and column index of this function object, it can be guaranteed + * that only the scores of the cells are tracked that are in the last row or column (cells marked with an `*` + * in the picture above). + * + * \see seqan3::detail::max_score_updater + */ +struct max_score_banded_updater +{ +private: + //!\brief The row index indicating the last row of the alignment matrix. + size_t target_row_index{}; + //!\brief The column index indicating the last column of the alignment matrix. + size_t target_col_index{}; + +public: + /*!\brief Compares and updates the optimal score-coordinate pair. + * \tparam score_t The type of the score to track; must model std::totally_ordered and + * std::assignable_from `const & score_t`. + * \tparam coordinate_t The type of the coordinate to track; must model std::assignable_from `const & coordinate_t`. + * + * \param[in,out] optimal_score The optimal score to update. + * \param[in,out] optimal_coordinate The optimal coordinate to update. + * \param[in] current_score The score of the current cell. + * \param[in] current_coordinate The coordinate of the current cell. + * + * \details + * + * First, checks if the coordinate of the current alignment cell is either in the target row or column, i.e. + * the last row/column of the alignment matrix. If this is not the case the score won't be updated. + * Otherwise, compares the current_score with the optimal score and updates the optimal score and coordinate + * accordingly. + */ + template + //!\cond + requires (std::totally_ordered && std::assignable_from && + std::assignable_from) + //!\endcond + void operator()(score_t & optimal_score, + coordinate_t & optimal_coordinate, + score_t current_score, + coordinate_t current_coordinate) const noexcept + { + bool const is_better_score = (target_row_index == current_coordinate.row || + target_col_index == current_coordinate.col) && + (current_score >= optimal_score); + optimal_score = (is_better_score) ? std::move(current_score) : optimal_score; + optimal_coordinate = (is_better_score) ? std::move(current_coordinate) : optimal_coordinate; + } + + /*!\brief Sets the target index for the last row and column of the matrix. + * \param row_index The target index of the last row. + * \param col_index The target index of the last column. + */ + void set_target_indices(row_index_type row_index, column_index_type col_index) noexcept + { + target_row_index = row_index.get(); + target_col_index = col_index.get(); + } +}; + /*!\brief Implements the tracker to store the global optimum for a particular alignment computation. * \ingroup pairwise_alignment * diff --git a/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp b/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp index d6de9bc3e3..6b405b9660 100644 --- a/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp +++ b/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp @@ -236,4 +236,51 @@ static auto dna4_04_semi_second = []() }; }(); +static auto dna4_free_lb_with_band_tl2br_no_matches = []() +{ + return alignment_fixture + { + "AAAAAAAAAAA"_dna4, + "TTTTTTTTTTT"_dna4, + config_free_gaps_sl_ft | config_gap | config_band_m4_8 | config_scoring_m4_mm5, + -34, + "AAAAAAA-------", + "-------TTTTTTT", + seqan3::alignment_coordinate{seqan3::detail::column_index_type{0u}, seqan3::detail::row_index_type{4u}}, + seqan3::alignment_coordinate{seqan3::detail::column_index_type{7u}, seqan3::detail::row_index_type{11u}}, + std::vector> + { + // e, A, A, A, A, A, A, A, A, A, A, A, + /*e*/ 0 ,-11,-12,-13,-14,-15,-16,-17,-18,INF,INF,INF, + /*T*/ 0 ,-5 ,-12,-13,-14,-15,-16,-17,-18,-19,INF,INF, + /*T*/ 0 ,-5 ,-10,-13,-14,-15,-16,-17,-18,-19,-20,INF, + /*T*/ 0 ,-5 ,-10,-13,-14,-15,-16,-17,-18,-19,-20,-21, + /*T*/ 0 ,-5 ,-10,-13,-14,-15,-16,-17,-18,-19,-20,-21, + /*T*/ INF,-5 ,-10,-15,-18,-19,-20,-21,-22,-23,-24,-25, + /*T*/ INF,INF,-10,-15,-20,-23,-24,-25,-26,-27,-28,-29, + /*T*/ INF,INF,INF,-15,-20,-25,-28,-29,-30,-31,-32,-33, + /*T*/ INF,INF,INF,INF,-20,-25,-30,-31,-32,-33,-34,-35, + /*T*/ INF,INF,INF,INF,INF,-25,-30,-32,-33,-34,-35,-36, + /*T*/ INF,INF,INF,INF,INF,INF,-30,-33,-34,-35,-36,-37, + /*T*/ INF,INF,INF,INF,INF,INF,INF,-34,-35,-36,-37,-38 + }, + std::vector> + { + // e, A, A, A, A, A, A, A, A, A, A, A, + /*e*/ N ,L ,l ,l ,l ,l ,l ,l ,l ,INF,INF,INF, + /*T*/ N ,DUL,l ,l ,l ,l ,l ,l ,l ,l ,INF,INF, + /*T*/ N ,DUL,DUl,l ,l ,l ,l ,l ,l ,l ,l ,INF, + /*T*/ N ,DUL,DUl,l ,l ,l ,l ,l ,l ,l ,l ,l , + /*T*/ N ,DUL,DUl,l ,l ,l ,l ,l ,l ,l ,l ,l , + /*T*/ INF,DU ,DUL,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl, + /*T*/ INF,INF,DU ,DuL,Dul,Dul,Dul,Dul,Dul,Dul,Dul,Dul, + /*T*/ INF,INF,INF,Du ,DuL,Dul,Dul,Dul,Dul,Dul,Dul,Dul, + /*T*/ INF,INF,INF,INF,Du ,DuL,Dul,ul ,ul ,ul ,ul ,ul , + /*T*/ INF,INF,INF,INF,INF,Du ,DuL,ul ,ul ,ul ,ul ,ul , + /*T*/ INF,INF,INF,INF,INF,INF,Du ,uL ,ul ,ul ,ul ,ul , + /*T*/ INF,INF,INF,INF,INF,INF,INF,u ,uL ,ul ,ul ,ul + } + }; +}(); + } // namespace seqan3::test::alignment::fixture::global::affine::banded diff --git a/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp b/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp index 5e18167517..be8cffd54e 100644 --- a/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp +++ b/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp @@ -16,7 +16,8 @@ using pairwise_semiglobal_affine_banded_testing_types = ::testing::Types< pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_01_semi_first>, // pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_02_semi_first>, pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_03_semi_second>, - pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_04_semi_second> + pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_04_semi_second>, + pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_free_lb_with_band_tl2br_no_matches> >; INSTANTIATE_TYPED_TEST_SUITE_P(pairwise_semiglobal_affine_banded, From a0cfcc7f8bb367e9368b508a74fe8bb847fe6539 Mon Sep 17 00:00:00 2001 From: rrahn Date: Tue, 8 Sep 2020 18:59:02 +0200 Subject: [PATCH 11/16] [TEST] More tests for semi-global banded alignment. --- .../fixture/semi_global_affine_banded.hpp | 107 ++++++++++++++++++ .../semi_global_affine_banded_test.cpp | 4 +- 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp b/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp index 6b405b9660..291613dfa3 100644 --- a/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp +++ b/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp @@ -57,6 +57,13 @@ inline constexpr auto config_free_gaps_sl_ft = seqan3::align_cfg::method_global{ seqan3::end_gaps{seqan3::front_end_second{std::true_type{}}, seqan3::back_end_first{std::true_type{}}}}; +inline constexpr auto config_free_gaps_tlbr = seqan3::align_cfg::method_global{ + seqan3::align_cfg::free_end_gaps_sequence1_leading{true}, + seqan3::align_cfg::free_end_gaps_sequence2_leading{true}, + seqan3::align_cfg::free_end_gaps_sequence1_trailing{true}, + seqan3::align_cfg::free_end_gaps_sequence2_trailing{true} + } | seqan3::align_cfg::aligned_ends{seqan3::free_ends_all}; + inline constexpr auto config_scoring_m4_mm5 = seqan3::align_cfg::scoring_scheme{ seqan3::nucleotide_scoring_scheme{seqan3::match_score{4}, seqan3::mismatch_score{-5}}}; @@ -283,4 +290,104 @@ static auto dna4_free_lb_with_band_tl2br_no_matches = []() }; }(); +// band starts in top left with lower diagonal exceeding size of sequence 2 and ends in the bottom. +static auto dna4_free_tlbr_with_band_tl2b = [] () +{ + return alignment_fixture + { + "AGATTTACTACGCAT"_dna4, + "GTAGCAT"_dna4, + config_free_gaps_tlbr | config_gap | config_scoring_m4_mm5 | + seqan3::align_cfg::band_fixed_size{seqan3::align_cfg::lower_diagonal{-10}, + seqan3::align_cfg::upper_diagonal{4}}, + 5, + "AG-AT", + "AGCAT", + seqan3::alignment_coordinate{seqan3::detail::column_index_type{0u}, seqan3::detail::row_index_type{2u}}, + seqan3::alignment_coordinate{seqan3::detail::column_index_type{4u}, seqan3::detail::row_index_type{7u}}, + std::vector> + { + // e ,A ,G ,A ,T ,T ,T ,A ,C ,T ,A ,C ,G ,C ,A ,T , + /*e*/ 0 ,0 ,0 ,0 ,0 ,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ 0 ,-5 ,4 ,-5 ,-5 ,-5 ,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*T*/ 0 ,-5 ,-7 ,-1 ,-1 ,-1 ,-1 ,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*A*/ 0 ,4 ,-7 ,-3 ,-6 ,-6 ,-6 ,3 ,INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ 0 ,-5 ,8 ,-3 ,-4 ,-5 ,-6 ,-7 ,-2 ,INF,INF,INF,INF,INF,INF,INF, + /*C*/ 0 ,-5 ,-3 ,3 ,-8 ,-9 ,-10,-9 ,-3 ,-7 ,INF,INF,INF,INF,INF,INF, + /*A*/ 0 ,4 ,-4 ,1 ,-2 ,-10,-11,-6 ,-13,-8 ,-3 ,INF,INF,INF,INF,INF, + /*T*/ 0 ,-5 ,-1 ,-9 ,5 ,2 ,-6 ,-8 ,-9 ,-9 ,-11,-8 ,INF,INF,INF,INF + }, + std::vector> + { + // e ,A ,G ,A ,T ,T ,T ,A ,C ,T ,A ,C ,G ,C ,A ,T , + /*e*/ N ,N ,N ,N ,N ,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ N ,DUL,DUl,DUL,DUl,D ,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*T*/ N ,DuL,Ul ,Dul,DuL,DUL,D ,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*A*/ N ,DuL,L ,DUl,DUl,DUl,DUl,D ,INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ N ,DUL,Dul,L ,l ,l ,l ,l ,D ,INF,INF,INF,INF,INF,INF,INF, + /*C*/ N ,DuL,Ul ,Dul,DuL,Dul,Dul,ul ,DUl,D ,INF,INF,INF,INF,INF,INF, + /*A*/ N ,DuL,uL ,DUl,Dul,l ,l ,Dul,l ,DUl,D ,INF,INF,INF,INF,INF, + /*T*/ N ,DUL,Dul,DuL,DUl,DuL,Dul,l ,l ,Dul,l ,D ,INF,INF,INF,INF + } + }; +}(); + +// band starts in top left with upper diagonal exceeding size of sequence 1 and ends in the right side. +static auto dna4_free_tlbr_with_band_tl2r = [] () +{ + return alignment_fixture + { + "GTAGCAT"_dna4, + "AGATTTACTACGCAT"_dna4, + config_free_gaps_tlbr | config_gap | config_scoring_m4_mm5 | + seqan3::align_cfg::band_fixed_size{seqan3::align_cfg::lower_diagonal{-3}, + seqan3::align_cfg::upper_diagonal{100}}, + 5, + "AGCAT", + "AG-AT", + seqan3::alignment_coordinate{seqan3::detail::column_index_type{2u}, seqan3::detail::row_index_type{0u}}, + seqan3::alignment_coordinate{seqan3::detail::column_index_type{7u}, seqan3::detail::row_index_type{4u}}, + std::vector> + { + // e ,G ,T ,A ,G ,C ,A ,T , + /*e*/ 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , + /*A*/ 0 ,-5 ,-5 ,4 ,-5 ,-5 ,4 ,-5 , + /*G*/ 0 ,4 ,-7 ,-7 ,8 ,-3 ,-4 ,-1 , + /*A*/ 0 ,-5 ,-1 ,-3 ,-3 ,3 ,1 ,-9 , + /*T*/ INF,-5 ,-1 ,-6 ,-4 ,-8 ,-2 ,5 , + /*T*/ INF,INF,-1 ,-6 ,-5 ,-9 ,-10,2 , + /*T*/ INF,INF,INF,-6 ,-6 ,-10,-11,-6 , + /*A*/ INF,INF,INF,INF,-7 ,-11,-6 ,-8 , + /*C*/ INF,INF,INF,INF,INF,-3 ,-13,-9 , + /*T*/ INF,INF,INF,INF,INF,INF,-8 ,-9 , + /*A*/ INF,INF,INF,INF,INF,INF,INF,-11, + /*C*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*C*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*A*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*T*/ INF,INF,INF,INF,INF,INF,INF,INF + }, + std::vector> + { + // e ,G ,T ,A ,G ,C ,A ,T , + /*e*/ N ,N ,N ,N ,N ,N ,N ,N , + /*A*/ N ,DUL,DUl,DUl,DUL,DUl,DUl,DUL, + /*G*/ N ,DuL,L ,Ul ,Dul,L ,l ,Dul, + /*A*/ N ,DUL,Dul,DuL,Ul ,Dul,DuL,DUl, + /*T*/ INF,Du ,DUL,DuL,ul ,DUl,Dul,DuL, + /*T*/ INF,INF,DU ,DuL,ul ,Dul,ul ,DUl, + /*T*/ INF,INF,INF,Du ,uL ,DuL,ul ,Dul, + /*A*/ INF,INF,INF,INF,u ,DuL,Dul,uL , + /*C*/ INF,INF,INF,INF,INF,Du ,uL ,ul , + /*T*/ INF,INF,INF,INF,INF,INF,Du ,DuL, + /*A*/ INF,INF,INF,INF,INF,INF,INF,u , + /*C*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*C*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*A*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*T*/ INF,INF,INF,INF,INF,INF,INF,INF + } + }; +}(); + } // namespace seqan3::test::alignment::fixture::global::affine::banded diff --git a/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp b/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp index be8cffd54e..46038222c8 100644 --- a/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp +++ b/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp @@ -17,7 +17,9 @@ using pairwise_semiglobal_affine_banded_testing_types = ::testing::Types< // pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_02_semi_first>, pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_03_semi_second>, pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_04_semi_second>, - pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_free_lb_with_band_tl2br_no_matches> + pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_free_lb_with_band_tl2br_no_matches>, + pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_free_tlbr_with_band_tl2b>, + pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_free_tlbr_with_band_tl2r> >; INSTANTIATE_TYPED_TEST_SUITE_P(pairwise_semiglobal_affine_banded, From 7ad19a74ce80052b9113f5b4e846b2de9f1456c7 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Sat, 12 Sep 2020 12:40:34 +0200 Subject: [PATCH 12/16] [FIX] Timeout in debug nightlies --- .../global_affine_alignment_simd_benchmark_template.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/performance/alignment/global_affine_alignment_simd_benchmark_template.hpp b/test/performance/alignment/global_affine_alignment_simd_benchmark_template.hpp index d53bca51d2..6a1bff3667 100644 --- a/test/performance/alignment/global_affine_alignment_simd_benchmark_template.hpp +++ b/test/performance/alignment/global_affine_alignment_simd_benchmark_template.hpp @@ -28,7 +28,11 @@ // Globally defined constants to ensure same test data. inline constexpr size_t sequence_length = 150; +#ifndef NDEBUG +inline constexpr size_t set_size = 128; +#else inline constexpr size_t set_size = 1024; +#endif // NDEBUG // We don't know if the system supports hyper-threading so we use only half the threads so that the // simd benchmark is likely to run on physical cores only. From 5ec8be7cb97301a8f269c90290a407948d7e6947 Mon Sep 17 00:00:00 2001 From: marehr Date: Sun, 13 Sep 2020 23:15:22 +0200 Subject: [PATCH 13/16] [DOC] Update cppreference index --- test/documentation/seqan3-doxygen.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/documentation/seqan3-doxygen.cmake b/test/documentation/seqan3-doxygen.cmake index 27a0c89380..4a72695ecc 100644 --- a/test/documentation/seqan3-doxygen.cmake +++ b/test/documentation/seqan3-doxygen.cmake @@ -34,8 +34,8 @@ set(SEQAN3_DOXYGEN_STD_TAGFILE "${PROJECT_BINARY_DIR}/cppreference-doxygen-web.t include(ExternalProject) ExternalProject_Add ( download-cppreference-doxygen-web-tag - URL "https://github.com/PeterFeicht/cppreference-doc/releases/download/v20190928/html-book-20190928.tar.xz" - URL_HASH SHA256=6c250b02f6cb0fb54ea372d81c8f50703c01cc9f6887281c88a84dd91f463abc + URL "https://github.com/PeterFeicht/cppreference-doc/releases/download/v20200913/html-book-20200913.tar.xz" + URL_HASH SHA256=91c6d7881ca0ffb9780d40723c415b20b2dce0fafc2ea7ab6abaec04a5934f55 TLS_VERIFY ON DOWNLOAD_DIR "${PROJECT_BINARY_DIR}" DOWNLOAD_NAME "html-book.tar.xz" From e6137543975d6bcdd7fe7e8bc44cb6d45d6f23b7 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Tue, 15 Sep 2020 11:09:48 +0200 Subject: [PATCH 14/16] [FIX] Wrong ranks in search algorithm --- include/seqan3/search/search.hpp | 27 +++++++++++++++++++-- test/unit/search/search_collection_test.cpp | 23 ++++++++++++++++++ test/unit/search/search_test.cpp | 23 ++++++++++++++++++ 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/include/seqan3/search/search.hpp b/include/seqan3/search/search.hpp index 51a99504b9..1c16375ce8 100644 --- a/include/seqan3/search/search.hpp +++ b/include/seqan3/search/search.hpp @@ -15,8 +15,11 @@ #include #include +#include #include #include +#include +#include #include #include #include @@ -24,7 +27,6 @@ #include #include #include -#include namespace seqan3::detail { @@ -100,7 +102,8 @@ template //!\cond - requires std::ranges::forward_range> + requires std::ranges::forward_range> && + std::same_as, typename index_t::alphabet_type> //!\endcond inline auto search(queries_t && queries, index_t const & index, @@ -165,6 +168,26 @@ inline auto search(queries_t && queries, } //!\cond DEV +// Convert query sequence if it does not match the alphabet type of the index. +//!\overload +template + requires std::ranges::forward_range> && + (!std::same_as, typename index_t::alphabet_type>) +inline auto search(queries_t && queries, + index_t const & index, + configuration_t const & cfg = search_cfg::default_configuration) +{ + static_assert(std::convertible_to, typename index_t::alphabet_type>, + "The alphabet of the text collection must be convertible to the alphabet of the index."); + + if constexpr (range_dimension_v == 2u) + return search(queries | views::deep{views::convert}, index, cfg); + else + return search(queries | views::convert, index, cfg); +} + // Overload for a single query (not a collection of queries) //!\overload template index, cfg), std::runtime_error); } +// https://github.com/seqan/seqan3/issues/2115 +TYPED_TEST(search_test, issue_2115) +{ + std::vector> const genomes{"ACAG"_dna4}; + TypeParam const index{genomes}; + + // One substitution error, report all best hits + seqan3::configuration const cfg = seqan3::search_cfg::max_error_total{seqan3::search_cfg::error_count{1}} | + seqan3::search_cfg::max_error_substitution{seqan3::search_cfg::error_count{1}} | + seqan3::search_cfg::max_error_insertion{seqan3::search_cfg::error_count{0}} | + seqan3::search_cfg::max_error_deletion{seqan3::search_cfg::error_count{0}} | + seqan3::search_cfg::hit_all_best; + + std::vector const dna4_query{"ACGG"_dna4}; + std::vector> const dna4q_query{{'A'_dna4, seqan3::phred42{0}}, + {'C'_dna4, seqan3::phred42{15}}, + {'G'_dna4, seqan3::phred42{30}}, + {'G'_dna4, seqan3::phred42{41}}}; + + // Quality should not alter search results. + EXPECT_RANGE_EQ(seqan3::search(dna4q_query, index, cfg), seqan3::search(dna4_query, index, cfg)); +} + TYPED_TEST(search_string_test, error_free_string) { typename TestFixture::hits_result_t empty_result{}; diff --git a/test/unit/search/search_test.cpp b/test/unit/search/search_test.cpp index 383954e44d..1eb00ac13f 100644 --- a/test/unit/search/search_test.cpp +++ b/test/unit/search/search_test.cpp @@ -483,6 +483,29 @@ TYPED_TEST(search_test, parallel_without_parameter) EXPECT_THROW(search("AAAA"_dna4, this->index, cfg), std::runtime_error); } +// https://github.com/seqan/seqan3/issues/2115 +TYPED_TEST(search_test, issue_2115) +{ + std::vector const genome{"ACAG"_dna4}; + TypeParam const index{genome}; + + // One substitution error, report all best hits + seqan3::configuration const cfg = seqan3::search_cfg::max_error_total{seqan3::search_cfg::error_count{1}} | + seqan3::search_cfg::max_error_substitution{seqan3::search_cfg::error_count{1}} | + seqan3::search_cfg::max_error_insertion{seqan3::search_cfg::error_count{0}} | + seqan3::search_cfg::max_error_deletion{seqan3::search_cfg::error_count{0}} | + seqan3::search_cfg::hit_all_best; + + std::vector const dna4_query{"ACGG"_dna4}; + std::vector> const dna4q_query{{'A'_dna4, seqan3::phred42{0}}, + {'C'_dna4, seqan3::phred42{15}}, + {'G'_dna4, seqan3::phred42{30}}, + {'G'_dna4, seqan3::phred42{41}}}; + + // Quality should not alter search results. + EXPECT_RANGE_EQ(seqan3::search(dna4q_query, index, cfg), seqan3::search(dna4_query, index, cfg)); +} + TYPED_TEST(search_string_test, error_free_string) { // successful and unsuccesful exact search without cfg From 93efca75cb53431d7ab82f0d7ae3ca6493be5b32 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Tue, 15 Sep 2020 12:16:13 +0200 Subject: [PATCH 15/16] [TEST] add debug_stream test for search --- test/unit/search/search_collection_test.cpp | 11 +++++++++++ test/unit/search/search_test.cpp | 9 +++++++++ 2 files changed, 20 insertions(+) diff --git a/test/unit/search/search_collection_test.cpp b/test/unit/search/search_collection_test.cpp index a35a2b2484..0310b1b427 100644 --- a/test/unit/search/search_collection_test.cpp +++ b/test/unit/search/search_collection_test.cpp @@ -211,6 +211,17 @@ TYPED_TEST(search_test, parallel_without_parameter) EXPECT_THROW(search("AAAA"_dna4, this->index, cfg), std::runtime_error); } +TYPED_TEST(search_test, debug_streaming) +{ + std::ostringstream oss; + seqan3::debug_stream_type stream{oss}; + stream << search("TAC"_dna4, this->index); + EXPECT_EQ(oss.str(), "[" + "," + "," + ",]"); +} + // https://github.com/seqan/seqan3/issues/2115 TYPED_TEST(search_test, issue_2115) { diff --git a/test/unit/search/search_test.cpp b/test/unit/search/search_test.cpp index 1eb00ac13f..8a3448f43f 100644 --- a/test/unit/search/search_test.cpp +++ b/test/unit/search/search_test.cpp @@ -483,6 +483,15 @@ TYPED_TEST(search_test, parallel_without_parameter) EXPECT_THROW(search("AAAA"_dna4, this->index, cfg), std::runtime_error); } +TYPED_TEST(search_test, debug_streaming) +{ + std::ostringstream oss; + seqan3::debug_stream_type stream{oss}; + stream << search("TAC"_dna4, this->index); + EXPECT_EQ(oss.str(), "[" + ",]"); +} + // https://github.com/seqan/seqan3/issues/2115 TYPED_TEST(search_test, issue_2115) { From 2aff6ec85c37495e6e92085f3f320e6a490988a8 Mon Sep 17 00:00:00 2001 From: rrahn Date: Thu, 17 Sep 2020 14:49:14 +0200 Subject: [PATCH 16/16] [MISC] Apply type erasure on the alphabet types to reduce the compatibility overhead. By applying this type erasure, we are actually generic to allow any user input as long as it fulfils the respective concepts that are modeled explicitly for alphabets and scoring schemes. It also is an initial step to reduce the boiler plate overloads in seqan2 when we start adapting the algorithms step by step. --- .../alignment/multiple/align_multiple.hpp | 34 ++++- .../align_multiple_seqan2_adaptation.hpp | 133 +++++++----------- .../align_multiple_seqan2_adaptation_test.cpp | 31 ++-- 3 files changed, 97 insertions(+), 101 deletions(-) diff --git a/include/seqan3/alignment/multiple/align_multiple.hpp b/include/seqan3/alignment/multiple/align_multiple.hpp index 35e807e88a..625b0ee0d6 100644 --- a/include/seqan3/alignment/multiple/align_multiple.hpp +++ b/include/seqan3/alignment/multiple/align_multiple.hpp @@ -18,6 +18,9 @@ #include #include +#include +#include +#include #if SEQAN3_HAS_SEQAN2 #include @@ -54,27 +57,44 @@ namespace seqan3 /*!\brief The algorithm for multiple sequence alignment. * \ingroup multiple_alignment - * \tparam range_t Type of the input sequences, must model std::ranges::forward_range. - * \tparam config_t Type of the configuration; defaults to the type of seqan3::align_cfg::msa_default_configuration. + * \tparam range_t Type of the input sequences; must model std::ranges::forward_range && its reference type must model + * seqan::sequence && std::ranges::forward_range. + * \tparam config_t Type of the configuration. * \param input A vector of sequences that you want to align. - * \param config A configuration object that stores the settings for the algorithm; - * defaults to seqan3::align_cfg::msa_default_configuration. + * \param config A configuration object that stores the settings for the algorithm [optional: see below for the + * defaults if no configuration is given]. * \return The multiple sequence alignment as a vector of gapped sequences. + * * \details * * Computes a multiple sequence alignment from the given input sequences, using a consistency-based progressive * alignment algorithm on a graph of sequence segments. You can use the configuration object to * specify various parameters, like gap scores, alignment scores and band constraints. The return type is * `std::vector>`, with the inner type derived from the input sequence type. + * + * ### Default Configuration + * + * The following defaults are used if they are not configured explicitly by the user: + * + * * general gap score: -1 + * * additional gap open score: -13 + * * band constraints: none + * * scoring for amino acid sequences: Blosum62 matrix (set internally) + * * scoring for nucleotide sequences: match = +5 and mismatch = -4 (set internally) */ -template -auto align_multiple(std::vector const & input, config_t config = align_cfg::msa_default_configuration) +template > +//!\cond + requires std::ranges::forward_range && + seqan3::sequence> && + std::ranges::forward_range> +//!\endcond +auto align_multiple(range_t && input, config_t config = seqan3::configuration<>{}) { #if !SEQAN3_HAS_SEQAN2 // multiple sequence alignment is only enabled with seqan2 static_assert(false, "You need to have at least seqan 2.4"); #else // SEQAN3_HAS_SEQAN2 - using seqan3_alphabet_type = std::ranges::range_value_t; + using seqan3_alphabet_type = std::ranges::range_value_t>; using seqan2_adaptation_type = detail::align_multiple_seqan2_adaptation; using seqan2_graph_type = typename seqan2_adaptation_type::graph_type; diff --git a/include/seqan3/alignment/multiple/detail/align_multiple_seqan2_adaptation.hpp b/include/seqan3/alignment/multiple/detail/align_multiple_seqan2_adaptation.hpp index a14a427d3e..11eadf89ab 100644 --- a/include/seqan3/alignment/multiple/detail/align_multiple_seqan2_adaptation.hpp +++ b/include/seqan3/alignment/multiple/detail/align_multiple_seqan2_adaptation.hpp @@ -20,15 +20,10 @@ #include #include - -#if SEQAN3_HAS_SEQAN2 -#include -#include -#endif - #include #include #include +#include #include #include #include @@ -40,6 +35,11 @@ #include #include +#if SEQAN3_HAS_SEQAN2 +#include +#include +#endif + namespace seqan3::detail { @@ -47,30 +47,13 @@ namespace seqan3::detail * \ingroup multiple_alignment * \sa seqan3::align_multiple */ -template +template class align_multiple_seqan2_adaptation { -private: - //!\brief A list of SeqAn3 types that are currently supported for seqan3::align_multiple. - using seqan3_types = type_list; - //!\brief The corresponding list of SeqAn2 types (the order must be the same!). - using seqan2_types = type_list, - seqan::ReducedAminoAcid>; - //!\brief The index of the seqan3_alphabet_type in the list of seqan3_types. - static constexpr auto index = list_traits::find; - - static_assert(index != -1, "The seqan3_alphabet_type is currently not supported."); - public: //!\brief The (SeqAn2) alphabet type used in the multiple sequence alignment algorithm. - using alphabet_type = list_traits::at; - //!\brief The (SeqAn2) sequence tupe used in the multiple sequence alignment algorithm. + using alphabet_type = char; + //!\brief The (SeqAn2) sequence type used in the multiple sequence alignment algorithm. using sequence_type = seqan::String; //!\brief The output graph type of the multiple sequence alignment algorithm. using graph_type = seqan::Graph>, @@ -139,7 +122,7 @@ class align_multiple_seqan2_adaptation seqan::clear(tmp); } - return std::make_pair(std::move(sequences), std::move(ids)); + return std::pair{std::move(sequences), std::move(ids)}; } /*!\brief Create a vector of gapped SeqAn3 sequences from a SeqAn2 alignment graph. @@ -174,17 +157,14 @@ class align_multiple_seqan2_adaptation } private: - /*!\brief Predicate that represents whether `candidate_t` is a type that is not allowed as MSA configuration. + /*!\brief Helper variable that checks whether `candidate_t` is a type that is allowed as MSA configuration. * \tparam candidate_t The type to test. - * - * \details - * The predicate's value is set to: NOT (band OR gap OR scoring). */ template - struct is_invalid_msa_config : - std::negation, - is_type_specialisation_of, - is_type_specialisation_of>>{}; + static constexpr bool is_valid_msa_config = + std::is_same_v || + is_type_specialisation_of_v || + is_type_specialisation_of_v; /*!\brief Validate the given MSA configuration. * \tparam configs_t The specified configuration elements. @@ -192,86 +172,77 @@ class align_multiple_seqan2_adaptation template void validate_configuration(seqan3::configuration const &) { - static_assert(seqan3::pack_traits::find_if == -1, - "The given MSA configuration is not valid."); + static_assert((is_valid_msa_config && ...), + "You are trying to use an unsupported alignment configuration for the multiple sequence " + "alignment. Please, use only valid configurations as specified by seqan3::align_multiple!"); } /*!\brief Create the SeqAn2 scoring scheme based on the given SeqAn3 configuration. * \tparam seqan3_configuration_t The type of the configuration object. - * \param config The configuration that contains scoring parameters. + * \param[in] config The configuration that contains scoring parameters. * \return A SeqAn2 *MsaOptions* object with initialised scores. * * \details + * * The scoring scheme matrices are copied from SeqAn3 into the corresponding SeqAn2 type. * Note that the alphabet type can be different to that of the input sequences * (seqan3::detail::align_multiple_seqan2_adaptation::alphabet_type), e.g., the SeqAn3 nucleotide scoring scheme is * defined for dna16 but works for most other nucleotide alphabets too. The same holds for the SeqAn2 equivalent. */ template - //!\cond - requires (seqan3_configuration_t::template exists()) - //!\endcond auto initialise_scoring_scheme(seqan3_configuration_t const & config) { - auto scoring_scheme = get(config).value; + auto scoring_scheme = config.get_or(seqan3::align_cfg::scoring_scheme{default_scoring_scheme(config)}).value; - // A SeqAn3 scoring scheme might be of alphabet type dna15 although the aligned alphabet is dna4 - // SeqAn3 alphabet type used: - using scoring_scheme_alphabet_type = typename decltype(scoring_scheme)::alphabet_type; - constexpr auto scoring_scheme_alphabet_index = list_traits::find; - // SeqAn2 alphabet type to use accordingly: - using score_matrix_alphabet_type = list_traits::at; - using score_matrix_type = seqan::Score>; + static_assert(scoring_scheme_for, + "The selected scoring scheme is not suitable to be used with the given sequence input."); + + // Use type erasure over the alphabet to model all scoring schemes inside of the seqan2 alignment. + using score_matrix_type = seqan::Score>; seqan::MsaOptions msa_options{}; - score_matrix_type score_matrix; - for (size_t i = 0; i < seqan3::alphabet_size; ++i) + // Transform the given scoring scheme into the type erased scoring scheme. + for (size_t i = 0; i < seqan3::alphabet_size; ++i) { - for (size_t j = 0; j < seqan3::alphabet_size; ++j) + for (size_t j = 0; j < seqan3::alphabet_size; ++j) { - auto seqan3_i = seqan3::assign_rank_to(i, scoring_scheme_alphabet_type{}); - auto seqan3_j = seqan3::assign_rank_to(j, scoring_scheme_alphabet_type{}); - score_matrix_alphabet_type seqan2_i{seqan3::to_char(seqan3_i)}; - score_matrix_alphabet_type seqan2_j{seqan3::to_char(seqan3_j)}; + auto seqan3_i = seqan3::assign_rank_to(i, seqan3_alphabet_type{}); + auto seqan3_j = seqan3::assign_rank_to(j, seqan3_alphabet_type{}); + alphabet_type seqan2_i{seqan3::to_char(seqan3_i)}; + alphabet_type seqan2_j{seqan3::to_char(seqan3_j)}; - setScore(score_matrix, seqan2_i, seqan2_j, scoring_scheme.score(seqan3_i, seqan3_j)); + setScore(msa_options.sc, seqan2_i, seqan2_j, scoring_scheme.score(seqan3_i, seqan3_j)); } } - msa_options.sc = score_matrix; return msa_options; } /*!\brief Create the SeqAn2 scoring scheme based on default values. * \tparam seqan3_configuration_t The type of a configuration object which does not contain scoring parameters. - * \return A SeqAn2 *MsaOptions* object with initialised scores. + * \return The default scoring scheme. + * + * \details + * + * Currently, this method supports two defaults for the aminoacid alphabet and the nucleotide alphabet. If the + * user specifies any other alphabet type that is not one of the before mentioned alphabets a static assertion + * will be thrown. In this case the user has to provide an additional scoring scheme. * - * Settings: - * * scoring for amino acid sequences: Blosum62 matrix - * * scoring for nucleotide sequences: match = +5 and mismatch = -4 + * The following defaults are used: + * * scoring for amino acid sequences: \ref seqan3::aminoacid_similarity_matrix::BLOSUM62 "Blosum62" + * seqan3::aminoacid_scoring_scheme + * * scoring for nucleotide sequences: seqan3::nucleotide_scoring_scheme with match = +5 and mismatch = -4. */ template - //!\cond - requires (!seqan3_configuration_t::template exists()) - //!\endcond - auto initialise_scoring_scheme(seqan3_configuration_t const &) + auto default_scoring_scheme(seqan3_configuration_t const &) { - using score_type = std::conditional_t || - std::same_as> || - std::same_as>, - seqan::Blosum62, - seqan::Score>; - - seqan::MsaOptions msa_options{}; - - if constexpr (std::is_same_v>) - { - msa_options.sc.data_match = 5; // tcoffee app default - msa_options.sc.data_mismatch = -4; // tcoffee app default - } - - return msa_options; + if constexpr (seqan3::aminoacid_alphabet) + return aminoacid_scoring_scheme{aminoacid_similarity_matrix::BLOSUM62}; + else if constexpr (seqan3::nucleotide_alphabet) + return nucleotide_scoring_scheme{match_score{5}, mismatch_score{-4}}; + else + static_assert(!seqan3::alphabet, "We do not have a generic scoring scheme yet."); } //!\brief Befriend seqan3::detail::test_accessor to grant access to private member functions. diff --git a/test/unit/alignment/multiple/align_multiple_seqan2_adaptation_test.cpp b/test/unit/alignment/multiple/align_multiple_seqan2_adaptation_test.cpp index 89e3d54cf6..1b8cae940c 100644 --- a/test/unit/alignment/multiple/align_multiple_seqan2_adaptation_test.cpp +++ b/test/unit/alignment/multiple/align_multiple_seqan2_adaptation_test.cpp @@ -31,15 +31,14 @@ using adaptation_t = seqan3::detail::align_multiple_seqan2_adaptation::alphabet_type, seqan::Dna>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::Dna5>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::Iupac>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::Rna>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::Rna5>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::AminoAcid>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::ReducedAminoAcid>)); - EXPECT_TRUE((std::same_as::alphabet_type, - seqan::ReducedAminoAcid>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); } TEST(initialise_scoring_scheme, config_no_scoring_configuration) @@ -50,7 +49,7 @@ TEST(initialise_scoring_scheme, config_no_scoring_configuration) auto msaOpt = seqan3::detail::test_accessor::initialise_scoring_scheme(config); - EXPECT_TRUE((std::same_as>)); + EXPECT_TRUE((std::same_as>>)); } TEST(initialise_scoring_scheme, blosum62) @@ -62,11 +61,17 @@ TEST(initialise_scoring_scheme, blosum62) auto msaOpt = seqan3::detail::test_accessor::initialise_scoring_scheme(config); // compare matrix size and abort test if not equal so the value comparison does not segfault - ASSERT_EQ(static_cast(decltype(msaOpt.sc)::TAB_SIZE), static_cast(seqan::Blosum62::TAB_SIZE)); + ASSERT_EQ(static_cast(decltype(msaOpt.sc)::TAB_SIZE), 256ull * 256ull); seqan::Blosum62 expected_matrix{}; - for (size_t i = 0; i < static_cast(seqan::Blosum62::TAB_SIZE); ++i) - EXPECT_EQ(msaOpt.sc.data_tab[i], expected_matrix.data_tab[i]); + for (size_t aa_rank1 = 0; aa_rank1 < seqan::ValueSize::VALUE; ++aa_rank1) + for (size_t aa_rank2 = 0; aa_rank2 < seqan::ValueSize::VALUE; ++aa_rank2) + EXPECT_EQ(seqan::score(msaOpt.sc, + seqan::convert(aa_rank1), + seqan::convert(aa_rank2)), + seqan::score(expected_matrix, + seqan::convert(aa_rank1), + seqan::convert(aa_rank2))); } TEST(configuration, gap_score_conversion)