diff --git a/include/seqan3/alignment/multiple/align_multiple.hpp b/include/seqan3/alignment/multiple/align_multiple.hpp index 35e807e88a..625b0ee0d6 100644 --- a/include/seqan3/alignment/multiple/align_multiple.hpp +++ b/include/seqan3/alignment/multiple/align_multiple.hpp @@ -18,6 +18,9 @@ #include #include +#include +#include +#include #if SEQAN3_HAS_SEQAN2 #include @@ -54,27 +57,44 @@ namespace seqan3 /*!\brief The algorithm for multiple sequence alignment. * \ingroup multiple_alignment - * \tparam range_t Type of the input sequences, must model std::ranges::forward_range. - * \tparam config_t Type of the configuration; defaults to the type of seqan3::align_cfg::msa_default_configuration. + * \tparam range_t Type of the input sequences; must model std::ranges::forward_range && its reference type must model + * seqan::sequence && std::ranges::forward_range. + * \tparam config_t Type of the configuration. * \param input A vector of sequences that you want to align. - * \param config A configuration object that stores the settings for the algorithm; - * defaults to seqan3::align_cfg::msa_default_configuration. + * \param config A configuration object that stores the settings for the algorithm [optional: see below for the + * defaults if no configuration is given]. * \return The multiple sequence alignment as a vector of gapped sequences. + * * \details * * Computes a multiple sequence alignment from the given input sequences, using a consistency-based progressive * alignment algorithm on a graph of sequence segments. You can use the configuration object to * specify various parameters, like gap scores, alignment scores and band constraints. The return type is * `std::vector>`, with the inner type derived from the input sequence type. + * + * ### Default Configuration + * + * The following defaults are used if they are not configured explicitly by the user: + * + * * general gap score: -1 + * * additional gap open score: -13 + * * band constraints: none + * * scoring for amino acid sequences: Blosum62 matrix (set internally) + * * scoring for nucleotide sequences: match = +5 and mismatch = -4 (set internally) */ -template -auto align_multiple(std::vector const & input, config_t config = align_cfg::msa_default_configuration) +template > +//!\cond + requires std::ranges::forward_range && + seqan3::sequence> && + std::ranges::forward_range> +//!\endcond +auto align_multiple(range_t && input, config_t config = seqan3::configuration<>{}) { #if !SEQAN3_HAS_SEQAN2 // multiple sequence alignment is only enabled with seqan2 static_assert(false, "You need to have at least seqan 2.4"); #else // SEQAN3_HAS_SEQAN2 - using seqan3_alphabet_type = std::ranges::range_value_t; + using seqan3_alphabet_type = std::ranges::range_value_t>; using seqan2_adaptation_type = detail::align_multiple_seqan2_adaptation; using seqan2_graph_type = typename seqan2_adaptation_type::graph_type; diff --git a/include/seqan3/alignment/multiple/detail/align_multiple_seqan2_adaptation.hpp b/include/seqan3/alignment/multiple/detail/align_multiple_seqan2_adaptation.hpp index a14a427d3e..11eadf89ab 100644 --- a/include/seqan3/alignment/multiple/detail/align_multiple_seqan2_adaptation.hpp +++ b/include/seqan3/alignment/multiple/detail/align_multiple_seqan2_adaptation.hpp @@ -20,15 +20,10 @@ #include #include - -#if SEQAN3_HAS_SEQAN2 -#include -#include -#endif - #include #include #include +#include #include #include #include @@ -40,6 +35,11 @@ #include #include +#if SEQAN3_HAS_SEQAN2 +#include +#include +#endif + namespace seqan3::detail { @@ -47,30 +47,13 @@ namespace seqan3::detail * \ingroup multiple_alignment * \sa seqan3::align_multiple */ -template +template class align_multiple_seqan2_adaptation { -private: - //!\brief A list of SeqAn3 types that are currently supported for seqan3::align_multiple. - using seqan3_types = type_list; - //!\brief The corresponding list of SeqAn2 types (the order must be the same!). - using seqan2_types = type_list, - seqan::ReducedAminoAcid>; - //!\brief The index of the seqan3_alphabet_type in the list of seqan3_types. - static constexpr auto index = list_traits::find; - - static_assert(index != -1, "The seqan3_alphabet_type is currently not supported."); - public: //!\brief The (SeqAn2) alphabet type used in the multiple sequence alignment algorithm. - using alphabet_type = list_traits::at; - //!\brief The (SeqAn2) sequence tupe used in the multiple sequence alignment algorithm. + using alphabet_type = char; + //!\brief The (SeqAn2) sequence type used in the multiple sequence alignment algorithm. using sequence_type = seqan::String; //!\brief The output graph type of the multiple sequence alignment algorithm. using graph_type = seqan::Graph>, @@ -139,7 +122,7 @@ class align_multiple_seqan2_adaptation seqan::clear(tmp); } - return std::make_pair(std::move(sequences), std::move(ids)); + return std::pair{std::move(sequences), std::move(ids)}; } /*!\brief Create a vector of gapped SeqAn3 sequences from a SeqAn2 alignment graph. @@ -174,17 +157,14 @@ class align_multiple_seqan2_adaptation } private: - /*!\brief Predicate that represents whether `candidate_t` is a type that is not allowed as MSA configuration. + /*!\brief Helper variable that checks whether `candidate_t` is a type that is allowed as MSA configuration. * \tparam candidate_t The type to test. - * - * \details - * The predicate's value is set to: NOT (band OR gap OR scoring). */ template - struct is_invalid_msa_config : - std::negation, - is_type_specialisation_of, - is_type_specialisation_of>>{}; + static constexpr bool is_valid_msa_config = + std::is_same_v || + is_type_specialisation_of_v || + is_type_specialisation_of_v; /*!\brief Validate the given MSA configuration. * \tparam configs_t The specified configuration elements. @@ -192,86 +172,77 @@ class align_multiple_seqan2_adaptation template void validate_configuration(seqan3::configuration const &) { - static_assert(seqan3::pack_traits::find_if == -1, - "The given MSA configuration is not valid."); + static_assert((is_valid_msa_config && ...), + "You are trying to use an unsupported alignment configuration for the multiple sequence " + "alignment. Please, use only valid configurations as specified by seqan3::align_multiple!"); } /*!\brief Create the SeqAn2 scoring scheme based on the given SeqAn3 configuration. * \tparam seqan3_configuration_t The type of the configuration object. - * \param config The configuration that contains scoring parameters. + * \param[in] config The configuration that contains scoring parameters. * \return A SeqAn2 *MsaOptions* object with initialised scores. * * \details + * * The scoring scheme matrices are copied from SeqAn3 into the corresponding SeqAn2 type. * Note that the alphabet type can be different to that of the input sequences * (seqan3::detail::align_multiple_seqan2_adaptation::alphabet_type), e.g., the SeqAn3 nucleotide scoring scheme is * defined for dna16 but works for most other nucleotide alphabets too. The same holds for the SeqAn2 equivalent. */ template - //!\cond - requires (seqan3_configuration_t::template exists()) - //!\endcond auto initialise_scoring_scheme(seqan3_configuration_t const & config) { - auto scoring_scheme = get(config).value; + auto scoring_scheme = config.get_or(seqan3::align_cfg::scoring_scheme{default_scoring_scheme(config)}).value; - // A SeqAn3 scoring scheme might be of alphabet type dna15 although the aligned alphabet is dna4 - // SeqAn3 alphabet type used: - using scoring_scheme_alphabet_type = typename decltype(scoring_scheme)::alphabet_type; - constexpr auto scoring_scheme_alphabet_index = list_traits::find; - // SeqAn2 alphabet type to use accordingly: - using score_matrix_alphabet_type = list_traits::at; - using score_matrix_type = seqan::Score>; + static_assert(scoring_scheme_for, + "The selected scoring scheme is not suitable to be used with the given sequence input."); + + // Use type erasure over the alphabet to model all scoring schemes inside of the seqan2 alignment. + using score_matrix_type = seqan::Score>; seqan::MsaOptions msa_options{}; - score_matrix_type score_matrix; - for (size_t i = 0; i < seqan3::alphabet_size; ++i) + // Transform the given scoring scheme into the type erased scoring scheme. + for (size_t i = 0; i < seqan3::alphabet_size; ++i) { - for (size_t j = 0; j < seqan3::alphabet_size; ++j) + for (size_t j = 0; j < seqan3::alphabet_size; ++j) { - auto seqan3_i = seqan3::assign_rank_to(i, scoring_scheme_alphabet_type{}); - auto seqan3_j = seqan3::assign_rank_to(j, scoring_scheme_alphabet_type{}); - score_matrix_alphabet_type seqan2_i{seqan3::to_char(seqan3_i)}; - score_matrix_alphabet_type seqan2_j{seqan3::to_char(seqan3_j)}; + auto seqan3_i = seqan3::assign_rank_to(i, seqan3_alphabet_type{}); + auto seqan3_j = seqan3::assign_rank_to(j, seqan3_alphabet_type{}); + alphabet_type seqan2_i{seqan3::to_char(seqan3_i)}; + alphabet_type seqan2_j{seqan3::to_char(seqan3_j)}; - setScore(score_matrix, seqan2_i, seqan2_j, scoring_scheme.score(seqan3_i, seqan3_j)); + setScore(msa_options.sc, seqan2_i, seqan2_j, scoring_scheme.score(seqan3_i, seqan3_j)); } } - msa_options.sc = score_matrix; return msa_options; } /*!\brief Create the SeqAn2 scoring scheme based on default values. * \tparam seqan3_configuration_t The type of a configuration object which does not contain scoring parameters. - * \return A SeqAn2 *MsaOptions* object with initialised scores. + * \return The default scoring scheme. + * + * \details + * + * Currently, this method supports two defaults for the aminoacid alphabet and the nucleotide alphabet. If the + * user specifies any other alphabet type that is not one of the before mentioned alphabets a static assertion + * will be thrown. In this case the user has to provide an additional scoring scheme. * - * Settings: - * * scoring for amino acid sequences: Blosum62 matrix - * * scoring for nucleotide sequences: match = +5 and mismatch = -4 + * The following defaults are used: + * * scoring for amino acid sequences: \ref seqan3::aminoacid_similarity_matrix::BLOSUM62 "Blosum62" + * seqan3::aminoacid_scoring_scheme + * * scoring for nucleotide sequences: seqan3::nucleotide_scoring_scheme with match = +5 and mismatch = -4. */ template - //!\cond - requires (!seqan3_configuration_t::template exists()) - //!\endcond - auto initialise_scoring_scheme(seqan3_configuration_t const &) + auto default_scoring_scheme(seqan3_configuration_t const &) { - using score_type = std::conditional_t || - std::same_as> || - std::same_as>, - seqan::Blosum62, - seqan::Score>; - - seqan::MsaOptions msa_options{}; - - if constexpr (std::is_same_v>) - { - msa_options.sc.data_match = 5; // tcoffee app default - msa_options.sc.data_mismatch = -4; // tcoffee app default - } - - return msa_options; + if constexpr (seqan3::aminoacid_alphabet) + return aminoacid_scoring_scheme{aminoacid_similarity_matrix::BLOSUM62}; + else if constexpr (seqan3::nucleotide_alphabet) + return nucleotide_scoring_scheme{match_score{5}, mismatch_score{-4}}; + else + static_assert(!seqan3::alphabet, "We do not have a generic scoring scheme yet."); } //!\brief Befriend seqan3::detail::test_accessor to grant access to private member functions. diff --git a/include/seqan3/alignment/pairwise/alignment_configurator.hpp b/include/seqan3/alignment/pairwise/alignment_configurator.hpp index e29924afd5..a371cd046b 100644 --- a/include/seqan3/alignment/pairwise/alignment_configurator.hpp +++ b/include/seqan3/alignment/pairwise/alignment_configurator.hpp @@ -48,7 +48,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -523,7 +525,7 @@ struct alignment_configurator if constexpr (traits_t::is_local || // it is a local alignment, traits_t::is_debug || // it runs in debug mode, (traits_t::compute_begin_positions || traits_t::compute_sequence_alignment) || // it computes more than the end position. - (traits_t::is_banded && traits_t::compute_end_positions) || // banded + (traits_t::is_banded && traits_t::compute_begin_positions) || // banded (traits_t::is_vectorised && traits_t::is_banded) || // it is vectorised and banded, (traits_t::is_vectorised && traits_t::compute_end_positions)) // simd and more than the score. { @@ -536,32 +538,64 @@ struct alignment_configurator } else // Use new alignment algorithm implementation. { + using scalar_optimum_updater_t = std::conditional_t; + using optimum_tracker_policy_t = lazy_conditional_t, - lazy>; + lazy>; using gap_cost_policy_t = std::conditional_t, policy_affine_gap_recursion>; using result_builder_policy_t = policy_alignment_result_builder; - using alignment_method_t = typename std::conditional_t; + // ---------------------------------------------------------------------------- + // Configure scoring scheme policy + // ---------------------------------------------------------------------------- + + using alignment_method_t = std::conditional_t; using score_t = typename traits_t::score_type; - using alignment_scoring_scheme_t = - lazy_conditional_t, - typename traits_t::scoring_scheme_type>; + using scoring_scheme_t = typename traits_t::scoring_scheme_type; + constexpr bool is_aminoacid_scheme = is_type_specialisation_of_v; + + using simple_simd_scheme_t = lazy_conditional_t, + void>; + using matrix_simd_scheme_t = lazy_conditional_t, + void>; + + using alignment_scoring_scheme_t = std::conditional_t, + scoring_scheme_t>; using scoring_scheme_policy_t = policy_scoring_scheme; + + // ---------------------------------------------------------------------------- + // Configure alignment matrix policy + // ---------------------------------------------------------------------------- + using alignment_matrix_policy_t = policy_alignment_matrix>; + // ---------------------------------------------------------------------------- + // Configure alignment algorithm + // ---------------------------------------------------------------------------- + using algorithm_t = select_alignment_algorithm_t; - using alignment_scoring_scheme_t = - lazy_conditional_t>, - typename traits_t::scoring_scheme_type>; + using scoring_scheme_t = typename traits_t::scoring_scheme_type; + constexpr bool is_aminoacid_scheme = is_type_specialisation_of_v; + using alignment_type_t = typename std::conditional_t; + + using simple_simd_scheme_t = lazy_conditional_t, + void>; + using matrix_simd_scheme_t = lazy_conditional_t, + void>; + + using alignment_scoring_scheme_t = std::conditional_t, + scoring_scheme_t>; using scoring_scheme_policy_t = deferred_crtp_base; return configure_free_ends(cfg); diff --git a/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp b/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp index 4c51aea1ba..fbcb326170 100644 --- a/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp +++ b/include/seqan3/alignment/pairwise/detail/pairwise_alignment_algorithm_banded.hpp @@ -88,6 +88,10 @@ class pairwise_alignment_algorithm_banded : size_t const sequence1_size = std::ranges::distance(get<0>(sequence_pair)); size_t const sequence2_size = std::ranges::distance(get<1>(sequence_pair)); + // Initialise the cell updater with the dimensions of the regular matrix. + this->compare_and_set_optimum.set_target_indices(row_index_type{sequence2_size}, + column_index_type{sequence1_size}); + auto && [alignment_matrix, index_matrix] = this->acquire_matrices(sequence1_size, sequence2_size, this->lowest_viable_score()); @@ -228,7 +232,7 @@ class pairwise_alignment_algorithm_banded : } /*!\brief Computes a column of the band that does not start in the first row of the alignment matrix. - * \tparam alignment_column_t The type of the alignment column; must model std::ranges::input_range. + * \tparam alignment_column_t The type of the alignment column; must model std::ranges::forward_range. * \tparam cell_index_column_t The type of the indexed column; must model std::ranges::input_range. * \tparam sequence1_value_t The value type of sequence1; must model seqan3::semialphabet. * \tparam sequence2_t The type of the second sequence; must model std::ranges::input_range. @@ -245,8 +249,40 @@ class pairwise_alignment_algorithm_banded : * phase the first cell of the column is computed and in the iteration phase all remaining cells are computed. * In the final phase the last cell is possibly evaluated for a new alignment optimum. * Note that the length of `sequence2` determines the size of the column. + * + * ### Implementation with a score matrix using linear memory + * + * When the score matrix uses only linear memory, i.e. only one column is stored, the algorithm reuses the cells of + * the same column to compute the current one. This means, before the current cell is updated its value is + * cached and used as the previous diagonal value when computing the next cell below. + * In the banded case, however, the position of the referenced cell is shifted by one and needs a different + * handling. + * + * ``` + * 0 1 2 3 4 5 6 + * _____________ + * 0 |0|0|0|\ | + * 1 |1|1|1|0|\ | + * 2 |\|2|2|1|0|\ | + * 3 | \|3|2|1|0|\| + * 4 | \|3|2|1|0| + * 5 | \|3|2|1| + * ––––––––––––– + * + * ``` + * The picture above depicts the banded matrix with the indices of the column. As long as the band touches + * the first row (column 0 - 2), the indices of the actual stored column refer to the same position as the + * previous column. Hence, to compute these columns the regular algorithm + * seqan3::detail::pairwise_alignment_algorithm::compute_column can be used. + * Starting at column 3 the first cell of the band moves downwards, causing a shift in the position of the previous + * cell. In this state, the value of the current cell represents the previous diagonal value before it is updated. + * To read the previous horizontal value the next cell below has to be dereferenced. + * Accordingly, two iterators are used to point to the respective cells in the matrix. The first one points to the + * current cell (the one that is written to) and the second points to the next cell (the one where the + * horizontal and vertical scores are read from). After computing the last cell of the column the value of the + * current iterator can be used to track the score of the cell. */ - template @@ -259,13 +295,15 @@ class pairwise_alignment_algorithm_banded : // Initial phase: prepare column and initialise first cell // --------------------------------------------------------------------- - auto alignment_column_it = alignment_column.begin(); + auto current_alignment_column_it = alignment_column.begin(); auto cell_index_column_it = cell_index_column.begin(); - auto cell = *alignment_column_it; + // Points to the last valid cell in the column. + decltype(current_alignment_column_it) next_alignment_column_it{current_alignment_column_it}; + auto cell = *current_alignment_column_it; cell = this->track_cell( this->initialise_band_first_cell(cell.best_score(), - *++alignment_column_it, + *++next_alignment_column_it, this->scoring_scheme.score(sequence1_value, *std::ranges::begin(sequence2))), *cell_index_column_it); @@ -276,10 +314,11 @@ class pairwise_alignment_algorithm_banded : for (auto && sequence2_value : sequence2 | views::drop(1)) { - auto cell = *alignment_column_it; + current_alignment_column_it = next_alignment_column_it; + auto cell = *current_alignment_column_it; cell = this->track_cell( this->compute_inner_cell(cell.best_score(), - *++alignment_column_it, + *++next_alignment_column_it, this->scoring_scheme.score(sequence1_value, sequence2_value)), *++cell_index_column_it); } @@ -288,7 +327,7 @@ class pairwise_alignment_algorithm_banded : // Final phase: track last cell // --------------------------------------------------------------------- - this->track_last_row_cell(*alignment_column_it, *cell_index_column_it); + this->track_last_row_cell(*current_alignment_column_it, *cell_index_column_it); } }; diff --git a/include/seqan3/alignment/pairwise/detail/policy_optimum_tracker.hpp b/include/seqan3/alignment/pairwise/detail/policy_optimum_tracker.hpp index 953a56fd1b..99255a4b7a 100644 --- a/include/seqan3/alignment/pairwise/detail/policy_optimum_tracker.hpp +++ b/include/seqan3/alignment/pairwise/detail/policy_optimum_tracker.hpp @@ -32,6 +32,8 @@ namespace seqan3::detail * * Updates the current alignment optimum with the new score and the respective coordinate if the new score * compares greater or equal to the score of the current optimum. + * + * \see seqan3::detail::max_score_banded_updater */ struct max_score_updater { @@ -66,6 +68,100 @@ struct max_score_updater } }; +/*!\brief A function object that compares and possibly updates the alignment optimum with the current cell. + * \ingroup pairwise_alignment + * + * \details + * + * This special function object is used for global alignments including free end-gaps. + * It updates the current alignment optimum with the new score and the respective coordinate if the new score + * is greater or equal to the score of the current optimum and the cell is either in the target row or column. + * For the banded alignment this additional check is needed because the band might not reach the last row of the matrix + * for all columns. + * Thus, the last cell of a column will only be checked if the corresponding coordinate points to the last row of the + * matrix. + * + * ### Example + * + * Consider the following banded matrix: + * + * ``` + * 0 1 + * 012345678901 + * ____________ + * 0 | \ | + * 1 | \ | + * 2 |\ \ | + * 3 | \ \ | + * 4 | \ *| + * 5 | \ *| + * 6 | ********| + * –––––––––––– + * ``` + * + * When computing the columns from index 0 to 3 the last row of the matrix is not covered by the band. + * But the algorithm that computes the column does not know if the last computed cell of it is in fact also the last + * cell of the matrix (row index 6). Thus, by comparing the cell coordinate, which corresponds to the absolute + * matrix coordinate, with the initialised target row and column index of this function object, it can be guaranteed + * that only the scores of the cells are tracked that are in the last row or column (cells marked with an `*` + * in the picture above). + * + * \see seqan3::detail::max_score_updater + */ +struct max_score_banded_updater +{ +private: + //!\brief The row index indicating the last row of the alignment matrix. + size_t target_row_index{}; + //!\brief The column index indicating the last column of the alignment matrix. + size_t target_col_index{}; + +public: + /*!\brief Compares and updates the optimal score-coordinate pair. + * \tparam score_t The type of the score to track; must model std::totally_ordered and + * std::assignable_from `const & score_t`. + * \tparam coordinate_t The type of the coordinate to track; must model std::assignable_from `const & coordinate_t`. + * + * \param[in,out] optimal_score The optimal score to update. + * \param[in,out] optimal_coordinate The optimal coordinate to update. + * \param[in] current_score The score of the current cell. + * \param[in] current_coordinate The coordinate of the current cell. + * + * \details + * + * First, checks if the coordinate of the current alignment cell is either in the target row or column, i.e. + * the last row/column of the alignment matrix. If this is not the case the score won't be updated. + * Otherwise, compares the current_score with the optimal score and updates the optimal score and coordinate + * accordingly. + */ + template + //!\cond + requires (std::totally_ordered && std::assignable_from && + std::assignable_from) + //!\endcond + void operator()(score_t & optimal_score, + coordinate_t & optimal_coordinate, + score_t current_score, + coordinate_t current_coordinate) const noexcept + { + bool const is_better_score = (target_row_index == current_coordinate.row || + target_col_index == current_coordinate.col) && + (current_score >= optimal_score); + optimal_score = (is_better_score) ? std::move(current_score) : optimal_score; + optimal_coordinate = (is_better_score) ? std::move(current_coordinate) : optimal_coordinate; + } + + /*!\brief Sets the target index for the last row and column of the matrix. + * \param row_index The target index of the last row. + * \param col_index The target index of the last column. + */ + void set_target_indices(row_index_type row_index, column_index_type col_index) noexcept + { + target_row_index = row_index.get(); + target_col_index = col_index.get(); + } +}; + /*!\brief Implements the tracker to store the global optimum for a particular alignment computation. * \ingroup pairwise_alignment * diff --git a/include/seqan3/search/search.hpp b/include/seqan3/search/search.hpp index 51a99504b9..1c16375ce8 100644 --- a/include/seqan3/search/search.hpp +++ b/include/seqan3/search/search.hpp @@ -15,8 +15,11 @@ #include #include +#include #include #include +#include +#include #include #include #include @@ -24,7 +27,6 @@ #include #include #include -#include namespace seqan3::detail { @@ -100,7 +102,8 @@ template //!\cond - requires std::ranges::forward_range> + requires std::ranges::forward_range> && + std::same_as, typename index_t::alphabet_type> //!\endcond inline auto search(queries_t && queries, index_t const & index, @@ -165,6 +168,26 @@ inline auto search(queries_t && queries, } //!\cond DEV +// Convert query sequence if it does not match the alphabet type of the index. +//!\overload +template + requires std::ranges::forward_range> && + (!std::same_as, typename index_t::alphabet_type>) +inline auto search(queries_t && queries, + index_t const & index, + configuration_t const & cfg = search_cfg::default_configuration) +{ + static_assert(std::convertible_to, typename index_t::alphabet_type>, + "The alphabet of the text collection must be convertible to the alphabet of the index."); + + if constexpr (range_dimension_v == 2u) + return search(queries | views::deep{views::convert}, index, cfg); + else + return search(queries | views::convert, index, cfg); +} + // Overload for a single query (not a collection of queries) //!\overload template + +#include "global_affine_alignment_simd_benchmark_template.hpp" + +// Range to test for sequence length variance +inline constexpr size_t deviation_begin = 0; +inline constexpr size_t deviation_end = 64; +inline constexpr size_t deviation_step = 8; + +// ---------------------------------------------------------------------------- +// SeqAn3 +// ---------------------------------------------------------------------------- + +constexpr auto aa_score_scheme = seqan3::aminoacid_scoring_scheme{seqan3::aminoacid_similarity_matrix::BLOSUM62}; +constexpr auto affine_cfg = seqan3::align_cfg::method_global{} | + seqan3::align_cfg::gap{seqan3::gap_scheme{seqan3::gap_score{-1}, + seqan3::gap_open_score{-10}}} | + seqan3::align_cfg::scoring_scheme{aa_score_scheme}; + +BENCHMARK_CAPTURE(seqan3_affine_accelerated, + simd_with_score, + seqan3::aa27{}, + affine_cfg, + seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, + seqan3::align_cfg::output_score, + seqan3::align_cfg::vectorised) + ->UseRealTime() + ->DenseRange(deviation_begin, deviation_end, deviation_step); + +BENCHMARK_CAPTURE(seqan3_affine_accelerated, + simd_parallel_with_score, + seqan3::aa27{}, + affine_cfg, + seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, + seqan3::align_cfg::output_score, + seqan3::align_cfg::vectorised, + seqan3::align_cfg::parallel{get_number_of_threads()}) + ->UseRealTime() + ->DenseRange(deviation_begin, deviation_end, deviation_step); + +#ifdef SEQAN3_HAS_SEQAN2 + +// ---------------------------------------------------------------------------- +// SeqAn2 +// ---------------------------------------------------------------------------- + +using scoring_scheme_t = seqan::Score>; + +// Note SeqAn2 has no parallel interface yet for computing the traceback as well. +BENCHMARK_CAPTURE(seqan2_affine_accelerated, + simd_with_score, + seqan::AminoAcid{}, + scoring_scheme_t{-1, -11}, + seqan::ExecutionPolicy{}, + 1, + affine_cfg) + ->UseRealTime() + ->DenseRange(deviation_begin, deviation_end, deviation_step); + +BENCHMARK_CAPTURE(seqan2_affine_accelerated, + simd_parallel_with_score, + seqan::AminoAcid{}, + scoring_scheme_t{-1, -11}, + seqan::ExecutionPolicy{}, + get_number_of_threads(), + affine_cfg) + ->UseRealTime() + ->DenseRange(deviation_begin, deviation_end, deviation_step); +#endif // SEQAN3_HAS_SEQAN2 + +// ============================================================================ +// instantiate tests +// ============================================================================ + +BENCHMARK_MAIN(); diff --git a/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp b/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp index 5c1d2454a7..f25764fabc 100644 --- a/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp +++ b/test/performance/alignment/global_affine_alignment_simd_benchmark.cpp @@ -7,24 +7,16 @@ #include -#include -#include -#include +#include "global_affine_alignment_simd_benchmark_template.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include +// Range to test for sequence length variance +inline constexpr size_t deviation_begin = 0; +inline constexpr size_t deviation_end = 64; +inline constexpr size_t deviation_step = 8; -#ifdef SEQAN3_HAS_SEQAN2 - #include - #include -#endif +// ---------------------------------------------------------------------------- +// SeqAn3 +// ---------------------------------------------------------------------------- constexpr auto nt_score_scheme = seqan3::nucleotide_scoring_scheme{seqan3::match_score{4}, seqan3::mismatch_score{-5}}; @@ -33,73 +25,45 @@ constexpr auto affine_cfg = seqan3::align_cfg::method_global{} | seqan3::gap_open_score{-10}}} | seqan3::align_cfg::scoring_scheme{nt_score_scheme}; -// Globally defined constants to ensure same test data. -inline constexpr size_t sequence_length = 150; -inline constexpr size_t set_size = 1024; - -// Range to test for sequence length variance -inline constexpr size_t deviation_begin = 0; -inline constexpr size_t deviation_end = 64; -inline constexpr size_t deviation_step = 8; - -//We don't know if the system supports hyper-threading so we use only half the threads so that the -// simd benchmark is likely to run on physical cores only. -uint32_t get_number_of_threads() -{ - uint32_t thread_count = std::thread::hardware_concurrency(); - return (thread_count == 1) ? thread_count : thread_count >> 1; -} - -// ============================================================================ -// affine; score; dna4; collection -// ============================================================================ - -template -void seqan3_affine_dna4_accelerated(benchmark::State & state, align_configs_t && ...configs) -{ - size_t sequence_length_variance = state.range(0); - auto data = seqan3::test::generate_sequence_pairs(sequence_length, - set_size, - sequence_length_variance); - - int64_t total = 0; - auto accelerate_config = (affine_cfg | ... | configs); - for (auto _ : state) - { - for (auto && res : seqan3::align_pairwise(data, accelerate_config)) - total += res.score(); - } - - state.counters["cells"] = seqan3::test::pairwise_cell_updates(data, affine_cfg); - state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); - state.counters["total"] = total; -} - -BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan3_affine_accelerated, simd_with_score, + seqan3::dna4{}, + affine_cfg, seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, + seqan3::align_cfg::output_score, seqan3::align_cfg::vectorised) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); -BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan3_affine_accelerated, simd_with_end_position, + seqan3::dna4{}, + affine_cfg, seqan3::align_cfg::result{seqan3::with_end_positions, seqan3::using_score_type}, + seqan3::align_cfg::output_score, + seqan3::align_cfg::output_end_position, seqan3::align_cfg::vectorised) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); -BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan3_affine_accelerated, simd_parallel_with_score, + seqan3::dna4{}, + affine_cfg, seqan3::align_cfg::result{seqan3::with_score, seqan3::using_score_type}, + seqan3::align_cfg::output_score, seqan3::align_cfg::vectorised, seqan3::align_cfg::parallel{get_number_of_threads()}) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); -BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan3_affine_accelerated, simd_parallel_with_end_position, + seqan3::dna4{}, + affine_cfg, seqan3::align_cfg::result{seqan3::with_end_positions, seqan3::using_score_type}, + seqan3::align_cfg::output_score, + seqan3::align_cfg::output_end_position, seqan3::align_cfg::vectorised, seqan3::align_cfg::parallel{get_number_of_threads()}) ->UseRealTime() @@ -107,43 +71,28 @@ BENCHMARK_CAPTURE(seqan3_affine_dna4_accelerated, #ifdef SEQAN3_HAS_SEQAN2 -template -void seqan2_affine_dna4_accelerated(benchmark::State & state, args_t && ...args) -{ - std::tuple captured_args{args...}; - - size_t sequence_length_variance = state.range(0); - auto [vec1, vec2] = seqan3::test::generate_sequence_pairs_seqan2(sequence_length, - set_size, - sequence_length_variance); - auto exec = std::get<0>(captured_args); - setNumThreads(exec, std::get<1>(captured_args)); - - int64_t total = 0; - for (auto _ : state) - { - // In SeqAn2 the gap open contains already the gap extension costs, that's why we use -11 here. - auto res = seqan::globalAlignmentScore(exec, vec1, vec2, seqan::Score{4, -5, -1, -11}); - total = std::accumulate(seqan::begin(res), seqan::end(res), total); - } - - state.counters["cells"] = seqan3::test::pairwise_cell_updates(seqan3::views::zip(vec1, vec2), affine_cfg); - state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); - state.counters["total"] = total; -} +// ---------------------------------------------------------------------------- +// SeqAn2 +// ---------------------------------------------------------------------------- // Note SeqAn2 has no parallel interface yet for computing the traceback as well. -BENCHMARK_CAPTURE(seqan2_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan2_affine_accelerated, simd_with_score, + seqan::Dna{}, + seqan::Score{4, -5, -1, -11}, seqan::ExecutionPolicy{}, - 1) + 1, + affine_cfg) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); -BENCHMARK_CAPTURE(seqan2_affine_dna4_accelerated, +BENCHMARK_CAPTURE(seqan2_affine_accelerated, simd_parallel_with_score, + seqan::Dna{}, + seqan::Score{4, -5, -1, -11}, seqan::ExecutionPolicy{}, - get_number_of_threads()) + get_number_of_threads(), + affine_cfg) ->UseRealTime() ->DenseRange(deviation_begin, deviation_end, deviation_step); #endif // SEQAN3_HAS_SEQAN2 diff --git a/test/performance/alignment/global_affine_alignment_simd_benchmark_template.hpp b/test/performance/alignment/global_affine_alignment_simd_benchmark_template.hpp new file mode 100644 index 0000000000..6a1bff3667 --- /dev/null +++ b/test/performance/alignment/global_affine_alignment_simd_benchmark_template.hpp @@ -0,0 +1,104 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef SEQAN3_HAS_SEQAN2 + #include + #include +#endif + +// Globally defined constants to ensure same test data. +inline constexpr size_t sequence_length = 150; +#ifndef NDEBUG +inline constexpr size_t set_size = 128; +#else +inline constexpr size_t set_size = 1024; +#endif // NDEBUG + +// We don't know if the system supports hyper-threading so we use only half the threads so that the +// simd benchmark is likely to run on physical cores only. +uint32_t get_number_of_threads() +{ + uint32_t thread_count = std::thread::hardware_concurrency(); + return (thread_count == 1) ? thread_count : thread_count >> 1; +} + +// ---------------------------------------------------------------------------- +// seqan3 pairwise alignment +// ---------------------------------------------------------------------------- + +template +void seqan3_affine_accelerated(benchmark::State & state, alphabet_t, align_configs_t && ...configs) +{ + size_t sequence_length_variance = state.range(0); + auto data = seqan3::test::generate_sequence_pairs(sequence_length, + set_size, + sequence_length_variance); + + int64_t total = 0; + auto accelerate_config = (configs | ...); + for (auto _ : state) + { + for (auto && res : seqan3::align_pairwise(data, accelerate_config)) + total += res.score(); + } + + state.counters["cells"] = seqan3::test::pairwise_cell_updates(data, accelerate_config); + state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); + state.counters["total"] = total; +} + +#ifdef SEQAN3_HAS_SEQAN2 + +// ---------------------------------------------------------------------------- +// seqan2 pairwise alignment +// ---------------------------------------------------------------------------- + +template +void seqan2_affine_accelerated(benchmark::State & state, alphabet_t, args_t && ...args) +{ + std::tuple captured_args{args...}; + + size_t sequence_length_variance = state.range(0); + auto [vec1, vec2] = seqan3::test::generate_sequence_pairs_seqan2(sequence_length, + set_size, + sequence_length_variance); + + auto scoring_scheme = std::get<0>(captured_args); + auto exec = std::get<1>(captured_args); + setNumThreads(exec, std::get<2>(captured_args)); + + int64_t total = 0; + for (auto _ : state) + { + // In SeqAn2 the gap open contains already the gap extension costs, that's why we use -11 here. + auto res = seqan::globalAlignmentScore(exec, vec1, vec2, scoring_scheme); + total = std::accumulate(seqan::begin(res), seqan::end(res), total); + } + + state.counters["cells"] = seqan3::test::pairwise_cell_updates(seqan3::views::zip(vec1, vec2), + std::get<3>(captured_args)); + state.counters["CUPS"] = seqan3::test::cell_updates_per_second(state.counters["cells"]); + state.counters["total"] = total; +} + +#endif // SEQAN3_HAS_SEQAN2 diff --git a/test/unit/alignment/multiple/align_multiple_seqan2_adaptation_test.cpp b/test/unit/alignment/multiple/align_multiple_seqan2_adaptation_test.cpp index 89e3d54cf6..1b8cae940c 100644 --- a/test/unit/alignment/multiple/align_multiple_seqan2_adaptation_test.cpp +++ b/test/unit/alignment/multiple/align_multiple_seqan2_adaptation_test.cpp @@ -31,15 +31,14 @@ using adaptation_t = seqan3::detail::align_multiple_seqan2_adaptation::alphabet_type, seqan::Dna>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::Dna5>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::Iupac>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::Rna>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::Rna5>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::AminoAcid>)); - EXPECT_TRUE((std::same_as::alphabet_type, seqan::ReducedAminoAcid>)); - EXPECT_TRUE((std::same_as::alphabet_type, - seqan::ReducedAminoAcid>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); + EXPECT_TRUE((std::same_as::alphabet_type, char>)); } TEST(initialise_scoring_scheme, config_no_scoring_configuration) @@ -50,7 +49,7 @@ TEST(initialise_scoring_scheme, config_no_scoring_configuration) auto msaOpt = seqan3::detail::test_accessor::initialise_scoring_scheme(config); - EXPECT_TRUE((std::same_as>)); + EXPECT_TRUE((std::same_as>>)); } TEST(initialise_scoring_scheme, blosum62) @@ -62,11 +61,17 @@ TEST(initialise_scoring_scheme, blosum62) auto msaOpt = seqan3::detail::test_accessor::initialise_scoring_scheme(config); // compare matrix size and abort test if not equal so the value comparison does not segfault - ASSERT_EQ(static_cast(decltype(msaOpt.sc)::TAB_SIZE), static_cast(seqan::Blosum62::TAB_SIZE)); + ASSERT_EQ(static_cast(decltype(msaOpt.sc)::TAB_SIZE), 256ull * 256ull); seqan::Blosum62 expected_matrix{}; - for (size_t i = 0; i < static_cast(seqan::Blosum62::TAB_SIZE); ++i) - EXPECT_EQ(msaOpt.sc.data_tab[i], expected_matrix.data_tab[i]); + for (size_t aa_rank1 = 0; aa_rank1 < seqan::ValueSize::VALUE; ++aa_rank1) + for (size_t aa_rank2 = 0; aa_rank2 < seqan::ValueSize::VALUE; ++aa_rank2) + EXPECT_EQ(seqan::score(msaOpt.sc, + seqan::convert(aa_rank1), + seqan::convert(aa_rank2)), + seqan::score(expected_matrix, + seqan::convert(aa_rank1), + seqan::convert(aa_rank2))); } TEST(configuration, gap_score_conversion) diff --git a/test/unit/alignment/pairwise/CMakeLists.txt b/test/unit/alignment/pairwise/CMakeLists.txt index a19b8163b0..21ac7b410e 100644 --- a/test/unit/alignment/pairwise/CMakeLists.txt +++ b/test/unit/alignment/pairwise/CMakeLists.txt @@ -4,8 +4,10 @@ seqan3_test(alignment_result_test.cpp) seqan3_test(align_result_selector_test.cpp) seqan3_test(alignment_configurator_test.cpp) seqan3_test(global_affine_banded_test.cpp) +seqan3_test(global_affine_unbanded_aa27_test.cpp) seqan3_test(global_affine_unbanded_callback_test.cpp) seqan3_test(global_affine_unbanded_collection_callback_test.cpp) +seqan3_test(global_affine_unbanded_collection_simd_aa27_test.cpp) seqan3_test(global_affine_unbanded_collection_simd_test.cpp) seqan3_test(global_affine_unbanded_collection_test.cpp) seqan3_test(global_affine_unbanded_test.cpp) diff --git a/test/unit/alignment/pairwise/fixture/global_affine_unbanded.hpp b/test/unit/alignment/pairwise/fixture/global_affine_unbanded.hpp index ff59078cc3..16f14db2ce 100644 --- a/test/unit/alignment/pairwise/fixture/global_affine_unbanded.hpp +++ b/test/unit/alignment/pairwise/fixture/global_affine_unbanded.hpp @@ -401,4 +401,107 @@ static auto dna4_match_4_mismatch_5_gap_1_open_10_both_empty = []() }; }(); +// ---------------------------------------------------------------------------- +// alignment fixtures using amino acid alphabet +// ---------------------------------------------------------------------------- + +inline constexpr auto config_blosum62_scheme = + seqan3::align_cfg::scoring_scheme{seqan3::aminoacid_scoring_scheme{seqan3::aminoacid_similarity_matrix::BLOSUM62}}; + +static auto aa27_blosum62_gap_1_open_10 = [] () +{ + using seqan3::operator""_aa27; + return seqan3::test::alignment::fixture::alignment_fixture + { + "FNQSAEYPDISHCGVMQLKWRATLGT"_aa27, + "EIKSDVLLHRWSMKNPGNILMIDVGMQVAESYFAT"_aa27, + align_config | config_blosum62_scheme, + -26, + "--------FNQSAEYP-DISHCGVMQLKWRATLGT", + "EIKSDVLLHRWSMKNPGNILMIDVGMQVAESYFAT", + seqan3::alignment_coordinate{seqan3::detail::column_index_type{0u}, seqan3::detail::row_index_type{0u}}, + seqan3::alignment_coordinate{seqan3::detail::column_index_type{26u}, seqan3::detail::row_index_type{35u}}, + std::vector + { + // e ,F ,N ,Q ,S ,A ,E ,Y ,P ,D ,I ,S ,H ,C ,G ,V ,M ,Q ,L ,K ,W ,R ,A ,T ,L ,G ,T , + /*e*/ 0 ,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,-27,-28,-29,-30,-31,-32,-33,-34,-35,-36, + /*E*/ -11,-3 ,-11,-10,-13,-15,-10,-18,-18,-16,-22,-20,-21,-25,-25,-26,-27,-24,-30,-27,-32,-30,-32,-33,-36,-36,-36, + /*I*/ -12,-11,-6 ,-14,-12,-14,-18,-11,-21,-21,-12,-23,-23,-22,-26,-22,-25,-29,-22,-31,-30,-33,-31,-33,-31,-37,-37, + /*K*/ -13,-15,-11,-5 ,-14,-13,-13,-19,-12,-21,-22,-12,-23,-24,-24,-26,-23,-24,-29,-17,-28,-28,-30,-31,-32,-33,-34, + /*S*/ -14,-15,-14,-11,-1 ,-12,-13,-14,-15,-12,-17,-18,-13,-20,-21,-22,-23,-23,-25,-26,-20,-28,-27,-29,-31,-32,-32, + /*D*/ -15,-17,-14,-14,-11,-3 ,-10,-15,-15,-9 ,-15,-17,-19,-16,-21,-23,-24,-23,-26,-26,-28,-22,-30,-28,-32,-32,-33, + /*V*/ -16,-16,-20,-16,-13,-11,-5 ,-11,-17,-18,-6 ,-17,-18,-19,-19,-17,-22,-23,-22,-25,-26,-27,-22,-29,-27,-31,-32, + /*L*/ -17,-16,-19,-19,-14,-14,-14,-6 ,-14,-18,-16,-8 ,-19,-19,-21,-18,-15,-24,-19,-24,-27,-28,-28,-23,-25,-31,-32, + /*L*/ -18,-17,-19,-20,-15,-15,-17,-15,-9 ,-18,-16,-18,-11,-20,-23,-20,-16,-17,-20,-21,-26,-29,-29,-29,-19,-29,-31, + /*H*/ -19,-19,-16,-19,-16,-17,-15,-15,-17,-10,-19,-17,-10,-14,-22,-23,-22,-16,-20,-21,-23,-26,-30,-31,-30,-21,-31, + /*R*/ -20,-22,-19,-15,-17,-17,-17,-17,-17,-19,-13,-20,-17,-13,-16,-25,-24,-21,-18,-18,-24,-18,-27,-30,-31,-32,-22, + /*W*/ -21,-19,-25,-21,-18,-19,-20,-15,-21,-21,-21,-16,-22,-19,-15,-19,-26,-26,-23,-21,-7 ,-18,-19,-20,-21,-22,-23, + /*S*/ -22,-23,-18,-24,-17,-17,-19,-21,-16,-21,-22,-17,-17,-23,-19,-17,-20,-26,-28,-23,-18,-8 ,-17,-18,-21,-21,-21, + /*M*/ -23,-22,-25,-18,-20,-18,-19,-20,-23,-19,-20,-23,-19,-18,-26,-18,-12,-20,-24,-25,-19,-19,-9 ,-18,-16,-22,-22, + /*K*/ -24,-26,-22,-24,-18,-21,-17,-21,-21,-24,-22,-20,-24,-22,-20,-28,-19,-11,-22,-19,-20,-17,-20,-10,-20,-18,-23, + /*N*/ -25,-27,-20,-22,-22,-20,-21,-19,-23,-20,-25,-21,-19,-27,-22,-23,-24,-19,-14,-22,-21,-20,-19,-20,-13,-20,-18, + /*P*/ -26,-28,-29,-21,-23,-23,-21,-24,-12,-23,-23,-25,-23,-22,-28,-24,-25,-23,-22,-15,-22,-22,-21,-20,-23,-15,-21, + /*G*/ -27,-29,-28,-29,-21,-23,-25,-24,-23,-13,-24,-23,-26,-26,-16,-27,-26,-24,-26,-24,-17,-23,-22,-23,-24,-17,-17, + /*N*/ -28,-30,-23,-28,-25,-23,-23,-27,-24,-22,-16,-23,-22,-29,-26,-19,-27,-25,-27,-26,-24,-17,-24,-22,-26,-24,-17, + /*I*/ -29,-28,-33,-26,-26,-26,-26,-24,-25,-25,-18,-18,-26,-23,-28,-23,-18,-26,-23,-28,-25,-25,-18,-25,-20,-28,-25, + /*L*/ -30,-29,-31,-32,-27,-27,-29,-27,-26,-26,-23,-20,-21,-27,-27,-27,-21,-20,-22,-25,-26,-26,-26,-19,-21,-24,-29, + /*M*/ -31,-30,-31,-31,-28,-28,-29,-30,-27,-27,-25,-24,-22,-22,-30,-26,-22,-21,-18,-23,-26,-27,-27,-27,-17,-24,-25, + /*I*/ -32,-31,-33,-34,-29,-29,-31,-30,-28,-28,-23,-27,-27,-23,-26,-27,-25,-25,-19,-21,-26,-28,-28,-28,-25,-21,-25, + /*D*/ -33,-35,-30,-33,-30,-31,-27,-32,-29,-22,-31,-23,-28,-30,-24,-29,-30,-25,-29,-20,-25,-28,-29,-29,-29,-26,-22, + /*V*/ -34,-34,-38,-32,-31,-30,-33,-28,-30,-30,-19,-30,-26,-29,-33,-20,-28,-31,-24,-31,-23,-28,-28,-29,-28,-32,-26, + /*G*/ -35,-37,-34,-37,-32,-31,-32,-34,-30,-31,-30,-19,-30,-29,-23,-31,-23,-30,-32,-26,-31,-25,-28,-30,-31,-22,-33, + /*M*/ -36,-35,-39,-34,-33,-33,-33,-33,-32,-32,-30,-30,-21,-31,-32,-22,-26,-23,-28,-33,-27,-32,-26,-29,-28,-33,-23, + /*Q*/ -37,-39,-35,-34,-34,-34,-31,-34,-33,-32,-32,-30,-30,-24,-33,-33,-22,-21,-25,-27,-33,-26,-33,-27,-31,-30,-34, + /*V*/ -38,-38,-42,-37,-35,-34,-36,-32,-34,-34,-29,-32,-33,-31,-27,-29,-32,-24,-20,-27,-30,-33,-26,-33,-26,-34,-30, + /*A*/ -39,-40,-40,-41,-36,-31,-35,-38,-33,-35,-34,-28,-34,-33,-31,-27,-30,-33,-25,-21,-30,-31,-29,-26,-34,-26,-34, + /*E*/ -40,-42,-40,-38,-37,-37,-26,-37,-36,-31,-35,-34,-28,-37,-35,-33,-29,-28,-32,-24,-24,-30,-32,-30,-29,-36,-27, + /*S*/ -41,-42,-41,-40,-34,-36,-37,-28,-37,-36,-33,-31,-35,-29,-37,-37,-34,-29,-30,-32,-27,-25,-29,-31,-32,-29,-35, + /*Y*/ -42,-38,-44,-42,-39,-36,-38,-30,-31,-38,-37,-35,-29,-37,-32,-38,-37,-35,-30,-32,-30,-29,-27,-31,-32,-35,-31, + /*F*/ -43,-36,-41,-45,-40,-41,-39,-35,-34,-34,-38,-37,-36,-31,-40,-33,-38,-37,-35,-33,-31,-33,-31,-29,-31,-35,-37, + /*A*/ -44,-45,-38,-42,-41,-36,-40,-41,-36,-36,-35,-37,-39,-36,-31,-40,-34,-38,-36,-36,-36,-32,-29,-31,-30,-31,-35, + /*T*/ -45,-46,-45,-39,-41,-41,-37,-42,-41,-37,-37,-34,-39,-40,-38,-31,-40,-35,-37,-37,-38,-37,-32,-24,-32,-32,-26 + }, + std::vector + { + // e ,F ,N ,Q ,S ,A ,E ,Y ,P ,D ,I ,S ,H ,C ,G ,V ,M ,Q ,L ,K ,W ,R ,A ,T ,L ,G ,T , + /*e*/ N ,L ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l ,l , + /*E*/ U ,DUL,DUL,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,l ,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl, + /*I*/ u ,DUL,DUL,DUL,DUl,DUl,DUl,Dul,DUl,DUl,Dul,L ,DUl,Dul,l ,Dul,Dul,l ,Dul,l ,Dul,l ,DUl,DUl,Dul,l ,DUl, + /*K*/ u ,DuL,DUl,DuL,DUL,DUl,Dul,l ,Dul,l ,l ,Dul,L ,l ,Dul,l ,DUl,Dul,l ,Dul,L ,Dul,l ,l ,l ,Dul,l , + /*S*/ u ,DuL,Dul,DUL,DuL,L ,Dul,l ,l ,Dul,l ,DUl,Dul,l ,l ,l ,l ,DUl,l ,l ,DUl,l ,DUl,DUl,l ,DUl,DUl, + /*D*/ u ,DuL,Dul,DuL,DUL,DUL,DuL,l ,Dul,DUl,Dul,Dul,DUl,DUl,DUl,l ,l ,DUl,l ,Dul,l ,DUl,DUl,DUl,l ,DUl,DUl, + /*V*/ u ,DuL,DuL,Dul,uL ,DUL,DUL,DuL,Dul,DUl,Dul,DuL,l ,l ,DUl,Dul,Dul,l ,Dul,l ,l ,l ,Dul,l ,Dul,l ,Dul, + /*L*/ u ,DuL,DuL,ul ,ul ,DuL,DUL,DUL,DuL,l ,DUl,Dul,L ,Dul,l ,DUl,DUl,DUl,DUl,Dul,Dul,Dul,DUl,Dul,DUl,DUl,DUl, + /*L*/ u ,DuL,DuL,ul ,ul ,DuL,DuL,DUl,DUL,DuL,Dul,DUl,Dul,DuL,Dul,Dul,DUl,Dul,DUl,Dul,Dul,Dul,Dul,DUl,DUl,DUL,l , + /*H*/ u ,DuL,DuL,DuL,ul ,DuL,Dul,DuL,DUL,Dul,uL ,Dul,DUl,DuL,Dul,l ,Dul,DUl,Dul,DUl,Dul,Dul,l ,Dul,Ul ,DUl,DUL, + /*R*/ u ,DuL,Dul,DuL,uL ,Dul,Dul,DuL,DuL,DUL,Dul,DuL,DUl,DUl,DuL,Dul,Dul,DUl,DUl,DUl,DUL,Dul,DuL,l ,ul ,DUl,DUl, + /*W*/ u ,DuL,uL ,Dul,Dul,uL ,Dul,Dul,DuL,Dul,ul ,Dul,DuL,DUl,DUl,DuL,Dul,Dul,DUl,DUl,Dul,L ,l ,l ,l ,l ,l , + /*S*/ u ,DuL,Dul,uL ,Dul,DuL,DuL,ul ,Dul,DuL,ul ,Dul,DuL,DuL,DUl,DUl,DuL,Dul,Dul,Dul,Ul ,DUL,DUL,DUl,l ,DUl,Dul, + /*M*/ u ,DuL,DuL,Dul,uL ,Dul,DuL,Dul,Dul,Dul,DuL,Dul,Dul,DuL,DuL,DUl,DuL,DuL,Dul,l ,ul ,DUl,DUl,DUL,DUl,l ,DUl, + /*K*/ u ,DuL,Dul,DuL,Dul,DuL,Dul,DuL,Dul,Dul,Dul,Dul,DuL,Dul,Dul,DuL,DUl,DuL,DuL,Dul,ul ,Dul,DUl,DUl,DUL,Dul,Dul, + /*N*/ u ,DuL,Dul,DuL,ul ,Dul,DuL,Dul,DuL,Dul,uL ,Dul,DuL,DuL,Dul,Dul,ul ,DUl,DuL,DUL,ul ,Dul,Dul,DUl,Dul,DUL,Dul, + /*P*/ u ,uL ,Dul,Dul,DuL,Dul,Dul,DuL,Dul,L ,Dul,l ,Dul,Dul,l ,Dul,Dul,ul ,DUl,Dul,uL ,ul ,Dul,Dul,DUl,Dul,DUL, + /*G*/ u ,DuL,Dul,uL ,Dul,DuL,Dul,Dul,Ul ,DuL,L ,Dul,l ,Dul,Dul,L ,ul ,ul ,ul ,DUl,Dul,uL ,Dul,Dul,Dul,DUl,DuL, + /*N*/ u ,DuL,Dul,DuL,ul ,Dul,DuL,DuL,ul ,DUL,DuL,DuL,Dul,Dul,DUl,Dul,uL ,ul ,Dul,Dul,ul ,Dul,uL ,Dul,Dul,Dul,DUl, + /*I*/ u ,DuL,DuL,Dul,uL ,DuL,DuL,DuL,uL ,ul ,DUL,DuL,DuL,Dul,ul ,DUl,Dul,uL ,Dul,ul ,ul ,ul ,Dul,DuL,Dul,ul ,DUl, + /*L*/ u ,DuL,DuL,ul ,ul ,DuL,DuL,Dul,uL ,uL ,DuL,DUL,DuL,Dul,Dul,Dul,Dul,DuL,DuL,Dul,ul ,ul ,Dul,Dul,DuL,Dul,Dul, + /*M*/ u ,DuL,DuL,Dul,uL ,DuL,DuL,Dul,ul ,uL ,DuL,DuL,DuL,DuL,DuL,Dul,Dul,DuL,DuL,DuL,Dul,Dul,Dul,Dul,Dul,DuL,Dul, + /*I*/ u ,DuL,DuL,Dul,ul ,DuL,DuL,Dul,uL ,uL ,DuL,DuL,Dul,DUl,DuL,Dul,Dul,DuL,DUL,DuL,Dul,ul ,Dul,Dul,DUl,Dul,DuL, + /*D*/ u ,DuL,Dul,DuL,ul ,DuL,Dul,uL ,ul ,Dul,DuL,Dul,DuL,Dul,Dul,DuL,Dul,Dul,DuL,Dul,DuL,Dul,ul ,Dul,ul ,Dul,Dul, + /*V*/ u ,DuL,DuL,Dul,uL ,DuL,DuL,Dul,uL ,ul ,Dul,L ,Dul,Dul,Dul,Dul,DuL,ul ,Dul,DUl,Dul,DuL,Dul,Dul,Dul,Dul,Dul, + /*G*/ u ,DuL,Dul,uL ,Dul,DuL,DuL,ul ,Dul,DuL,Ul ,DuL,L ,Dul,Dul,Ul ,Dul,DuL,ul ,Dul,ul ,Dul,DuL,Dul,ul ,Dul,L , + /*M*/ u ,DuL,DuL,Dul,uL ,DuL,DuL,DuL,uL ,uL ,DuL,UL ,DuL,DuL,DUl,Dul,DUL,Dul,DuL,Dul,Dul,Dul,Dul,DuL,Dul,Ul ,Dul, + /*Q*/ u ,DuL,Dul,DuL,DuL,DuL,DuL,DuL,ul ,Dul,uL ,DuL,DUL,DuL,DuL,ul ,Dul,DuL,DuL,Dul,ul ,Dul,Dul,Dul,Dul,Dul,DUl, + /*V*/ u ,DuL,DuL,Dul,uL ,DuL,DuL,Dul,uL ,ul ,Dul,uL ,Dul,DUl,DuL,DuL,DUl,DUl,DuL,DuL,Dul,l ,Dul,Dul,Dul,Dul,Dul, + /*A*/ u ,DuL,Dul,uL ,Dul,DuL,DuL,Dul,Dul,uL ,ul ,DuL,DuL,Dul,Dul,Dul,DuL,Dul,DUl,DuL,DuL,Dul,Dul,Dul,Dul,Dul,DuL, + /*E*/ u ,DuL,Dul,DuL,uL ,DuL,DuL,DuL,ul ,Dul,ul ,Dul,Dul,uL ,Dul,Dul,Dul,DuL,uL ,DUl,DuL,DuL,Dul,Dul,Dul,Dul,Dul, + /*S*/ u ,DuL,Dul,DuL,DuL,DuL,DUl,Dul,uL ,Dul,Dul,Dul,DuL,Dul,DuL,Dul,Dul,Dul,DuL,Dul,DUl,DuL,DuL,Dul,Dul,Dul,Dul, + /*Y*/ u ,DuL,DuL,Dul,ul ,DuL,DuL,DUl,DuL,ul ,Dul,Dul,Dul,DuL,Dul,Dul,ul ,Dul,Dul,DuL,Dul,DUL,DuL,DuL,Dul,Dul,Dul, + /*F*/ u ,DuL,DuL,ul ,ul ,Dul,Dul,DuL,DuL,DuL,DuL,ul ,Dul,DuL,DuL,Dul,Dul,ul ,Dul,DuL,DuL,DuL,DUl,DuL,DuL,Dul,Dul, + /*A*/ u ,DuL,Dul,DuL,ul ,Dul,uL ,Dul,Dul,DuL,DuL,DuL,Dul,Dul,DuL,DuL,Dul,ul ,ul ,Dul,Dul,DuL,DuL,DuL,Dul,DuL,Dul, + /*T*/ u ,DuL,Dul,DuL,DuL,Dul,Dul,DuL,ul ,Dul,DuL,DuL,DuL,Dul,DUl,Dul,uL ,Dul,ul ,Dul,Dul,Dul,Dul,DuL,DuL,Dul,Dul + } + }; +}(); + } // namespace seqan3::test::alignment::fixture::global::affine::unbanded diff --git a/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp b/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp index f56d126c92..291613dfa3 100644 --- a/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp +++ b/test/unit/alignment/pairwise/fixture/semi_global_affine_banded.hpp @@ -27,27 +27,46 @@ using seqan3::operator""_dna4; namespace seqan3::test::alignment::fixture::semi_global::affine::banded { -inline constexpr auto align_config = seqan3::align_cfg::gap{seqan3::gap_scheme{seqan3::gap_score{-1}, - seqan3::gap_open_score{-10}}} | - seqan3::align_cfg::band_fixed_size{seqan3::align_cfg::lower_diagonal{-4}, - seqan3::align_cfg::upper_diagonal{8}}; - -inline constexpr auto align_config_semi_seq1 = align_config | - seqan3::align_cfg::method_global{ - seqan3::align_cfg::free_end_gaps_sequence1_leading{true}, - seqan3::align_cfg::free_end_gaps_sequence2_leading{false}, - seqan3::align_cfg::free_end_gaps_sequence1_trailing{true}, - seqan3::align_cfg::free_end_gaps_sequence2_trailing{false} - } | - seqan3::align_cfg::aligned_ends{seqan3::free_ends_first}; -inline constexpr auto align_config_semi_seq2 = align_config | - seqan3::align_cfg::method_global{ - seqan3::align_cfg::free_end_gaps_sequence1_leading{false}, - seqan3::align_cfg::free_end_gaps_sequence2_leading{true}, - seqan3::align_cfg::free_end_gaps_sequence1_trailing{false}, - seqan3::align_cfg::free_end_gaps_sequence2_trailing{true} - } | - seqan3::align_cfg::aligned_ends{seqan3::free_ends_second}; +inline constexpr auto config_band_m4_8 = seqan3::align_cfg::band_fixed_size{seqan3::align_cfg::lower_diagonal{-4}, + seqan3::align_cfg::upper_diagonal{8}}; + +inline constexpr auto config_gap = seqan3::align_cfg::gap{seqan3::gap_scheme{seqan3::gap_score{-1}, + seqan3::gap_open_score{-10}}}; + +inline constexpr auto config_semi_seq1 = seqan3::align_cfg::method_global{ + seqan3::align_cfg::free_end_gaps_sequence1_leading{true}, + seqan3::align_cfg::free_end_gaps_sequence2_leading{false}, + seqan3::align_cfg::free_end_gaps_sequence1_trailing{true}, + seqan3::align_cfg::free_end_gaps_sequence2_trailing{false} + } | seqan3::align_cfg::aligned_ends{seqan3::free_ends_first}; + +inline constexpr auto config_semi_seq2 = seqan3::align_cfg::method_global{ + seqan3::align_cfg::free_end_gaps_sequence1_leading{false}, + seqan3::align_cfg::free_end_gaps_sequence2_leading{true}, + seqan3::align_cfg::free_end_gaps_sequence1_trailing{false}, + seqan3::align_cfg::free_end_gaps_sequence2_trailing{true} + } | seqan3::align_cfg::aligned_ends{seqan3::free_ends_second}; + +// free gaps for the second leading and first trailing gaps. +inline constexpr auto config_free_gaps_sl_ft = seqan3::align_cfg::method_global{ + seqan3::align_cfg::free_end_gaps_sequence1_leading{false}, + seqan3::align_cfg::free_end_gaps_sequence2_leading{true}, + seqan3::align_cfg::free_end_gaps_sequence1_trailing{true}, + seqan3::align_cfg::free_end_gaps_sequence2_trailing{false} + } | seqan3::align_cfg::aligned_ends{ + seqan3::end_gaps{seqan3::front_end_second{std::true_type{}}, + seqan3::back_end_first{std::true_type{}}}}; + +inline constexpr auto config_free_gaps_tlbr = seqan3::align_cfg::method_global{ + seqan3::align_cfg::free_end_gaps_sequence1_leading{true}, + seqan3::align_cfg::free_end_gaps_sequence2_leading{true}, + seqan3::align_cfg::free_end_gaps_sequence1_trailing{true}, + seqan3::align_cfg::free_end_gaps_sequence2_trailing{true} + } | seqan3::align_cfg::aligned_ends{seqan3::free_ends_all}; + +inline constexpr auto config_scoring_m4_mm5 = seqan3::align_cfg::scoring_scheme{ + seqan3::nucleotide_scoring_scheme{seqan3::match_score{4}, + seqan3::mismatch_score{-5}}}; static auto dna4_01_semi_first = []() { @@ -55,9 +74,7 @@ static auto dna4_01_semi_first = []() { "TTTTTACGTATGTCCCCC"_dna4, "ACGTAAAACGT"_dna4, - align_config_semi_seq1 - | seqan3::align_cfg::scoring_scheme{seqan3::nucleotide_scoring_scheme{seqan3::match_score{4}, - seqan3::mismatch_score{-5}}}, + config_semi_seq1 | config_gap | config_band_m4_8 | config_scoring_m4_mm5, 10, "ACGT---ATGT", "ACGTAAAACGT", @@ -124,9 +141,7 @@ static auto dna4_03_semi_second = []() { "TTTTTACGTATGTCCCCC"_dna4, "ACGTAAAACGT"_dna4, - align_config_semi_seq2 - | seqan3::align_cfg::scoring_scheme{seqan3::nucleotide_scoring_scheme{seqan3::match_score{4}, - seqan3::mismatch_score{-5}}}, + config_semi_seq2 | config_gap | config_band_m4_8 | config_scoring_m4_mm5, -19, "TTTTTACGTATGTCCCCC", "GTAAAACGT---------", @@ -173,9 +188,7 @@ static auto dna4_04_semi_second = []() { "ACGTAAAACGT"_dna4, "TTTTTACGTATGTCCCCC"_dna4, - align_config_semi_seq2 - | seqan3::align_cfg::scoring_scheme{seqan3::nucleotide_scoring_scheme{seqan3::match_score{4}, - seqan3::mismatch_score{-5}}}, + config_semi_seq2 | config_gap | config_band_m4_8 | config_scoring_m4_mm5, -5, "ACGTAAAACGT", "------TACGT", @@ -230,4 +243,151 @@ static auto dna4_04_semi_second = []() }; }(); +static auto dna4_free_lb_with_band_tl2br_no_matches = []() +{ + return alignment_fixture + { + "AAAAAAAAAAA"_dna4, + "TTTTTTTTTTT"_dna4, + config_free_gaps_sl_ft | config_gap | config_band_m4_8 | config_scoring_m4_mm5, + -34, + "AAAAAAA-------", + "-------TTTTTTT", + seqan3::alignment_coordinate{seqan3::detail::column_index_type{0u}, seqan3::detail::row_index_type{4u}}, + seqan3::alignment_coordinate{seqan3::detail::column_index_type{7u}, seqan3::detail::row_index_type{11u}}, + std::vector> + { + // e, A, A, A, A, A, A, A, A, A, A, A, + /*e*/ 0 ,-11,-12,-13,-14,-15,-16,-17,-18,INF,INF,INF, + /*T*/ 0 ,-5 ,-12,-13,-14,-15,-16,-17,-18,-19,INF,INF, + /*T*/ 0 ,-5 ,-10,-13,-14,-15,-16,-17,-18,-19,-20,INF, + /*T*/ 0 ,-5 ,-10,-13,-14,-15,-16,-17,-18,-19,-20,-21, + /*T*/ 0 ,-5 ,-10,-13,-14,-15,-16,-17,-18,-19,-20,-21, + /*T*/ INF,-5 ,-10,-15,-18,-19,-20,-21,-22,-23,-24,-25, + /*T*/ INF,INF,-10,-15,-20,-23,-24,-25,-26,-27,-28,-29, + /*T*/ INF,INF,INF,-15,-20,-25,-28,-29,-30,-31,-32,-33, + /*T*/ INF,INF,INF,INF,-20,-25,-30,-31,-32,-33,-34,-35, + /*T*/ INF,INF,INF,INF,INF,-25,-30,-32,-33,-34,-35,-36, + /*T*/ INF,INF,INF,INF,INF,INF,-30,-33,-34,-35,-36,-37, + /*T*/ INF,INF,INF,INF,INF,INF,INF,-34,-35,-36,-37,-38 + }, + std::vector> + { + // e, A, A, A, A, A, A, A, A, A, A, A, + /*e*/ N ,L ,l ,l ,l ,l ,l ,l ,l ,INF,INF,INF, + /*T*/ N ,DUL,l ,l ,l ,l ,l ,l ,l ,l ,INF,INF, + /*T*/ N ,DUL,DUl,l ,l ,l ,l ,l ,l ,l ,l ,INF, + /*T*/ N ,DUL,DUl,l ,l ,l ,l ,l ,l ,l ,l ,l , + /*T*/ N ,DUL,DUl,l ,l ,l ,l ,l ,l ,l ,l ,l , + /*T*/ INF,DU ,DUL,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl,DUl, + /*T*/ INF,INF,DU ,DuL,Dul,Dul,Dul,Dul,Dul,Dul,Dul,Dul, + /*T*/ INF,INF,INF,Du ,DuL,Dul,Dul,Dul,Dul,Dul,Dul,Dul, + /*T*/ INF,INF,INF,INF,Du ,DuL,Dul,ul ,ul ,ul ,ul ,ul , + /*T*/ INF,INF,INF,INF,INF,Du ,DuL,ul ,ul ,ul ,ul ,ul , + /*T*/ INF,INF,INF,INF,INF,INF,Du ,uL ,ul ,ul ,ul ,ul , + /*T*/ INF,INF,INF,INF,INF,INF,INF,u ,uL ,ul ,ul ,ul + } + }; +}(); + +// band starts in top left with lower diagonal exceeding size of sequence 2 and ends in the bottom. +static auto dna4_free_tlbr_with_band_tl2b = [] () +{ + return alignment_fixture + { + "AGATTTACTACGCAT"_dna4, + "GTAGCAT"_dna4, + config_free_gaps_tlbr | config_gap | config_scoring_m4_mm5 | + seqan3::align_cfg::band_fixed_size{seqan3::align_cfg::lower_diagonal{-10}, + seqan3::align_cfg::upper_diagonal{4}}, + 5, + "AG-AT", + "AGCAT", + seqan3::alignment_coordinate{seqan3::detail::column_index_type{0u}, seqan3::detail::row_index_type{2u}}, + seqan3::alignment_coordinate{seqan3::detail::column_index_type{4u}, seqan3::detail::row_index_type{7u}}, + std::vector> + { + // e ,A ,G ,A ,T ,T ,T ,A ,C ,T ,A ,C ,G ,C ,A ,T , + /*e*/ 0 ,0 ,0 ,0 ,0 ,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ 0 ,-5 ,4 ,-5 ,-5 ,-5 ,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*T*/ 0 ,-5 ,-7 ,-1 ,-1 ,-1 ,-1 ,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*A*/ 0 ,4 ,-7 ,-3 ,-6 ,-6 ,-6 ,3 ,INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ 0 ,-5 ,8 ,-3 ,-4 ,-5 ,-6 ,-7 ,-2 ,INF,INF,INF,INF,INF,INF,INF, + /*C*/ 0 ,-5 ,-3 ,3 ,-8 ,-9 ,-10,-9 ,-3 ,-7 ,INF,INF,INF,INF,INF,INF, + /*A*/ 0 ,4 ,-4 ,1 ,-2 ,-10,-11,-6 ,-13,-8 ,-3 ,INF,INF,INF,INF,INF, + /*T*/ 0 ,-5 ,-1 ,-9 ,5 ,2 ,-6 ,-8 ,-9 ,-9 ,-11,-8 ,INF,INF,INF,INF + }, + std::vector> + { + // e ,A ,G ,A ,T ,T ,T ,A ,C ,T ,A ,C ,G ,C ,A ,T , + /*e*/ N ,N ,N ,N ,N ,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ N ,DUL,DUl,DUL,DUl,D ,INF,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*T*/ N ,DuL,Ul ,Dul,DuL,DUL,D ,INF,INF,INF,INF,INF,INF,INF,INF,INF, + /*A*/ N ,DuL,L ,DUl,DUl,DUl,DUl,D ,INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ N ,DUL,Dul,L ,l ,l ,l ,l ,D ,INF,INF,INF,INF,INF,INF,INF, + /*C*/ N ,DuL,Ul ,Dul,DuL,Dul,Dul,ul ,DUl,D ,INF,INF,INF,INF,INF,INF, + /*A*/ N ,DuL,uL ,DUl,Dul,l ,l ,Dul,l ,DUl,D ,INF,INF,INF,INF,INF, + /*T*/ N ,DUL,Dul,DuL,DUl,DuL,Dul,l ,l ,Dul,l ,D ,INF,INF,INF,INF + } + }; +}(); + +// band starts in top left with upper diagonal exceeding size of sequence 1 and ends in the right side. +static auto dna4_free_tlbr_with_band_tl2r = [] () +{ + return alignment_fixture + { + "GTAGCAT"_dna4, + "AGATTTACTACGCAT"_dna4, + config_free_gaps_tlbr | config_gap | config_scoring_m4_mm5 | + seqan3::align_cfg::band_fixed_size{seqan3::align_cfg::lower_diagonal{-3}, + seqan3::align_cfg::upper_diagonal{100}}, + 5, + "AGCAT", + "AG-AT", + seqan3::alignment_coordinate{seqan3::detail::column_index_type{2u}, seqan3::detail::row_index_type{0u}}, + seqan3::alignment_coordinate{seqan3::detail::column_index_type{7u}, seqan3::detail::row_index_type{4u}}, + std::vector> + { + // e ,G ,T ,A ,G ,C ,A ,T , + /*e*/ 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 , + /*A*/ 0 ,-5 ,-5 ,4 ,-5 ,-5 ,4 ,-5 , + /*G*/ 0 ,4 ,-7 ,-7 ,8 ,-3 ,-4 ,-1 , + /*A*/ 0 ,-5 ,-1 ,-3 ,-3 ,3 ,1 ,-9 , + /*T*/ INF,-5 ,-1 ,-6 ,-4 ,-8 ,-2 ,5 , + /*T*/ INF,INF,-1 ,-6 ,-5 ,-9 ,-10,2 , + /*T*/ INF,INF,INF,-6 ,-6 ,-10,-11,-6 , + /*A*/ INF,INF,INF,INF,-7 ,-11,-6 ,-8 , + /*C*/ INF,INF,INF,INF,INF,-3 ,-13,-9 , + /*T*/ INF,INF,INF,INF,INF,INF,-8 ,-9 , + /*A*/ INF,INF,INF,INF,INF,INF,INF,-11, + /*C*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*C*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*A*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*T*/ INF,INF,INF,INF,INF,INF,INF,INF + }, + std::vector> + { + // e ,G ,T ,A ,G ,C ,A ,T , + /*e*/ N ,N ,N ,N ,N ,N ,N ,N , + /*A*/ N ,DUL,DUl,DUl,DUL,DUl,DUl,DUL, + /*G*/ N ,DuL,L ,Ul ,Dul,L ,l ,Dul, + /*A*/ N ,DUL,Dul,DuL,Ul ,Dul,DuL,DUl, + /*T*/ INF,Du ,DUL,DuL,ul ,DUl,Dul,DuL, + /*T*/ INF,INF,DU ,DuL,ul ,Dul,ul ,DUl, + /*T*/ INF,INF,INF,Du ,uL ,DuL,ul ,Dul, + /*A*/ INF,INF,INF,INF,u ,DuL,Dul,uL , + /*C*/ INF,INF,INF,INF,INF,Du ,uL ,ul , + /*T*/ INF,INF,INF,INF,INF,INF,Du ,DuL, + /*A*/ INF,INF,INF,INF,INF,INF,INF,u , + /*C*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*G*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*C*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*A*/ INF,INF,INF,INF,INF,INF,INF,INF, + /*T*/ INF,INF,INF,INF,INF,INF,INF,INF + } + }; +}(); + } // namespace seqan3::test::alignment::fixture::global::affine::banded diff --git a/test/unit/alignment/pairwise/global_affine_unbanded_aa27_test.cpp b/test/unit/alignment/pairwise/global_affine_unbanded_aa27_test.cpp new file mode 100644 index 0000000000..7b7ca52837 --- /dev/null +++ b/test/unit/alignment/pairwise/global_affine_unbanded_aa27_test.cpp @@ -0,0 +1,21 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +#include + +#include + +#include "fixture/global_affine_unbanded.hpp" +#include "pairwise_alignment_single_test_template.hpp" + +using pairwise_global_affine_unbanded_testing_types = ::testing::Types< + pairwise_alignment_fixture<&seqan3::test::alignment::fixture::global::affine::unbanded::aa27_blosum62_gap_1_open_10> + >; + +INSTANTIATE_TYPED_TEST_SUITE_P(pairwise_global_affine_unbanded_aa27, + pairwise_alignment_test, + pairwise_global_affine_unbanded_testing_types, ); diff --git a/test/unit/alignment/pairwise/global_affine_unbanded_collection_simd_aa27_test.cpp b/test/unit/alignment/pairwise/global_affine_unbanded_collection_simd_aa27_test.cpp new file mode 100644 index 0000000000..b5d60aabf9 --- /dev/null +++ b/test/unit/alignment/pairwise/global_affine_unbanded_collection_simd_aa27_test.cpp @@ -0,0 +1,39 @@ +// ----------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md +// ----------------------------------------------------------------------------------------------------- + +#include + +#include + +#include + +#include "fixture/global_affine_unbanded.hpp" +#include "pairwise_alignment_collection_test_template.hpp" + +namespace seqan3::test::alignment::collection::simd::global::affine::unbanded +{ + +static auto aa27_all_same = []() +{ + auto base_fixture = fixture::global::affine::unbanded::aa27_blosum62_gap_1_open_10; + using fixture_t = decltype(base_fixture); + + std::vector data{}; + for (size_t i = 0; i < 100; ++i) + data.push_back(base_fixture); + + return alignment_fixture_collection{base_fixture.config | seqan3::align_cfg::vectorised, data}; +}(); +} // namespace seqan3::test::alignment::collection::simd::global::affine::unbanded + +using pairwise_collection_simd_global_affine_unbanded_testing_types = ::testing::Types< + pairwise_alignment_fixture<&seqan3::test::alignment::collection::simd::global::affine::unbanded::aa27_all_same> + >; + +INSTANTIATE_TYPED_TEST_SUITE_P(pairwise_collection_simd_global_affine_unbanded_aa27, + pairwise_alignment_collection_test, + pairwise_collection_simd_global_affine_unbanded_testing_types, ); diff --git a/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp b/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp index 5e18167517..46038222c8 100644 --- a/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp +++ b/test/unit/alignment/pairwise/semi_global_affine_banded_test.cpp @@ -16,7 +16,10 @@ using pairwise_semiglobal_affine_banded_testing_types = ::testing::Types< pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_01_semi_first>, // pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_02_semi_first>, pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_03_semi_second>, - pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_04_semi_second> + pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_04_semi_second>, + pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_free_lb_with_band_tl2br_no_matches>, + pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_free_tlbr_with_band_tl2b>, + pairwise_alignment_fixture<&seqan3::test::alignment::fixture::semi_global::affine::banded::dna4_free_tlbr_with_band_tl2r> >; INSTANTIATE_TYPED_TEST_SUITE_P(pairwise_semiglobal_affine_banded, diff --git a/test/unit/search/search_collection_test.cpp b/test/unit/search/search_collection_test.cpp index c894efc324..0310b1b427 100644 --- a/test/unit/search/search_collection_test.cpp +++ b/test/unit/search/search_collection_test.cpp @@ -211,6 +211,40 @@ TYPED_TEST(search_test, parallel_without_parameter) EXPECT_THROW(search("AAAA"_dna4, this->index, cfg), std::runtime_error); } +TYPED_TEST(search_test, debug_streaming) +{ + std::ostringstream oss; + seqan3::debug_stream_type stream{oss}; + stream << search("TAC"_dna4, this->index); + EXPECT_EQ(oss.str(), "[" + "," + "," + ",]"); +} + +// https://github.com/seqan/seqan3/issues/2115 +TYPED_TEST(search_test, issue_2115) +{ + std::vector> const genomes{"ACAG"_dna4}; + TypeParam const index{genomes}; + + // One substitution error, report all best hits + seqan3::configuration const cfg = seqan3::search_cfg::max_error_total{seqan3::search_cfg::error_count{1}} | + seqan3::search_cfg::max_error_substitution{seqan3::search_cfg::error_count{1}} | + seqan3::search_cfg::max_error_insertion{seqan3::search_cfg::error_count{0}} | + seqan3::search_cfg::max_error_deletion{seqan3::search_cfg::error_count{0}} | + seqan3::search_cfg::hit_all_best; + + std::vector const dna4_query{"ACGG"_dna4}; + std::vector> const dna4q_query{{'A'_dna4, seqan3::phred42{0}}, + {'C'_dna4, seqan3::phred42{15}}, + {'G'_dna4, seqan3::phred42{30}}, + {'G'_dna4, seqan3::phred42{41}}}; + + // Quality should not alter search results. + EXPECT_RANGE_EQ(seqan3::search(dna4q_query, index, cfg), seqan3::search(dna4_query, index, cfg)); +} + TYPED_TEST(search_string_test, error_free_string) { typename TestFixture::hits_result_t empty_result{}; diff --git a/test/unit/search/search_test.cpp b/test/unit/search/search_test.cpp index 383954e44d..8a3448f43f 100644 --- a/test/unit/search/search_test.cpp +++ b/test/unit/search/search_test.cpp @@ -483,6 +483,38 @@ TYPED_TEST(search_test, parallel_without_parameter) EXPECT_THROW(search("AAAA"_dna4, this->index, cfg), std::runtime_error); } +TYPED_TEST(search_test, debug_streaming) +{ + std::ostringstream oss; + seqan3::debug_stream_type stream{oss}; + stream << search("TAC"_dna4, this->index); + EXPECT_EQ(oss.str(), "[" + ",]"); +} + +// https://github.com/seqan/seqan3/issues/2115 +TYPED_TEST(search_test, issue_2115) +{ + std::vector const genome{"ACAG"_dna4}; + TypeParam const index{genome}; + + // One substitution error, report all best hits + seqan3::configuration const cfg = seqan3::search_cfg::max_error_total{seqan3::search_cfg::error_count{1}} | + seqan3::search_cfg::max_error_substitution{seqan3::search_cfg::error_count{1}} | + seqan3::search_cfg::max_error_insertion{seqan3::search_cfg::error_count{0}} | + seqan3::search_cfg::max_error_deletion{seqan3::search_cfg::error_count{0}} | + seqan3::search_cfg::hit_all_best; + + std::vector const dna4_query{"ACGG"_dna4}; + std::vector> const dna4q_query{{'A'_dna4, seqan3::phred42{0}}, + {'C'_dna4, seqan3::phred42{15}}, + {'G'_dna4, seqan3::phred42{30}}, + {'G'_dna4, seqan3::phred42{41}}}; + + // Quality should not alter search results. + EXPECT_RANGE_EQ(seqan3::search(dna4q_query, index, cfg), seqan3::search(dna4_query, index, cfg)); +} + TYPED_TEST(search_string_test, error_free_string) { // successful and unsuccesful exact search without cfg