diff --git a/include/hibf/layout/data_store.hpp b/include/hibf/layout/data_store.hpp index 67e281c6..f38b20af 100644 --- a/include/hibf/layout/data_store.hpp +++ b/include/hibf/layout/data_store.hpp @@ -44,9 +44,6 @@ struct data_store /*!\name References to global instances of the HIBF. * \{ */ - //!\brief The desired maximum false positive rate of the resulting index. - double false_positive_rate{}; - //!\brief The layout that is built by layout::hierarchical_binning. layout * hibf_layout; // Will be modified by {simple,hierarchical}_binning. @@ -68,6 +65,8 @@ struct data_store std::vector positions = [this]() { std::vector ps; + if (this->kmer_counts == nullptr) + return ps; // GCOVR_EXCL_LINE ps.resize(this->kmer_counts->size()); std::iota(ps.begin(), ps.end(), 0); return ps; @@ -93,6 +92,8 @@ struct data_store //!\brief Tracks the time the algorithm spends on rearranging user bins (merged bins). concurrent_timer rearrangement_timer{}; //!\} + + void validate() const; }; } // namespace seqan::hibf::layout diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5b64369c..9d240b38 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,6 +11,7 @@ set (HIBF_SOURCE_FILES layout/compute_fpr_correction.cpp layout/compute_layout.cpp layout/compute_relaxed_fpr_correction.cpp + layout/data_store.cpp sketch/compute_sketches.cpp layout/graph.cpp layout/hierarchical_binning.cpp diff --git a/src/layout/compute_layout.cpp b/src/layout/compute_layout.cpp index 9b5c70d9..af656a76 100644 --- a/src/layout/compute_layout.cpp +++ b/src/layout/compute_layout.cpp @@ -37,8 +37,7 @@ layout compute_layout(config const & config, layout resulting_layout{}; - data_store store{.false_positive_rate = config.maximum_fpr, - .hibf_layout = &resulting_layout, + data_store store{.hibf_layout = &resulting_layout, .kmer_counts = std::addressof(kmer_counts), .sketches = std::addressof(sketches), .positions = std::move(positions)}; diff --git a/src/layout/data_store.cpp b/src/layout/data_store.cpp new file mode 100644 index 00000000..68a5fe46 --- /dev/null +++ b/src/layout/data_store.cpp @@ -0,0 +1,33 @@ +// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +#include // for data_store + +namespace seqan::hibf::layout +{ + +void data_store::validate() const +{ + if (hibf_layout == nullptr) + throw std::invalid_argument{"[HIBF ERROR] data_store::hibf_layout must not be nullptr."}; + + if (kmer_counts == nullptr) + throw std::invalid_argument{"[HIBF ERROR] data_store::kmer_counts must not be nullptr."}; + + if (sketches != nullptr && kmer_counts->size() != sketches->size()) + throw std::invalid_argument{ + "[HIBF ERROR] data_store::kmer_counts and data_store::sketches must have the same size."}; + + if (positions.size() > kmer_counts->size()) + throw std::invalid_argument{ + "[HIBF ERROR] data_store::kmer_counts.size() must not be smaller than data_store::positions.size()."}; + + if (fpr_correction.empty()) + throw std::invalid_argument{"[HIBF ERROR] data_store::fpr_correction must not be empty."}; + + if (relaxed_fpr_correction <= 0.0 || relaxed_fpr_correction > 1.0) + throw std::invalid_argument{"[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0]."}; +} + +} // namespace seqan::hibf::layout diff --git a/src/layout/hierarchical_binning.cpp b/src/layout/hierarchical_binning.cpp index 6a30f7ec..a519deed 100644 --- a/src/layout/hierarchical_binning.cpp +++ b/src/layout/hierarchical_binning.cpp @@ -29,8 +29,7 @@ namespace seqan::hibf::layout size_t hierarchical_binning::execute() { assert(data != nullptr); - assert(data->kmer_counts != nullptr); - assert(data->positions.size() <= data->kmer_counts->size()); + data->validate(); static constexpr size_t max_size_t{std::numeric_limits::max()}; @@ -40,7 +39,9 @@ size_t hierarchical_binning::execute() if (!config.disable_estimate_union && !config.disable_rearrangement) { - assert(data->sketches != nullptr); + if (data->sketches == nullptr) + throw std::invalid_argument{"[HIBF ERROR] data_store::sketches must not be nullptr if union estimation " + "or rearrangement is enabled."}; data->rearrangement_timer.start(); sketch::toolbox::rearrange_bins(*data->sketches, *data->kmer_counts, @@ -364,8 +365,7 @@ size_t hierarchical_binning::backtracking(std::vectorfalse_positive_rate, - .hibf_layout = data->hibf_layout, + data_store libf_data{.hibf_layout = data->hibf_layout, .kmer_counts = data->kmer_counts, .sketches = data->sketches, .positions = {data->positions[trace_j]}, diff --git a/src/layout/simple_binning.cpp b/src/layout/simple_binning.cpp index d1b9cb5e..2fae7337 100644 --- a/src/layout/simple_binning.cpp +++ b/src/layout/simple_binning.cpp @@ -19,6 +19,7 @@ namespace seqan::hibf::layout size_t simple_binning::execute() { assert(data != nullptr); + data->validate(); assert(num_technical_bins > 0u); assert(num_user_bins > 0u); diff --git a/test/include/hibf/test/expect_throw_msg.hpp b/test/include/hibf/test/expect_throw_msg.hpp new file mode 100644 index 00000000..688b917d --- /dev/null +++ b/test/include/hibf/test/expect_throw_msg.hpp @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +/*!\file + * \brief Provides EXPECT_THROW_MSG. + * \author Enrico Seiler + */ + +#pragma once + +#include + +#include + +#ifdef EXPECT_THROW_MSG +# warning "EXPECT_THROW_MSG is already defined." +#else +# define EXPECT_THROW_MSG(statement, expected_exception, expected_message) \ + try \ + { \ + statement; \ + std::string const message = "Expected: " #statement " throws an exception of type " #expected_exception \ + ".\n Actual: it throws nothing."; \ + GTEST_NONFATAL_FAILURE_(message.data()); \ + } \ + catch (expected_exception const & exception) \ + { \ + if (auto result = ::testing::internal::EqHelper::Compare("Expected", \ + "Actual", \ + std::string_view{expected_message}, \ + std::string_view{exception.what()}); \ + !result) \ + { \ + std::string message = #statement " throws the correct exception, but the description is incorrect.\n"; \ + message += result.failure_message(); \ + GTEST_NONFATAL_FAILURE_(message.data()); \ + } \ + } \ + catch (std::exception const & exception) \ + { \ + std::string message = "Expected: " #statement " throws an exception of type " #expected_exception ".\n "; \ + message += "Actual: it throws "; \ + message += ::testing::internal::GetTypeName(typeid(exception)); \ + message += " with description \""; \ + message += exception.what(); \ + message += "\"."; \ + GTEST_NONFATAL_FAILURE_(message.data()); \ + } \ + catch (...) \ + { \ + std::string message = "Expected: " #statement " throws an exception of type " #expected_exception ".\n "; \ + message += "Actual: it throws an unknown exception."; \ + GTEST_NONFATAL_FAILURE_(message.data()); \ + } +#endif diff --git a/test/unit/hibf/config_test.cpp b/test/unit/hibf/config_test.cpp index 707ddd8f..1f4d70e3 100644 --- a/test/unit/hibf/config_test.cpp +++ b/test/unit/hibf/config_test.cpp @@ -14,6 +14,7 @@ #include // for config, insert_iterator #include // for test_serialisation +#include TEST(config_test, write_to) { @@ -139,39 +140,26 @@ TEST(config_test, validate_and_set_defaults) { auto dummy_input_fn = [](size_t const, seqan::hibf::insert_iterator) {}; - auto check_error_message = [](seqan::hibf::config & configuration, std::string_view const expected_message) - { - try - { - configuration.validate_and_set_defaults(); - FAIL(); - } - catch (std::invalid_argument const & exception) - { - EXPECT_STREQ(expected_message.data(), exception.what()); - } - catch (...) - { - FAIL(); - } - }; - // input_fn is not set { seqan::hibf::config configuration{}; - check_error_message(configuration, "[HIBF CONFIG ERROR] You did not set the required config::input_fn."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] You did not set the required config::input_fn."); } // number_of_user_bins cannot be 0 or bin_kind::merged (18'446'744'073'709'551'615ULL) { seqan::hibf::config configuration{.input_fn = dummy_input_fn}; - check_error_message(configuration, - "[HIBF CONFIG ERROR] You did not set the required config::number_of_user_bins."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] You did not set the required config::number_of_user_bins."); configuration.number_of_user_bins = 18'446'744'073'709'551'615ULL; - check_error_message(configuration, - "[HIBF CONFIG ERROR] The maximum possible config::number_of_user_bins " - "is 18446744073709551614."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] The maximum possible config::number_of_user_bins " + "is 18446744073709551614."); } // number_of_hash_functions must be in [1,5] @@ -179,28 +167,40 @@ TEST(config_test, validate_and_set_defaults) seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .number_of_hash_functions = 0u}; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5]."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5]."); configuration.number_of_hash_functions = 6u; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5]."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5]."); } // maximum_fpr must be in (0.0,1.0) { seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .maximum_fpr = 0.0}; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0)."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0)."); configuration.maximum_fpr = 1.0; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0)."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0)."); } // relaxed_fpr must be in (0.0,1.0) { seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .relaxed_fpr = 0.0}; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0)."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0)."); configuration.relaxed_fpr = 1.0; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0)."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0)."); } // relaxed_fpr must equal to or greater than maximum_fpr @@ -209,24 +209,31 @@ TEST(config_test, validate_and_set_defaults) .number_of_user_bins = 1u, .maximum_fpr = 0.3, .relaxed_fpr = 0.2}; - check_error_message(configuration, - "[HIBF CONFIG ERROR] config::relaxed_fpr must be " - "greater than or equal to config::maximum_fpr."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::relaxed_fpr must be " + "greater than or equal to config::maximum_fpr."); } // threads cannot be 0 { seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .threads = 0u}; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::threads must be greater than 0."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::threads must be greater than 0."); } // sketch_bits must be in [5,32] { seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .sketch_bits = 4u}; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32]."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32]."); configuration.sketch_bits = 33u; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32]."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32]."); } // Set default tmax @@ -260,9 +267,10 @@ TEST(config_test, validate_and_set_defaults) .number_of_user_bins = 1u, .tmax = 18'446'744'073'709'551'553ULL}; - check_error_message(configuration, - "[HIBF CONFIG ERROR] The maximum possible config::tmax " - "is 18446744073709551552."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] The maximum possible config::tmax " + "is 18446744073709551552."); } // Given tmax is not a multiple of 64 @@ -282,7 +290,9 @@ TEST(config_test, validate_and_set_defaults) // alpha must be positive { seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .alpha = -0.1}; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::alpha must be positive."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::alpha must be positive."); } // max_rearrangement_ratio must be in [0.0,1.0] @@ -290,10 +300,14 @@ TEST(config_test, validate_and_set_defaults) seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .max_rearrangement_ratio = -0.1}; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0]."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0]."); configuration.max_rearrangement_ratio = 1.1; - check_error_message(configuration, "[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0]."); + EXPECT_THROW_MSG(configuration.validate_and_set_defaults(), + std::invalid_argument, + "[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0]."); } // Set disable_rearrangement if disable_estimate_union is set diff --git a/test/unit/hibf/layout/CMakeLists.txt b/test/unit/hibf/layout/CMakeLists.txt index 452ce8e1..95b3fc94 100644 --- a/test/unit/hibf/layout/CMakeLists.txt +++ b/test/unit/hibf/layout/CMakeLists.txt @@ -8,3 +8,4 @@ hibf_test (layout_test.cpp) hibf_test (fpr_correction_test.cpp) hibf_test (graph_test.cpp) hibf_test (compute_layout_test.cpp) +hibf_test (data_store_test.cpp) diff --git a/test/unit/hibf/layout/data_store_test.cpp b/test/unit/hibf/layout/data_store_test.cpp new file mode 100644 index 00000000..a2796d3c --- /dev/null +++ b/test/unit/hibf/layout/data_store_test.cpp @@ -0,0 +1,87 @@ +// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +#include + +#include +#include + +TEST(data_store_test, validate) +{ + seqan::hibf::layout::layout layout{}; + std::vector kmer_counts(3); + std::vector sketches(3); + + // hibf_layout must not be nullptr + { + seqan::hibf::layout::data_store store{}; + EXPECT_THROW_MSG(store.validate(), + std::invalid_argument, + "[HIBF ERROR] data_store::hibf_layout must not be nullptr."); + } + + // kmer_counts must not be nullptr + { + seqan::hibf::layout::data_store store{.hibf_layout = &layout}; + EXPECT_THROW_MSG(store.validate(), + std::invalid_argument, + "[HIBF ERROR] data_store::kmer_counts must not be nullptr."); + } + + // kmer_counts and sketches must have the same size + { + std::vector wrong_sketches(2); + + seqan::hibf::layout::data_store store{.hibf_layout = &layout, + .kmer_counts = &kmer_counts, + .sketches = &wrong_sketches}; + EXPECT_THROW_MSG(store.validate(), + std::invalid_argument, + "[HIBF ERROR] data_store::kmer_counts and data_store::sketches must have the same size."); + } + + // kmer_counts size must be greater than positions size + { + seqan::hibf::layout::data_store store{.hibf_layout = &layout, + .kmer_counts = &kmer_counts, + .sketches = &sketches, + .positions = {1, 2, 3, 4}}; + + EXPECT_THROW_MSG( + store.validate(), + std::invalid_argument, + "[HIBF ERROR] data_store::kmer_counts.size() must not be smaller than data_store::positions.size()."); + } + + // fpr_correction must not be empty + { + seqan::hibf::layout::data_store store{.hibf_layout = &layout, + .kmer_counts = &kmer_counts, + .sketches = &sketches}; + + EXPECT_THROW_MSG(store.validate(), + std::invalid_argument, + "[HIBF ERROR] data_store::fpr_correction must not be empty."); + } + + // relaxed_fpr_correction must be in (0.0,1.0] + { + seqan::hibf::layout::data_store store{.hibf_layout = &layout, + .kmer_counts = &kmer_counts, + .sketches = &sketches, + .fpr_correction = {1.0, 2.0, 3.0}}; + + EXPECT_THROW_MSG(store.validate(), + std::invalid_argument, + "[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0]."); + + store.relaxed_fpr_correction = 1.01; + EXPECT_THROW_MSG(store.validate(), + std::invalid_argument, + "[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0]."); + + store.relaxed_fpr_correction = 0.5; + EXPECT_NO_THROW(store.validate()); + } +} diff --git a/test/unit/hibf/layout/hierarchical_binning_test.cpp b/test/unit/hibf/layout/hierarchical_binning_test.cpp index 035c5827..10d3ffca 100644 --- a/test/unit/hibf/layout/hierarchical_binning_test.cpp +++ b/test/unit/hibf/layout/hierarchical_binning_test.cpp @@ -14,6 +14,24 @@ #include // for hierarchical_binning #include // for layout #include // for expect_range_eq, EXPECT_RANGE_EQ +#include + +TEST(hierarchical_binning_test, missing_sketches) +{ + seqan::hibf::config config; + seqan::hibf::layout::layout hibf_layout{}; + std::vector kmer_counts{500, 500, 500, 500}; + seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, + .kmer_counts = &kmer_counts, + .fpr_correction = {1.0}, + .relaxed_fpr_correction = 1.0}; + + seqan::hibf::layout::hierarchical_binning algo{data, config}; + EXPECT_THROW_MSG(algo.execute(), + std::invalid_argument, + "[HIBF ERROR] data_store::sketches must not be nullptr if union estimation or rearrangement is " + "enabled."); +} TEST(hierarchical_binning_test, small_example) { diff --git a/test/unit/hibf/layout/simple_binning_test.cpp b/test/unit/hibf/layout/simple_binning_test.cpp index eb97278e..edf7a9d7 100644 --- a/test/unit/hibf/layout/simple_binning_test.cpp +++ b/test/unit/hibf/layout/simple_binning_test.cpp @@ -20,7 +20,8 @@ TEST(simple_binning_test, small_example) seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts, - .fpr_correction = std::vector(65, 1.0)}; + .fpr_correction = std::vector(65, 1.0), + .relaxed_fpr_correction = 1.0}; seqan::hibf::layout::simple_binning algo{data, 9}; size_t max_bin = algo.execute(); @@ -41,7 +42,8 @@ TEST(simple_binning_test, uniform_distribution) seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts, - .fpr_correction = std::vector(65, 1.0)}; + .fpr_correction = std::vector(65, 1.0), + .relaxed_fpr_correction = 1.0}; seqan::hibf::layout::simple_binning algo{data, 4u}; size_t max_bin = algo.execute();