Skip to content

Commit

Permalink
Merge pull request #230 from eseiler/infra/check_datastore
Browse files Browse the repository at this point in the history
[FEATURE] Add data_store::validate
  • Loading branch information
eseiler authored Sep 12, 2024
2 parents 485b22b + 273f7f4 commit 169cafe
Show file tree
Hide file tree
Showing 12 changed files with 266 additions and 53 deletions.
7 changes: 4 additions & 3 deletions include/hibf/layout/data_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,6 @@ struct data_store
/*!\name References to global instances of the HIBF.
* \{
*/
//!\brief The desired maximum false positive rate of the resulting index.
double false_positive_rate{};

//!\brief The layout that is built by layout::hierarchical_binning.
layout * hibf_layout; // Will be modified by {simple,hierarchical}_binning.

Expand All @@ -68,6 +65,8 @@ struct data_store
std::vector<size_t> positions = [this]()
{
std::vector<size_t> ps;
if (this->kmer_counts == nullptr)
return ps; // GCOVR_EXCL_LINE
ps.resize(this->kmer_counts->size());
std::iota(ps.begin(), ps.end(), 0);
return ps;
Expand All @@ -93,6 +92,8 @@ struct data_store
//!\brief Tracks the time the algorithm spends on rearranging user bins (merged bins).
concurrent_timer rearrangement_timer{};
//!\}

void validate() const;
};

} // namespace seqan::hibf::layout
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set (HIBF_SOURCE_FILES
layout/compute_fpr_correction.cpp
layout/compute_layout.cpp
layout/compute_relaxed_fpr_correction.cpp
layout/data_store.cpp
sketch/compute_sketches.cpp
layout/graph.cpp
layout/hierarchical_binning.cpp
Expand Down
3 changes: 1 addition & 2 deletions src/layout/compute_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ layout compute_layout(config const & config,

layout resulting_layout{};

data_store store{.false_positive_rate = config.maximum_fpr,
.hibf_layout = &resulting_layout,
data_store store{.hibf_layout = &resulting_layout,
.kmer_counts = std::addressof(kmer_counts),
.sketches = std::addressof(sketches),
.positions = std::move(positions)};
Expand Down
33 changes: 33 additions & 0 deletions src/layout/data_store.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#include <hibf/layout/data_store.hpp> // for data_store

namespace seqan::hibf::layout
{

/*!\brief Checks that this data_store is in a consistent state.
 * \throws std::invalid_argument if any of the following holds (checked in this order, first violation wins):
 *   - `hibf_layout` is `nullptr`,
 *   - `kmer_counts` is `nullptr`,
 *   - `sketches` is set, but its size differs from the size of `kmer_counts`,
 *   - `positions` has more elements than `kmer_counts`,
 *   - `fpr_correction` is empty,
 *   - `relaxed_fpr_correction` is not in (0.0,1.0]; this includes NaN.
 * \details `sketches` may be `nullptr`; in that case the size comparison is skipped.
 */
void data_store::validate() const
{
    if (hibf_layout == nullptr)
        throw std::invalid_argument{"[HIBF ERROR] data_store::hibf_layout must not be nullptr."};

    if (kmer_counts == nullptr)
        throw std::invalid_argument{"[HIBF ERROR] data_store::kmer_counts must not be nullptr."};

    if (sketches != nullptr && kmer_counts->size() != sketches->size())
        throw std::invalid_argument{
            "[HIBF ERROR] data_store::kmer_counts and data_store::sketches must have the same size."};

    if (positions.size() > kmer_counts->size())
        throw std::invalid_argument{
            "[HIBF ERROR] data_store::kmer_counts.size() must not be smaller than data_store::positions.size()."};

    if (fpr_correction.empty())
        throw std::invalid_argument{"[HIBF ERROR] data_store::fpr_correction must not be empty."};

    // Negated in-range test instead of `x <= 0.0 || x > 1.0`: every ordered comparison with NaN is false,
    // so the latter form would silently accept NaN.
    if (!(relaxed_fpr_correction > 0.0 && relaxed_fpr_correction <= 1.0))
        throw std::invalid_argument{"[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0]."};
}

} // namespace seqan::hibf::layout
10 changes: 5 additions & 5 deletions src/layout/hierarchical_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ namespace seqan::hibf::layout
size_t hierarchical_binning::execute()
{
assert(data != nullptr);
assert(data->kmer_counts != nullptr);
assert(data->positions.size() <= data->kmer_counts->size());
data->validate();

static constexpr size_t max_size_t{std::numeric_limits<size_t>::max()};

Expand All @@ -40,7 +39,9 @@ size_t hierarchical_binning::execute()

if (!config.disable_estimate_union && !config.disable_rearrangement)
{
assert(data->sketches != nullptr);
if (data->sketches == nullptr)
throw std::invalid_argument{"[HIBF ERROR] data_store::sketches must not be nullptr if union estimation "
"or rearrangement is enabled."};
data->rearrangement_timer.start();
sketch::toolbox::rearrange_bins(*data->sketches,
*data->kmer_counts,
Expand Down Expand Up @@ -364,8 +365,7 @@ size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size

data_store hierarchical_binning::initialise_libf_data(size_t const trace_j) const
{
data_store libf_data{.false_positive_rate = data->false_positive_rate,
.hibf_layout = data->hibf_layout,
data_store libf_data{.hibf_layout = data->hibf_layout,
.kmer_counts = data->kmer_counts,
.sketches = data->sketches,
.positions = {data->positions[trace_j]},
Expand Down
1 change: 1 addition & 0 deletions src/layout/simple_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ namespace seqan::hibf::layout
size_t simple_binning::execute()
{
assert(data != nullptr);
data->validate();
assert(num_technical_bins > 0u);
assert(num_user_bins > 0u);

Expand Down
56 changes: 56 additions & 0 deletions test/include/hibf/test/expect_throw_msg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

/*!\file
* \brief Provides EXPECT_THROW_MSG.
* \author Enrico Seiler <enrico.seiler AT fu-berlin.de>
*/

#pragma once

#include <gtest/gtest.h>

#include <hibf/platform.hpp>

#ifdef EXPECT_THROW_MSG
# warning "EXPECT_THROW_MSG is already defined."
#else
/*!\brief Expects that `statement` throws `expected_exception` and that its `what()` equals `expected_message`.
 * \details Records a non-fatal googletest failure if
 *   - `statement` throws nothing,
 *   - `statement` throws `expected_exception`, but `what()` differs from `expected_message`
 *     (both are compared as `std::string_view`),
 *   - `statement` throws a different exception derived from `std::exception`
 *     (its type name and `what()` are part of the failure message), or
 *   - `statement` throws something not derived from `std::exception`.
 *
 * The expansion is wrapped in `do { } while (false)` so that `EXPECT_THROW_MSG(...);` is exactly one statement
 * and can be used as the body of an unbraced `if`/`else`. Invocations must be terminated with a semicolon.
 */
# define EXPECT_THROW_MSG(statement, expected_exception, expected_message) \
    do \
    { \
        try \
        { \
            statement; \
            std::string const message = "Expected: " #statement " throws an exception of type " #expected_exception \
                                        ".\n Actual: it throws nothing."; \
            GTEST_NONFATAL_FAILURE_(message.data()); \
        } \
        catch (expected_exception const & exception) \
        { \
            if (auto result = ::testing::internal::EqHelper::Compare("Expected", \
                                                                     "Actual", \
                                                                     std::string_view{expected_message}, \
                                                                     std::string_view{exception.what()}); \
                !result) \
            { \
                std::string message = #statement " throws the correct exception, but the description is incorrect.\n"; \
                message += result.failure_message(); \
                GTEST_NONFATAL_FAILURE_(message.data()); \
            } \
        } \
        catch (std::exception const & exception) \
        { \
            std::string message = "Expected: " #statement " throws an exception of type " #expected_exception ".\n "; \
            message += "Actual: it throws "; \
            message += ::testing::internal::GetTypeName(typeid(exception)); \
            message += " with description \""; \
            message += exception.what(); \
            message += "\"."; \
            GTEST_NONFATAL_FAILURE_(message.data()); \
        } \
        catch (...) \
        { \
            std::string message = "Expected: " #statement " throws an exception of type " #expected_exception ".\n "; \
            message += "Actual: it throws an unknown exception."; \
            GTEST_NONFATAL_FAILURE_(message.data()); \
        } \
    } \
    while (false)
#endif
96 changes: 55 additions & 41 deletions test/unit/hibf/config_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include <hibf/config.hpp> // for config, insert_iterator
#include <hibf/test/cereal.hpp> // for test_serialisation
#include <hibf/test/expect_throw_msg.hpp>

TEST(config_test, write_to)
{
Expand Down Expand Up @@ -139,68 +140,67 @@ TEST(config_test, validate_and_set_defaults)
{
auto dummy_input_fn = [](size_t const, seqan::hibf::insert_iterator) {};

auto check_error_message = [](seqan::hibf::config & configuration, std::string_view const expected_message)
{
try
{
configuration.validate_and_set_defaults();
FAIL();
}
catch (std::invalid_argument const & exception)
{
EXPECT_STREQ(expected_message.data(), exception.what());
}
catch (...)
{
FAIL();
}
};

// input_fn is not set
{
seqan::hibf::config configuration{};
check_error_message(configuration, "[HIBF CONFIG ERROR] You did not set the required config::input_fn.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] You did not set the required config::input_fn.");
}

// number_of_user_bins cannot be 0 or bin_kind::merged (18'446'744'073'709'551'615ULL)
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn};
check_error_message(configuration,
"[HIBF CONFIG ERROR] You did not set the required config::number_of_user_bins.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] You did not set the required config::number_of_user_bins.");

configuration.number_of_user_bins = 18'446'744'073'709'551'615ULL;
check_error_message(configuration,
"[HIBF CONFIG ERROR] The maximum possible config::number_of_user_bins "
"is 18446744073709551614.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] The maximum possible config::number_of_user_bins "
"is 18446744073709551614.");
}

// number_of_hash_functions must be in [1,5]
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn,
.number_of_user_bins = 1u,
.number_of_hash_functions = 0u};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5].");

configuration.number_of_hash_functions = 6u;
check_error_message(configuration, "[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5].");
}

// maximum_fpr must be in (0.0,1.0)
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .maximum_fpr = 0.0};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0).");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0).");

configuration.maximum_fpr = 1.0;
check_error_message(configuration, "[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0).");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0).");
}

// relaxed_fpr must be in (0.0,1.0)
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .relaxed_fpr = 0.0};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0).");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0).");

configuration.relaxed_fpr = 1.0;
check_error_message(configuration, "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0).");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0).");
}

// relaxed_fpr must be equal to or greater than maximum_fpr
Expand All @@ -209,24 +209,31 @@ TEST(config_test, validate_and_set_defaults)
.number_of_user_bins = 1u,
.maximum_fpr = 0.3,
.relaxed_fpr = 0.2};
check_error_message(configuration,
"[HIBF CONFIG ERROR] config::relaxed_fpr must be "
"greater than or equal to config::maximum_fpr.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::relaxed_fpr must be "
"greater than or equal to config::maximum_fpr.");
}

// threads cannot be 0
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .threads = 0u};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::threads must be greater than 0.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::threads must be greater than 0.");
}

// sketch_bits must be in [5,32]
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .sketch_bits = 4u};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32].");

configuration.sketch_bits = 33u;
check_error_message(configuration, "[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32].");
}

// Set default tmax
Expand Down Expand Up @@ -260,9 +267,10 @@ TEST(config_test, validate_and_set_defaults)
.number_of_user_bins = 1u,
.tmax = 18'446'744'073'709'551'553ULL};

check_error_message(configuration,
"[HIBF CONFIG ERROR] The maximum possible config::tmax "
"is 18446744073709551552.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] The maximum possible config::tmax "
"is 18446744073709551552.");
}

// Given tmax is not a multiple of 64
Expand All @@ -282,18 +290,24 @@ TEST(config_test, validate_and_set_defaults)
// alpha must be positive
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .alpha = -0.1};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::alpha must be positive.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::alpha must be positive.");
}

// max_rearrangement_ratio must be in [0.0,1.0]
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn,
.number_of_user_bins = 1u,
.max_rearrangement_ratio = -0.1};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0].");

configuration.max_rearrangement_ratio = 1.1;
check_error_message(configuration, "[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0].");
}

// Set disable_rearrangement if disable_estimate_union is set
Expand Down
1 change: 1 addition & 0 deletions test/unit/hibf/layout/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ hibf_test (layout_test.cpp)
hibf_test (fpr_correction_test.cpp)
hibf_test (graph_test.cpp)
hibf_test (compute_layout_test.cpp)
hibf_test (data_store_test.cpp)
Loading

0 comments on commit 169cafe

Please sign in to comment.