Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Add data_store::validate #230

Merged
merged 3 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions include/hibf/layout/data_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,6 @@ struct data_store
/*!\name References to global instances of the HIBF.
* \{
*/
//!\brief The desired maximum false positive rate of the resulting index.
double false_positive_rate{};

//!\brief The layout that is built by layout::hierarchical_binning.
layout * hibf_layout; // Will be modified by {simple,hierarchical}_binning.

Expand All @@ -68,6 +65,8 @@ struct data_store
std::vector<size_t> positions = [this]()
{
std::vector<size_t> ps;
if (this->kmer_counts == nullptr)
return ps; // GCOVR_EXCL_LINE
ps.resize(this->kmer_counts->size());
std::iota(ps.begin(), ps.end(), 0);
return ps;
Expand All @@ -93,6 +92,8 @@ struct data_store
//!\brief Tracks the time the algorithm spends on rearranging user bins (merged bins).
concurrent_timer rearrangement_timer{};
//!\}

void validate() const;
};

} // namespace seqan::hibf::layout
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set (HIBF_SOURCE_FILES
layout/compute_fpr_correction.cpp
layout/compute_layout.cpp
layout/compute_relaxed_fpr_correction.cpp
layout/data_store.cpp
sketch/compute_sketches.cpp
layout/graph.cpp
layout/hierarchical_binning.cpp
Expand Down
3 changes: 1 addition & 2 deletions src/layout/compute_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ layout compute_layout(config const & config,

layout resulting_layout{};

data_store store{.false_positive_rate = config.maximum_fpr,
.hibf_layout = &resulting_layout,
data_store store{.hibf_layout = &resulting_layout,
.kmer_counts = std::addressof(kmer_counts),
.sketches = std::addressof(sketches),
.positions = std::move(positions)};
Expand Down
33 changes: 33 additions & 0 deletions src/layout/data_store.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#include <hibf/layout/data_store.hpp> // for data_store

namespace seqan::hibf::layout
{

void data_store::validate() const
{
if (hibf_layout == nullptr)
throw std::invalid_argument{"[HIBF ERROR] data_store::hibf_layout must not be nullptr."};

if (kmer_counts == nullptr)
throw std::invalid_argument{"[HIBF ERROR] data_store::kmer_counts must not be nullptr."};

if (sketches != nullptr && kmer_counts->size() != sketches->size())
throw std::invalid_argument{
"[HIBF ERROR] data_store::kmer_counts and data_store::sketches must have the same size."};

eseiler marked this conversation as resolved.
Show resolved Hide resolved
if (positions.size() > kmer_counts->size())
throw std::invalid_argument{
"[HIBF ERROR] data_store::kmer_counts.size() must not be smaller than data_store::positions.size()."};

if (fpr_correction.empty())
throw std::invalid_argument{"[HIBF ERROR] data_store::fpr_correction must not be empty."};

if (relaxed_fpr_correction <= 0.0 || relaxed_fpr_correction > 1.0)
throw std::invalid_argument{"[HIBF ERROR] data_store::relaxed_fpr_correction must be in (0.0,1.0]."};
}

} // namespace seqan::hibf::layout
10 changes: 5 additions & 5 deletions src/layout/hierarchical_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ namespace seqan::hibf::layout
size_t hierarchical_binning::execute()
{
assert(data != nullptr);
assert(data->kmer_counts != nullptr);
assert(data->positions.size() <= data->kmer_counts->size());
data->validate();

static constexpr size_t max_size_t{std::numeric_limits<size_t>::max()};

Expand All @@ -40,7 +39,9 @@ size_t hierarchical_binning::execute()

if (!config.disable_estimate_union && !config.disable_rearrangement)
{
assert(data->sketches != nullptr);
if (data->sketches == nullptr)
throw std::invalid_argument{"[HIBF ERROR] data_store::sketches must not be nullptr if union estimation "
"or rearrangement is enabled."};
data->rearrangement_timer.start();
sketch::toolbox::rearrange_bins(*data->sketches,
*data->kmer_counts,
Expand Down Expand Up @@ -364,8 +365,7 @@ size_t hierarchical_binning::backtracking(std::vector<std::vector<std::pair<size

data_store hierarchical_binning::initialise_libf_data(size_t const trace_j) const
{
data_store libf_data{.false_positive_rate = data->false_positive_rate,
.hibf_layout = data->hibf_layout,
data_store libf_data{.hibf_layout = data->hibf_layout,
.kmer_counts = data->kmer_counts,
.sketches = data->sketches,
.positions = {data->positions[trace_j]},
Expand Down
1 change: 1 addition & 0 deletions src/layout/simple_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ namespace seqan::hibf::layout
size_t simple_binning::execute()
{
assert(data != nullptr);
data->validate();
assert(num_technical_bins > 0u);
assert(num_user_bins > 0u);

Expand Down
56 changes: 56 additions & 0 deletions test/include/hibf/test/expect_throw_msg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

/*!\file
* \brief Provides EXPECT_THROW_MSG.
* \author Enrico Seiler <enrico.seiler AT fu-berlin.de>
*/

#pragma once

#include <gtest/gtest.h>

#include <hibf/platform.hpp>

#ifdef EXPECT_THROW_MSG
#    warning "EXPECT_THROW_MSG is already defined."
#else
/*!\brief Expects that `statement` throws an exception of type `expected_exception` whose `what()` is equal to
 *        `expected_message`.
 * \param statement          The statement to execute.
 * \param expected_exception The exception type that is caught by const reference.
 * \param expected_message   The expected description; compared with `exception.what()` as `std::string_view`.
 * \details
 * A non-fatal googletest failure is generated if
 *   * `statement` does not throw,
 *   * `statement` throws `expected_exception`, but the description differs from `expected_message`,
 *   * `statement` throws some other exception derived from `std::exception`, or
 *   * `statement` throws something that is not derived from `std::exception`.
 *
 * The expansion is wrapped in `do { ... } while (false)` such that the macro behaves like a single statement:
 * it must be followed by a semicolon and can be used as the unbraced body of an `if`/`else`.
 */
#    define EXPECT_THROW_MSG(statement, expected_exception, expected_message) \
        do \
        { \
            try \
            { \
                statement; \
                std::string const message = "Expected: " #statement " throws an exception of type " #expected_exception \
                                            ".\n Actual: it throws nothing."; \
                GTEST_NONFATAL_FAILURE_(message.data()); \
            } \
            catch (expected_exception const & exception) \
            { \
                if (auto result = ::testing::internal::EqHelper::Compare("Expected", \
                                                                         "Actual", \
                                                                         std::string_view{expected_message}, \
                                                                         std::string_view{exception.what()}); \
                    !result) \
                { \
                    std::string message = #statement " throws the correct exception, but the description is incorrect.\n"; \
                    message += result.failure_message(); \
                    GTEST_NONFATAL_FAILURE_(message.data()); \
                } \
            } \
            catch (std::exception const & exception) \
            { \
                std::string message = "Expected: " #statement " throws an exception of type " #expected_exception ".\n "; \
                message += "Actual: it throws "; \
                message += ::testing::internal::GetTypeName(typeid(exception)); \
                message += " with description \""; \
                message += exception.what(); \
                message += "\"."; \
                GTEST_NONFATAL_FAILURE_(message.data()); \
            } \
            catch (...) \
            { \
                std::string message = "Expected: " #statement " throws an exception of type " #expected_exception ".\n "; \
                message += "Actual: it throws an unknown exception."; \
                GTEST_NONFATAL_FAILURE_(message.data()); \
            } \
        } \
        while (false)
#endif
96 changes: 55 additions & 41 deletions test/unit/hibf/config_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include <hibf/config.hpp> // for config, insert_iterator
#include <hibf/test/cereal.hpp> // for test_serialisation
#include <hibf/test/expect_throw_msg.hpp>

TEST(config_test, write_to)
{
Expand Down Expand Up @@ -139,68 +140,67 @@ TEST(config_test, validate_and_set_defaults)
{
auto dummy_input_fn = [](size_t const, seqan::hibf::insert_iterator) {};

auto check_error_message = [](seqan::hibf::config & configuration, std::string_view const expected_message)
{
try
{
configuration.validate_and_set_defaults();
FAIL();
}
catch (std::invalid_argument const & exception)
{
EXPECT_STREQ(expected_message.data(), exception.what());
}
catch (...)
{
FAIL();
}
};

// input_fn is not set
{
seqan::hibf::config configuration{};
check_error_message(configuration, "[HIBF CONFIG ERROR] You did not set the required config::input_fn.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] You did not set the required config::input_fn.");
}

// number_of_user_bins cannot be 0 or bin_kind::merged (18'446'744'073'709'551'615ULL)
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn};
check_error_message(configuration,
"[HIBF CONFIG ERROR] You did not set the required config::number_of_user_bins.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] You did not set the required config::number_of_user_bins.");

configuration.number_of_user_bins = 18'446'744'073'709'551'615ULL;
check_error_message(configuration,
"[HIBF CONFIG ERROR] The maximum possible config::number_of_user_bins "
"is 18446744073709551614.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] The maximum possible config::number_of_user_bins "
"is 18446744073709551614.");
}

// number_of_hash_functions must be in [1,5]
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn,
.number_of_user_bins = 1u,
.number_of_hash_functions = 0u};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5].");

configuration.number_of_hash_functions = 6u;
check_error_message(configuration, "[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5].");
}

// maximum_fpr must be in (0.0,1.0)
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .maximum_fpr = 0.0};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0).");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0).");

configuration.maximum_fpr = 1.0;
check_error_message(configuration, "[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0).");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::maximum_fpr must be in (0.0,1.0).");
}

// relaxed_fpr must be in (0.0,1.0)
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .relaxed_fpr = 0.0};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0).");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0).");

configuration.relaxed_fpr = 1.0;
check_error_message(configuration, "[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0).");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::relaxed_fpr must be in (0.0,1.0).");
}

// relaxed_fpr must be equal to or greater than maximum_fpr
Expand All @@ -209,24 +209,31 @@ TEST(config_test, validate_and_set_defaults)
.number_of_user_bins = 1u,
.maximum_fpr = 0.3,
.relaxed_fpr = 0.2};
check_error_message(configuration,
"[HIBF CONFIG ERROR] config::relaxed_fpr must be "
"greater than or equal to config::maximum_fpr.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::relaxed_fpr must be "
"greater than or equal to config::maximum_fpr.");
}

// threads cannot be 0
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .threads = 0u};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::threads must be greater than 0.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::threads must be greater than 0.");
}

// sketch_bits must be in [5,32]
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .sketch_bits = 4u};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32].");

configuration.sketch_bits = 33u;
check_error_message(configuration, "[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::sketch_bits must be in [5,32].");
}

// Set default tmax
Expand Down Expand Up @@ -260,9 +267,10 @@ TEST(config_test, validate_and_set_defaults)
.number_of_user_bins = 1u,
.tmax = 18'446'744'073'709'551'553ULL};

check_error_message(configuration,
"[HIBF CONFIG ERROR] The maximum possible config::tmax "
"is 18446744073709551552.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] The maximum possible config::tmax "
"is 18446744073709551552.");
}

// Given tmax is not a multiple of 64
Expand All @@ -282,18 +290,24 @@ TEST(config_test, validate_and_set_defaults)
// alpha must be positive
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn, .number_of_user_bins = 1u, .alpha = -0.1};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::alpha must be positive.");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::alpha must be positive.");
}

// max_rearrangement_ratio must be in [0.0,1.0]
{
seqan::hibf::config configuration{.input_fn = dummy_input_fn,
.number_of_user_bins = 1u,
.max_rearrangement_ratio = -0.1};
check_error_message(configuration, "[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0].");

configuration.max_rearrangement_ratio = 1.1;
check_error_message(configuration, "[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0].");
EXPECT_THROW_MSG(configuration.validate_and_set_defaults(),
std::invalid_argument,
"[HIBF CONFIG ERROR] config::max_rearrangement_ratio must be in [0.0,1.0].");
}

// Set disable_rearrangement if disable_estimate_union is set
Expand Down
1 change: 1 addition & 0 deletions test/unit/hibf/layout/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ hibf_test (layout_test.cpp)
hibf_test (fpr_correction_test.cpp)
hibf_test (graph_test.cpp)
hibf_test (compute_layout_test.cpp)
hibf_test (data_store_test.cpp)
Loading
Loading