diff --git a/README.md b/README.md index 150142f4..79b6e2a2 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ int main() auto & result1 = agent.membership_for(query1, 2u); // query1 hits in user_bin_1 and user_bin_3, which have the IDs 0 and 2, respectively. - for (int64_t hit_user_bin : result1) + for (uint64_t hit_user_bin : result1) std::cout << hit_user_bin << ' '; // The results are not sorted: 2 0 std::cout << '\n'; @@ -154,7 +154,7 @@ int main() agent.sort_results(); // Sort the results. // query2 hits in user_bin_1 and user_bin_2, which have the IDs 0 and 1, respectively. - for (int64_t hit_user_bin : result2) + for (uint64_t hit_user_bin : result2) std::cout << hit_user_bin << ' '; // The results are sorted: 0 1 std::cout << '\n'; } diff --git a/include/hibf/build/update_user_bins.hpp b/include/hibf/build/update_user_bins.hpp index a66415f8..6a8866c8 100644 --- a/include/hibf/build/update_user_bins.hpp +++ b/include/hibf/build/update_user_bins.hpp @@ -16,7 +16,7 @@ namespace seqan::hibf::build /*!\brief Updates user bins stored in HIBF. * \ingroup hibf_build */ -inline void update_user_bins(std::vector<int64_t> & filename_indices, layout::layout::user_bin const & record) +inline void update_user_bins(std::vector<uint64_t> & filename_indices, layout::layout::user_bin const & record) { std::fill_n(filename_indices.begin() + record.storage_TB_id, record.number_of_technical_bins, record.idx); } diff --git a/include/hibf/config.hpp b/include/hibf/config.hpp index 8a19ba81..cbf6d224 100644 --- a/include/hibf/config.hpp +++ b/include/hibf/config.hpp @@ -113,6 +113,8 @@ struct config * Since the data to construct the (H)IBF is given by a function object `seqan::hibf::config::input_fn`, * the number of user bins to consider must be given via this option. * + * Value must be neither `0` nor `std::numeric_limits<uint64_t>::max()`.
+ * * \include test/snippet/hibf/config_number_of_user_bins.cpp * * In this example, `12` user bins would be inserted into the (H)IBF, each only storing the hash `42`. @@ -288,7 +290,7 @@ struct config /*!\brief Checks several variables of seqan::hibf::config and sets default values if necessary. * * Required options: - * * seqan::hibf::config::number_of_user_bins must be set to a value other than `0`. + * * seqan::hibf::config::number_of_user_bins must be neither `0` nor `std::numeric_limits<uint64_t>::max()`. * * seqan::hibf::config::input_fn must be set. * * Constrains: diff --git a/include/hibf/hierarchical_interleaved_bloom_filter.hpp b/include/hibf/hierarchical_interleaved_bloom_filter.hpp index 37cba1bb..f06cb33a 100644 --- a/include/hibf/hierarchical_interleaved_bloom_filter.hpp +++ b/include/hibf/hierarchical_interleaved_bloom_filter.hpp @@ -27,6 +27,14 @@ namespace seqan::hibf { +namespace bin_kind +{ + +//!\brief The value that indicates a merged bin. +static constexpr uint64_t merged{std::numeric_limits<uint64_t>::max()}; + +} // namespace bin_kind + /*!\brief The Hierarchical Interleaved Bloom Filter (HIBF) - Fast answers to set-membership queries for multiple bins. * \ingroup hibf * \details @@ -202,16 +210,16 @@ class hierarchical_interleaved_bloom_filter * If `j != i` is returned, there is a lower level IBF, bin `b` is a merged bin, and `j` is the ID of the lower * level IBF in ibf_vector. */ - std::vector<std::vector<int64_t>> next_ibf_id; + std::vector<std::vector<uint64_t>> next_ibf_id; /*!\brief Stores for each bin in each IBF of the HIBF the user bin ID. * \details * Assume we look up a bin `b` in IBF `i`, i.e. `ibf_bin_to_user_bin_id[i][b]`. - * If `-1` is returned, bin `b` is a merged bin, there is no single user bin, we need to look into the - * lower level IBF. + * If `seqan::hibf::bin_kind::merged` is returned, bin `b` is a merged bin, there is no single user bin, we need + * to look into the lower level IBF. * Otherwise, the returned value `j` is the corresponding user bin ID.
*/ - std::vector<std::vector<int64_t>> ibf_bin_to_user_bin_id{}; + std::vector<std::vector<uint64_t>> ibf_bin_to_user_bin_id{}; //!\brief Returns a membership_agent to be used for counting. membership_agent_type membership_agent() const; @@ -269,7 +277,7 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type //!\brief Helper for recursive membership querying. template - void membership_for_impl(value_range_t && values, int64_t const ibf_idx, uint16_t const threshold) + void membership_for_impl(value_range_t && values, size_t const ibf_idx, uint16_t const threshold) { auto agent = hibf_ptr->ibf_vector[ibf_idx].template counting_agent(); auto & result = agent.bulk_count(values); @@ -282,7 +290,7 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin]; - if (current_filename_index < 0) // merged bin + if (current_filename_index == bin_kind::merged) // merged bin { if (sum >= threshold) membership_for_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold); @@ -299,7 +307,7 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type } //!\brief Stores the result of membership_for(). - std::vector<int64_t> result_buffer; + std::vector<uint64_t> result_buffer; public: /*!\name Constructors, destructor and assignment @@ -366,8 +374,8 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type * seqan::hibf::hierarchical_interleaved_bloom_filter::membership_agent for each thread.
*/ template - [[nodiscard]] std::vector<int64_t> const & membership_for(value_range_t && values, - uint16_t const threshold) & noexcept + [[nodiscard]] std::vector<uint64_t> const & membership_for(value_range_t && values, + uint16_t const threshold) & noexcept { assert(hibf_ptr != nullptr); @@ -377,7 +385,7 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type result_buffer.clear(); - membership_for_impl(values, 0, threshold); + membership_for_impl(values, 0u, threshold); return result_buffer; } @@ -385,8 +393,8 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type // `membership_for` cannot be called on a temporary, since the object the returned reference points to // is immediately destroyed. template - [[nodiscard]] std::vector<int64_t> const & membership_for(value_range_t && values, - uint16_t const threshold) && noexcept = delete; + [[nodiscard]] std::vector<uint64_t> const & membership_for(value_range_t && values, + uint16_t const threshold) && noexcept = delete; //!\} }; @@ -405,7 +413,7 @@ class hierarchical_interleaved_bloom_filter::counting_agent_type //!\brief Helper for recursive bulk counting.
template - void bulk_count_impl(value_range_t && values, int64_t const ibf_idx, size_t const threshold) + void bulk_count_impl(value_range_t && values, size_t const ibf_idx, size_t const threshold) { auto agent = hibf_ptr->ibf_vector[ibf_idx].template counting_agent(); auto & result = agent.bulk_count(values); @@ -417,7 +425,7 @@ class hierarchical_interleaved_bloom_filter::counting_agent_type sum += result[bin]; auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin]; - if (current_filename_index < 0) // merged bin + if (current_filename_index == bin_kind::merged) // merged bin { if (sum >= threshold) bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold); @@ -498,7 +506,7 @@ class hierarchical_interleaved_bloom_filter::counting_agent_type std::ranges::fill(result_buffer, static_cast(0)); - bulk_count_impl(values, 0, threshold); + bulk_count_impl(values, 0u, threshold); return result_buffer; } diff --git a/src/config.cpp b/src/config.cpp index b82069d6..36ae0753 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -69,6 +69,10 @@ void config::validate_and_set_defaults() if (number_of_user_bins == 0u) throw std::invalid_argument{"[HIBF CONFIG ERROR] You did not set the required config::number_of_user_bins."}; + if (number_of_user_bins == 18'446'744'073'709'551'615ULL) // std::numeric_limits::max() = bin_kind::merged + throw std::invalid_argument{"[HIBF CONFIG ERROR] The maximum possible config::number_of_user_bins " + "is 18446744073709551614."}; + if (number_of_hash_functions == 0u || number_of_hash_functions > 5u) throw std::invalid_argument{"[HIBF CONFIG ERROR] config::number_of_hash_functions must be in [1,5]."}; @@ -97,7 +101,7 @@ void config::validate_and_set_defaults() } else if (tmax > 18'446'744'073'709'551'552ULL) // next_multiple_of_64 would not fit in size_t. Underflowed by user? 
{ - throw std::invalid_argument{"[HIBF CONFIG ERROR] The maximum possible tmax is 18446744073709551552."}; + throw std::invalid_argument{"[HIBF CONFIG ERROR] The maximum possible config::tmax is 18446744073709551552."}; } else if (tmax % 64 != 0) { diff --git a/src/hierarchical_interleaved_bloom_filter.cpp b/src/hierarchical_interleaved_bloom_filter.cpp index d7aad4f0..6142bc56 100644 --- a/src/hierarchical_interleaved_bloom_filter.cpp +++ b/src/hierarchical_interleaved_bloom_filter.cpp @@ -46,8 +46,8 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, { size_t const ibf_pos{data.request_ibf_idx()}; - std::vector<int64_t> ibf_positions(current_node.number_of_technical_bins, ibf_pos); - std::vector<int64_t> filename_indices(current_node.number_of_technical_bins, -1); + std::vector<uint64_t> ibf_positions(current_node.number_of_technical_bins, ibf_pos); + std::vector<uint64_t> filename_indices(current_node.number_of_technical_bins, bin_kind::merged); robin_hood::unordered_flat_set kmers{}; auto initialise_max_bin_kmers = [&]() -> size_t diff --git a/test/snippet/readme.cpp b/test/snippet/readme.cpp index 1c0c3233..d228399a 100644 --- a/test/snippet/readme.cpp +++ b/test/snippet/readme.cpp @@ -77,7 +77,7 @@ int main() auto & result1 = agent.membership_for(query1, 2u); // query1 hits in user_bin_1 and user_bin_3, which have the IDs 0 and 2, respectively.
- for (int64_t hit_user_bin : result2) + for (uint64_t hit_user_bin : result2) std::cout << hit_user_bin << ' '; // The results are sorted: 0 1 std::cout << '\n'; } diff --git a/test/unit/hibf/config_test.cpp b/test/unit/hibf/config_test.cpp index af576a00..707ddd8f 100644 --- a/test/unit/hibf/config_test.cpp +++ b/test/unit/hibf/config_test.cpp @@ -162,11 +162,16 @@ TEST(config_test, validate_and_set_defaults) check_error_message(configuration, "[HIBF CONFIG ERROR] You did not set the required config::input_fn."); } - // number_of_user_bins cannot be 0 + // number_of_user_bins cannot be 0 or bin_kind::merged (18'446'744'073'709'551'615ULL) { seqan::hibf::config configuration{.input_fn = dummy_input_fn}; check_error_message(configuration, "[HIBF CONFIG ERROR] You did not set the required config::number_of_user_bins."); + + configuration.number_of_user_bins = 18'446'744'073'709'551'615ULL; + check_error_message(configuration, + "[HIBF CONFIG ERROR] The maximum possible config::number_of_user_bins " + "is 18446744073709551614."); } // number_of_hash_functions must be in [1,5] @@ -255,7 +260,9 @@ TEST(config_test, validate_and_set_defaults) .number_of_user_bins = 1u, .tmax = 18'446'744'073'709'551'553ULL}; - check_error_message(configuration, "[HIBF CONFIG ERROR] The maximum possible tmax is 18446744073709551552."); + check_error_message(configuration, + "[HIBF CONFIG ERROR] The maximum possible config::tmax " + "is 18446744073709551552."); } // Given tmax is not a multiple of 64