From a2fa58b09e746a5abb9ac8ea108b14475fea61f2 Mon Sep 17 00:00:00 2001
From: Svenja Mehringer
Date: Wed, 23 Aug 2023 13:54:25 +0200
Subject: [PATCH] [FEATURE] Make hibf constructible from a layout file.

---
 .../hierarchical_interleaved_bloom_filter.hpp | 10 ++++
 src/hierarchical_interleaved_bloom_filter.cpp | 15 ++++++
 ...archical_interleaved_bloom_filter_test.cpp | 49 ++++++++++++++++++-
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/include/hibf/hierarchical_interleaved_bloom_filter.hpp b/include/hibf/hierarchical_interleaved_bloom_filter.hpp
index 5f026707..cc23275e 100644
--- a/include/hibf/hierarchical_interleaved_bloom_filter.hpp
+++ b/include/hibf/hierarchical_interleaved_bloom_filter.hpp
@@ -105,6 +105,16 @@ class hierarchical_interleaved_bloom_filter
     ~hierarchical_interleaved_bloom_filter() = default; //!< Defaulted.
 
     hierarchical_interleaved_bloom_filter(config const & configuration);
+
+    /*!\brief [Advanced] Constructs the HIBF from a layout file (stream) and a given input function.
+     * \param input_fn      Function that provides the input data; it is assigned to hibf::config::input_fn.
+     * \param layout_stream Stream from which first the hibf::config and then the layout are read.
+     * \details Constructs an HIBF from a given layout instead of computing the layout from the input function. No
+     * hibf::config object is needed, as it is assumed to be stored in the layout file. A layout file can be
+     * built manually or via chopper (https://github.com/seqan/chopper) or raptor-layout (https://github.com/seqan/raptor).
+     */
+    hierarchical_interleaved_bloom_filter(std::function input_fn,
+                                          std::istream & layout_stream);
     //!\}
 
     //!\brief The individual interleaved Bloom filters.
diff --git a/src/hierarchical_interleaved_bloom_filter.cpp b/src/hierarchical_interleaved_bloom_filter.cpp
index 8545010c..c5fa3772 100644
--- a/src/hierarchical_interleaved_bloom_filter.cpp
+++ b/src/hierarchical_interleaved_bloom_filter.cpp
@@ -199,4 +199,19 @@ hierarchical_interleaved_bloom_filter::hierarchical_interleaved_bloom_filter(con
     build_index(*this, configuration, layout);
 }
 
+hierarchical_interleaved_bloom_filter::hierarchical_interleaved_bloom_filter(
+    std::function input_fn,
+    std::istream & layout_stream)
+{
+    // Read the config first and then the layout; both come from the same stream, in that order.
+    config configuration;
+    layout::layout hibf_layout;
+    configuration.read_from(layout_stream);
+    hibf_layout.read_from(layout_stream);
+
+    configuration.input_fn = input_fn; // Set the input function here as it cannot be serialized.
+
+    build_index(*this, configuration, hibf_layout);
+}
+
 } // namespace hibf
diff --git a/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp b/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp
index 0638382c..2987de0f 100644
--- a/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp
+++ b/test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp
@@ -9,7 +9,8 @@
 
 #include // for size_t
 #include // for function
-#include // for vector, allocator
+#include
+#include // for vector, allocator
 
 #include // for insert_iterator, config
 #include // for hierarchical_interleaved_bloom_filter
@@ -41,6 +42,52 @@ TEST(hibf_test, test_specific_hash_values)
     }
 }
 
+TEST(hibf_test, build_from_layout)
+{
+    // Hash values of the two user bins; the inner range at index i is what input_fn emits for user bin i.
+    std::vector> hashes{{1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u, 9u, 10u}, {1u, 2u, 3u, 4u, 5u}};
+
+    auto input_fn = [&](size_t const num, hibf::insert_iterator it)
+    {
+        for (auto const hash : hashes[num])
+            it = hash;
+    };
+
+    std::stringstream stream{"@HIBF_CONFIG\n"
+                             "@{\n"
+                             "@ \"hibf_config\": {\n"
+                             "@ \"version\": 1,\n"
+                             "@ \"number_of_user_bins\": 2,\n"
+                             "@ \"number_of_hash_functions\": 2,\n"
+                             "@ \"maximum_false_positive_rate\": 0.05,\n"
+                             "@ \"threads\": 1,\n"
+                             "@ \"sketch_bits\": 12,\n"
+                             "@ \"tmax\": 64,\n"
+                             "@ \"alpha\": 1.2,\n"
+                             "@ \"max_rearrangement_ratio\": 0.5,\n"
+                             "@ \"disable_estimate_union\": false,\n"
+                             "@ \"disable_rearrangement\": true,\n"
+                             "@ \"disable_cutoffs\": false\n"
+                             "@ }\n"
+                             "@}\n"
+                             "@HIBF_CONFIG_END\n"
+                             "#TOP_LEVEL_IBF fullest_technical_bin_idx:0\n"
+                             "#USER_BIN_IDX\tTECHNICAL_BIN_INDICES\tNUMBER_OF_TECHNICAL_BINS\n"
+                             "1\t0\t34\n" // per the column header: user bin 1, technical bin index 0, 34 technical bins
+                             "0\t34\t30\n"}; // per the column header: user bin 0, technical bin index 34, 30 technical bins
+
+    hibf::hierarchical_interleaved_bloom_filter hibf{input_fn, stream};
+
+    {
+        std::vector query{1, 2, 3, 4, 5}; // all five values are present in both entries of `hashes`
+
+        auto agent = hibf.membership_agent();
+        auto result = agent.bulk_contains(query, 2);
+
+        EXPECT_RANGE_EQ(result, (std::vector{0u, 1u}));
+    }
+}
+
 // #ifdef HIBF_HAS_SEQAN3
 
 // #include