From a7bfc095967781c0f21432e3b748f843119cf757 Mon Sep 17 00:00:00 2001 From: Svenja Mehringer Date: Wed, 30 Aug 2023 20:23:55 +0200 Subject: [PATCH 1/2] [MISC] Update layout namespace. --- include/hibf/layout/data_store.hpp | 6 +++--- include/hibf/layout/execute.hpp | 4 ++-- include/hibf/layout/print_matrix.hpp | 4 ++-- src/layout/compute_layout.cpp | 2 +- src/layout/execute.cpp | 12 ++++++------ .../hibf/layout/hierarchical_binning_test.cpp | 14 +++++++------- test/unit/hibf/layout/simple_binning_test.cpp | 18 +++++++++--------- 7 files changed, 30 insertions(+), 30 deletions(-) diff --git a/include/hibf/layout/data_store.hpp b/include/hibf/layout/data_store.hpp index 7b474dcd..b8c9430a 100644 --- a/include/hibf/layout/data_store.hpp +++ b/include/hibf/layout/data_store.hpp @@ -11,7 +11,7 @@ #include // for layout #include // for hyperloglog -namespace seqan::hibf +namespace seqan::hibf::layout { struct data_store @@ -41,7 +41,7 @@ struct data_store double false_positive_rate{}; //!\brief The layout that is built by layout::hierarchical_binning. - layout::layout * hibf_layout; // Will be modified by {simple,hierarchical}_binning. + layout * hibf_layout; // Will be modified by {simple,hierarchical}_binning. //!\brief The kmer counts associated with the above files used to layout user bin into technical bins. std::vector const * kmer_counts{}; // Pointed to data should not be modified. @@ -79,4 +79,4 @@ struct data_store //!\} }; -} // namespace seqan::hibf +} // namespace seqan::hibf::layout diff --git a/include/hibf/layout/execute.hpp b/include/hibf/layout/execute.hpp index 3cc018c9..1790affd 100644 --- a/include/hibf/layout/execute.hpp +++ b/include/hibf/layout/execute.hpp @@ -5,9 +5,9 @@ #include // for config #include // for data_store -namespace seqan::hibf +namespace seqan::hibf::layout { size_t execute(config const &, data_store &); -} // namespace seqan::hibf +} // namespace seqan::hibf::layout diff --git a/include/hibf/layout/print_matrix.hpp b/include/hibf/layout/print_matrix.hpp index 53856b54..ce110c01 100644 --- a/include/hibf/layout/print_matrix.hpp +++ b/include/hibf/layout/print_matrix.hpp @@ -4,7 +4,7 @@ #include -namespace seqan::hibf +namespace seqan::hibf::layout { // helper function to print a matrix when debugging @@ -28,4 +28,4 @@ void print_matrix(matrix_type const & matrix, std::cerr << '\n'; } -} // namespace seqan::hibf +} // namespace seqan::hibf::layout diff --git a/src/layout/compute_layout.cpp b/src/layout/compute_layout.cpp index 370d863e..0c2ebf0c 100644 --- a/src/layout/compute_layout.cpp +++ b/src/layout/compute_layout.cpp @@ -55,7 +55,7 @@ compute_layout(config const & config, std::vector & kmer_counts, std::ve .kmer_counts = std::addressof(kmer_counts), .sketches = std::addressof(sketches)}; - size_t const max_hibf_id = seqan::hibf::execute(config, store); + size_t const max_hibf_id = seqan::hibf::layout::execute(config, store); store.hibf_layout->top_level_max_bin_id = max_hibf_id; // sort records ascending by the number of bin indices (corresponds to the IBF levels) diff --git a/src/layout/execute.cpp b/src/layout/execute.cpp index af6830b6..b9bb923a 100644 --- a/src/layout/execute.cpp +++ b/src/layout/execute.cpp @@ -12,10 +12,10 @@ #include // for hierarchical_binning #include // for next_multiple_of_64 -namespace seqan::hibf +namespace seqan::hibf::layout { -size_t execute(seqan::hibf::config const & config, seqan::hibf::data_store & data) +size_t execute(seqan::hibf::config const & config, seqan::hibf::layout::data_store & data) { seqan::hibf::config config_copy{config}; @@ -41,11 +41,11 @@ size_t execute(seqan::hibf::config const & config, seqan::hibf::data_store & dat } data.fpr_correction = - layout::compute_fpr_correction({.fpr = config_copy.maximum_false_positive_rate, // prevent clang-format - .hash_count = config_copy.number_of_hash_functions, - .t_max = config_copy.tmax}); + compute_fpr_correction({.fpr = config_copy.maximum_false_positive_rate, // prevent clang-format + .hash_count = config_copy.number_of_hash_functions, + .t_max = config_copy.tmax}); return seqan::hibf::layout::hierarchical_binning{data, config_copy}.execute(); } -} // namespace seqan::hibf +} // namespace seqan::hibf::layout diff --git a/test/unit/hibf/layout/hierarchical_binning_test.cpp b/test/unit/hibf/layout/hierarchical_binning_test.cpp index 3e378415..237a14ae 100644 --- a/test/unit/hibf/layout/hierarchical_binning_test.cpp +++ b/test/unit/hibf/layout/hierarchical_binning_test.cpp @@ -19,7 +19,7 @@ TEST(hierarchical_binning_test, small_example) seqan::hibf::layout::layout hibf_layout{}; std::vector kmer_counts{500, 1000, 500, 500, 500, 500, 500, 500}; - seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; + seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; data.fpr_correction = seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax}); @@ -49,7 +49,7 @@ TEST(hierarchical_binning_test, another_example) seqan::hibf::layout::layout hibf_layout{}; std::vector kmer_counts{50, 1000, 1000, 50, 5, 10, 10, 5}; - seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; + seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; data.fpr_correction = seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax}); @@ -80,7 +80,7 @@ TEST(hierarchical_binning_test, high_level_max_bin_id_is_0) seqan::hibf::layout::layout hibf_layout{}; std::vector kmer_counts{500, 500, 500, 500}; - seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; + seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; data.fpr_correction = seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax}); @@ -105,7 +105,7 @@ TEST(hierarchical_binning_test, knuts_example) seqan::hibf::layout::layout hibf_layout{}; std::vector kmer_counts{60, 600, 1000, 800, 800}; - seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; + seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; data.fpr_correction = seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax}); @@ -133,7 +133,7 @@ TEST(hierarchical_binning_test, four_level_hibf) seqan::hibf::layout::layout hibf_layout{}; std::vector kmer_counts{11090, 5080, 3040, 1020, 510, 500}; - seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; + seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; data.fpr_correction = seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax}); @@ -166,7 +166,7 @@ TEST(hierarchical_binning_test, tb0_is_a_merged_bin) seqan::hibf::layout::layout hibf_layout{}; std::vector kmer_counts{500, 500, 500, 500}; - seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; + seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; data.fpr_correction = seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax}); @@ -194,7 +194,7 @@ TEST(hierarchical_binning_test, tb0_is_a_merged_bin_and_leads_to_recursive_call) seqan::hibf::layout::layout hibf_layout{}; std::vector kmer_counts{500, 500, 500, 500, 500, 500, 500, 500}; - seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; + seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts}; data.fpr_correction = seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax}); diff --git a/test/unit/hibf/layout/simple_binning_test.cpp b/test/unit/hibf/layout/simple_binning_test.cpp index 10042401..769531ca 100644 --- a/test/unit/hibf/layout/simple_binning_test.cpp +++ b/test/unit/hibf/layout/simple_binning_test.cpp @@ -14,9 +14,9 @@ TEST(simple_binning_test, small_example) seqan::hibf::layout::layout hibf_layout; std::vector kmer_counts{100, 40, 20, 20}; - seqan::hibf::data_store data{.hibf_layout = &hibf_layout, - .kmer_counts = &kmer_counts, - .fpr_correction = std::vector(65, 1.0)}; + seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, + .kmer_counts = &kmer_counts, + .fpr_correction = std::vector(65, 1.0)}; seqan::hibf::layout::simple_binning algo{data, 9}; size_t max_bin = algo.execute(); @@ -35,9 +35,9 @@ TEST(simple_binning_test, uniform_distribution) seqan::hibf::layout::layout hibf_layout; std::vector kmer_counts{20, 20, 20, 20}; - seqan::hibf::data_store data{.hibf_layout = &hibf_layout, - .kmer_counts = &kmer_counts, - .fpr_correction = std::vector(65, 1.0)}; + seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, + .kmer_counts = &kmer_counts, + .fpr_correction = std::vector(65, 1.0)}; seqan::hibf::layout::simple_binning algo{data, 4u}; size_t max_bin = algo.execute(); @@ -57,9 +57,9 @@ TEST(simple_binning_test, user_bins_must_be_smaller_than_technical_bins) std::vector kmer_counts{100, 40, 20, 20}; - seqan::hibf::data_store data{.hibf_layout = &hibf_layout, - .kmer_counts = &kmer_counts, - .fpr_correction = std::vector(65, 1.0)}; + seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, + .kmer_counts = &kmer_counts, + .fpr_correction = std::vector(65, 1.0)}; EXPECT_THROW((seqan::hibf::layout::simple_binning{data, 2}), std::runtime_error); } From 72cdc3e6ac2fcc49f08e96d3315d7c25272941e9 Mon Sep 17 00:00:00 2001 From: Svenja Mehringer Date: Wed, 30 Aug 2023 20:28:59 +0200 Subject: [PATCH 2/2] [MISC] Add build namespace. --- include/hibf/build/bin_size_in_bits.hpp | 4 +-- include/hibf/build/build_data.hpp | 4 +-- include/hibf/build/chopper_pack_record.hpp | 4 +-- include/hibf/build/compute_kmers.hpp | 4 +-- include/hibf/build/construct_ibf.hpp | 4 +-- include/hibf/build/insert_into_ibf.hpp | 4 +-- include/hibf/build/update_parent_kmers.hpp | 4 +-- include/hibf/build/update_user_bins.hpp | 4 +-- src/build/compute_kmers.cpp | 4 +-- src/build/construct_ibf.cpp | 4 +-- src/build/insert_into_ibf.cpp | 4 +-- src/hierarchical_interleaved_bloom_filter.cpp | 26 +++++++++++-------- util/fpr_quality.cpp | 4 +-- 13 files changed, 39 insertions(+), 35 deletions(-) diff --git a/include/hibf/build/bin_size_in_bits.hpp b/include/hibf/build/bin_size_in_bits.hpp index 6aa78934..8fe2ebf7 100644 --- a/include/hibf/build/bin_size_in_bits.hpp +++ b/include/hibf/build/bin_size_in_bits.hpp @@ -17,7 +17,7 @@ #include -namespace seqan::hibf +namespace seqan::hibf::build { struct bin_size_parameters @@ -35,4 +35,4 @@ inline size_t bin_size_in_bits(bin_size_parameters const & params) return result; } -} // namespace seqan::hibf +} // namespace seqan::hibf::build diff --git a/include/hibf/build/build_data.hpp b/include/hibf/build/build_data.hpp index dde0715b..7d406d07 100644 --- a/include/hibf/build/build_data.hpp +++ b/include/hibf/build/build_data.hpp @@ -15,7 +15,7 @@ #include // for concurrent, timer #include // for node_data -namespace seqan::hibf +namespace seqan::hibf::build { struct build_data @@ -40,4 +40,4 @@ struct build_data } }; -} // namespace seqan::hibf +} // namespace seqan::hibf::build diff --git a/include/hibf/build/chopper_pack_record.hpp b/include/hibf/build/chopper_pack_record.hpp index d46499ed..305271e7 100644 --- a/include/hibf/build/chopper_pack_record.hpp +++ b/include/hibf/build/chopper_pack_record.hpp @@ -13,7 +13,7 @@ #include -namespace seqan::hibf +namespace seqan::hibf::build { struct chopper_pack_record @@ -45,4 +45,4 @@ struct chopper_pack_record } }; -} // namespace seqan::hibf +} // namespace seqan::hibf::build diff --git a/include/hibf/build/compute_kmers.hpp b/include/hibf/build/compute_kmers.hpp index 77823588..7ec38013 100644 --- a/include/hibf/build/compute_kmers.hpp +++ b/include/hibf/build/compute_kmers.hpp @@ -18,11 +18,11 @@ #include // for unordered_flat_set #include // for layout -namespace seqan::hibf +namespace seqan::hibf::build { void compute_kmers(robin_hood::unordered_flat_set & kmers, build_data const & data, layout::layout::user_bin const & record); -} // namespace seqan::hibf +} // namespace seqan::hibf::build diff --git a/include/hibf/build/construct_ibf.hpp b/include/hibf/build/construct_ibf.hpp index b4181c72..776e02dc 100644 --- a/include/hibf/build/construct_ibf.hpp +++ b/include/hibf/build/construct_ibf.hpp @@ -15,7 +15,7 @@ #include // for interleaved_bloom_filter #include -namespace seqan::hibf +namespace seqan::hibf::build { seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_set & parent_kmers, @@ -25,4 +25,4 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s build_data & data, bool is_root); -} // namespace seqan::hibf +} // namespace seqan::hibf::build diff --git a/include/hibf/build/insert_into_ibf.hpp b/include/hibf/build/insert_into_ibf.hpp index 85a1d1d6..0097ccf7 100644 --- a/include/hibf/build/insert_into_ibf.hpp +++ b/include/hibf/build/insert_into_ibf.hpp @@ -16,7 +16,7 @@ #include // for interleaved_bloom_filter #include // for layout -namespace seqan::hibf +namespace seqan::hibf::build { // automatically does naive splitting if number_of_bins > 1 @@ -30,4 +30,4 @@ void insert_into_ibf(build_data const & data, layout::layout::user_bin const & record, seqan::hibf::interleaved_bloom_filter & ibf); -} // namespace seqan::hibf +} // namespace seqan::hibf::build diff --git a/include/hibf/build/update_parent_kmers.hpp b/include/hibf/build/update_parent_kmers.hpp index 8f446f12..508b798b 100644 --- a/include/hibf/build/update_parent_kmers.hpp +++ b/include/hibf/build/update_parent_kmers.hpp @@ -17,7 +17,7 @@ #include // for concurrent, timer #include -namespace seqan::hibf +namespace seqan::hibf::build { inline void update_parent_kmers(robin_hood::unordered_flat_set & parent_kmers, @@ -31,4 +31,4 @@ inline void update_parent_kmers(robin_hood::unordered_flat_set & paren merge_kmers_timer += local_merge_kmers_timer; } -} // namespace seqan::hibf +} // namespace seqan::hibf::build diff --git a/include/hibf/build/update_user_bins.hpp b/include/hibf/build/update_user_bins.hpp index 3287dfe8..a355f199 100644 --- a/include/hibf/build/update_user_bins.hpp +++ b/include/hibf/build/update_user_bins.hpp @@ -13,7 +13,7 @@ #include // for layout -namespace seqan::hibf +namespace seqan::hibf::build { inline void update_user_bins(std::vector & filename_indices, layout::layout::user_bin const & record) @@ -21,4 +21,4 @@ inline void update_user_bins(std::vector & filename_indices, layout::la std::fill_n(filename_indices.begin() + record.storage_TB_id, record.number_of_technical_bins, record.idx); } -} // namespace seqan::hibf +} // namespace seqan::hibf::build diff --git a/src/build/compute_kmers.cpp b/src/build/compute_kmers.cpp index 47d45952..e118f892 100644 --- a/src/build/compute_kmers.cpp +++ b/src/build/compute_kmers.cpp @@ -21,7 +21,7 @@ #include // for concurrent, timer #include // for layout -namespace seqan::hibf +namespace seqan::hibf::build { void compute_kmers(robin_hood::unordered_flat_set & kmers, @@ -35,4 +35,4 @@ void compute_kmers(robin_hood::unordered_flat_set & kmers, data.user_bin_io_timer += local_user_bin_io_timer; } -} // namespace seqan::hibf +} // namespace seqan::hibf::build diff --git a/src/build/construct_ibf.cpp b/src/build/construct_ibf.cpp index b1d1368c..62af85bc 100644 --- a/src/build/construct_ibf.cpp +++ b/src/build/construct_ibf.cpp @@ -21,7 +21,7 @@ #include // for interleaved_bloom_filter, bin_count, bin_size, hash_fun... #include -namespace seqan::hibf +namespace seqan::hibf::build { seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_set & parent_kmers, @@ -54,4 +54,4 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s return ibf; } -} // namespace seqan::hibf +} // namespace seqan::hibf::build diff --git a/src/build/insert_into_ibf.cpp b/src/build/insert_into_ibf.cpp index b0b745f7..27c5cf5e 100644 --- a/src/build/insert_into_ibf.cpp +++ b/src/build/insert_into_ibf.cpp @@ -22,7 +22,7 @@ #include // for interleaved_bloom_filter, bin_index #include // for layout -namespace seqan::hibf +namespace seqan::hibf::build { // automatically does naive splitting if number_of_bins > 1 @@ -70,4 +70,4 @@ void insert_into_ibf(build_data const & data, data.fill_ibf_timer += local_fill_ibf_timer; } -} // namespace seqan::hibf +} // namespace seqan::hibf::build diff --git a/src/hierarchical_interleaved_bloom_filter.cpp b/src/hierarchical_interleaved_bloom_filter.cpp index bb4ee1a4..c7ca0d71 100644 --- a/src/hierarchical_interleaved_bloom_filter.cpp +++ b/src/hierarchical_interleaved_bloom_filter.cpp @@ -36,7 +36,7 @@ namespace seqan::hibf size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, robin_hood::unordered_flat_set & parent_kmers, layout::graph::node const & current_node, - build_data & data, + build::build_data & data, bool is_root) { size_t const ibf_pos{data.request_ibf_idx()}; @@ -62,8 +62,8 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, { // we assume that the max record is at the beginning of the list of remaining records. auto const & record = current_node.remaining_records[0]; - compute_kmers(kmers, data, record); - update_user_bins(filename_indices, record); + build::compute_kmers(kmers, data, record); + build::update_user_bins(filename_indices, record); return record.number_of_technical_bins; } @@ -117,9 +117,9 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, size_t const mutex_id{parent_bin_index / 64}; std::lock_guard guard{local_ibf_mutex[mutex_id]}; ibf_positions[parent_bin_index] = ibf_pos; - insert_into_ibf(kmers, 1, parent_bin_index, ibf, data.fill_ibf_timer); + build::insert_into_ibf(kmers, 1, parent_bin_index, ibf, data.fill_ibf_timer); if (!is_root) - update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer); + build::update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer); } } }; @@ -134,17 +134,21 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, if (is_root && record.number_of_technical_bins == 1) // no splitting needed { - insert_into_ibf(data, record, ibf); + build::insert_into_ibf(data, record, ibf); } else { compute_kmers(kmers, data, record); - insert_into_ibf(kmers, record.number_of_technical_bins, record.storage_TB_id, ibf, data.fill_ibf_timer); + build::insert_into_ibf(kmers, + record.number_of_technical_bins, + record.storage_TB_id, + ibf, + data.fill_ibf_timer); if (!is_root) - update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer); + build::update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer); } - update_user_bins(filename_indices, record); + build::update_user_bins(filename_indices, record); kmers.clear(); } @@ -157,7 +161,7 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf, layout::graph::node const & root_node, - build_data & data) + build::build_data & data) { robin_hood::unordered_flat_set root_kmers{}; return hierarchical_build(hibf, root_kmers, root_node, data, true); @@ -174,7 +178,7 @@ void build_index(hierarchical_interleaved_bloom_filter & hibf, hibf.user_bins.set_user_bin_count(hibf_layout.user_bins.size()); hibf.next_ibf_id.resize(number_of_ibfs); - build_data data{.config = config, .ibf_graph = {hibf_layout}}; + build::build_data data{.config = config, .ibf_graph = {hibf_layout}}; layout::graph::node const & root_node = data.ibf_graph.root; diff --git a/util/fpr_quality.cpp b/util/fpr_quality.cpp index a56fa619..8ff8fc9c 100644 --- a/util/fpr_quality.cpp +++ b/util/fpr_quality.cpp @@ -93,7 +93,7 @@ void init_parser(sharg::parser & parser, config & cfg) size_t split_bin_size_in_bits(config const & cfg) { - return seqan::hibf::bin_size_in_bits( + return seqan::hibf::build::bin_size_in_bits( {.fpr = cfg.fpr, .hash_count = cfg.hash, .elements = cfg.split_elements_per_bin}); } @@ -109,7 +109,7 @@ void print_results(size_t const fp_count, config const & cfg) void single_tb(config const & cfg) { seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{1u}, - seqan::hibf::bin_size{seqan::hibf::bin_size_in_bits( + seqan::hibf::bin_size{seqan::hibf::build::bin_size_in_bits( {.fpr = cfg.fpr, .hash_count = cfg.hash, .elements = cfg.elements})}, seqan::hibf::hash_function_count{cfg.hash}}; auto agent = ibf.membership_agent();