From 6a67a4881a76d4c8f07d26c74d3b2ceb0c62c944 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Wed, 23 Oct 2024 13:30:05 +0200 Subject: [PATCH] [MISC] Use new insert_iterator in more places --- src/build/insert_into_ibf.cpp | 14 +- src/interleaved_bloom_filter.cpp | 9 +- src/sketch/compute_sketches.cpp | 21 ++- test/performance/ibf/CMakeLists.txt | 1 + ...ed_bloom_filter_construction_benchmark.cpp | 153 ++++++++++++++++++ .../sketch/compute_sketches_benchmark.cpp | 21 ++- 6 files changed, 185 insertions(+), 34 deletions(-) create mode 100644 test/performance/ibf/interleaved_bloom_filter_construction_benchmark.cpp diff --git a/src/build/insert_into_ibf.cpp b/src/build/insert_into_ibf.cpp index 4a5b0784..07b45b78 100644 --- a/src/build/insert_into_ibf.cpp +++ b/src/build/insert_into_ibf.cpp @@ -51,20 +51,14 @@ void insert_into_ibf(build_data const & data, layout::layout::user_bin const & record, seqan::hibf::interleaved_bloom_filter & ibf) { - auto const bin_index = seqan::hibf::bin_index{static_cast(record.storage_TB_id)}; - std::vector values; - serial_timer local_user_bin_io_timer{}; - local_user_bin_io_timer.start(); - data.config.input_fn(record.idx, insert_iterator{values}); - local_user_bin_io_timer.stop(); - data.user_bin_io_timer += local_user_bin_io_timer; - serial_timer local_fill_ibf_timer{}; + local_user_bin_io_timer.start(); local_fill_ibf_timer.start(); - for (auto && value : values) - ibf.emplace(value, bin_index); + data.config.input_fn(record.idx, insert_iterator{ibf, record.storage_TB_id}); + local_user_bin_io_timer.stop(); local_fill_ibf_timer.stop(); + data.user_bin_io_timer += local_user_bin_io_timer; data.fill_ibf_timer += local_fill_ibf_timer; } diff --git a/src/interleaved_bloom_filter.cpp b/src/interleaved_bloom_filter.cpp index 195ad487..ab3ae1fd 100644 --- a/src/interleaved_bloom_filter.cpp +++ b/src/interleaved_bloom_filter.cpp @@ -80,16 +80,11 @@ interleaved_bloom_filter::interleaved_bloom_filter(config & configuration, size_ { // 
NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) size_t const chunk_size = std::clamp(std::bit_ceil(bin_count() / configuration.threads), 8u, 64u); - robin_hood::unordered_flat_set kmers; -#pragma omp parallel for schedule(dynamic, chunk_size) num_threads(configuration.threads) private(kmers) +#pragma omp parallel for schedule(dynamic, chunk_size) num_threads(configuration.threads) for (size_t i = 0u; i < configuration.number_of_user_bins; ++i) { - kmers.clear(); - configuration.input_fn(i, insert_iterator{kmers}); - - for (uint64_t const hash : kmers) - emplace(hash, seqan::hibf::bin_index{i}); + configuration.input_fn(i, insert_iterator{*this, i}); } } diff --git a/src/sketch/compute_sketches.cpp b/src/sketch/compute_sketches.cpp index 5faa299d..cd7d9ab8 100644 --- a/src/sketch/compute_sketches.cpp +++ b/src/sketch/compute_sketches.cpp @@ -27,21 +27,18 @@ namespace seqan::hibf::sketch void compute_sketches(config const & config, std::vector & hll_sketches) { // compute hll_sketches - hll_sketches.resize(config.number_of_user_bins); + hll_sketches.resize(config.number_of_user_bins, config.sketch_bits); + + assert(std::ranges::all_of(hll_sketches, + [bits = config.sketch_bits](hyperloglog const & sketch) + { + return sketch.data_size() == (1ULL << bits); + })); - robin_hood::unordered_flat_set kmers; -#pragma omp parallel for schedule(dynamic) num_threads(config.threads) private(kmers) +#pragma omp parallel for schedule(dynamic) num_threads(config.threads) for (size_t i = 0; i < config.number_of_user_bins; ++i) { - seqan::hibf::sketch::hyperloglog hll_sketch(config.sketch_bits); - - kmers.clear(); - config.input_fn(i, insert_iterator{kmers}); - - for (auto k_hash : kmers) - hll_sketch.add(k_hash); - - hll_sketches[i] = std::move(hll_sketch); + config.input_fn(i, insert_iterator{hll_sketches[i]}); } } diff --git a/test/performance/ibf/CMakeLists.txt b/test/performance/ibf/CMakeLists.txt index 498044fc..df882bf3 100644 --- a/test/performance/ibf/CMakeLists.txt +++ 
b/test/performance/ibf/CMakeLists.txt @@ -5,3 +5,4 @@ hibf_benchmark (bit_vector_benchmark.cpp) hibf_benchmark (bit_vector_serialisation_benchmark.cpp) hibf_benchmark (interleaved_bloom_filter_benchmark.cpp) +hibf_benchmark (interleaved_bloom_filter_construction_benchmark.cpp) diff --git a/test/performance/ibf/interleaved_bloom_filter_construction_benchmark.cpp b/test/performance/ibf/interleaved_bloom_filter_construction_benchmark.cpp new file mode 100644 index 00000000..1d6848a2 --- /dev/null +++ b/test/performance/ibf/interleaved_bloom_filter_construction_benchmark.cpp @@ -0,0 +1,153 @@ +// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +#include // for State, Benchmark, AddCustomContext, Counter, BENCHMARK + +#include // for __fn, generate +#include // for log, ceil, exp +#include // for size_t +#include // for equal_to +#include // for uniform_int_distribution, mt19937_64 +#include // for transform_view, iota_view, __range_adaptor_closure_t, __fn +#include // for to_string, basic_string +#include // for tuple, make_tuple +#include // for move, pair +#include // for vector + +#include // for hash, unordered_map +#include // for chunk, chunk_fn, chunk_view +#include // for operator| +#include // for bin_index, interleaved_bloom_filter, bin_count, bin_size +#include // for divide_and_ceil +#include // for HIBF_HAS_AVX512 +#include // for operator""_MiB + +using namespace seqan::hibf::test::literals; +static constexpr size_t total_ibf_size_in_bytes{1_MiB}; +static constexpr size_t number_of_hash_functions{2u}; +static constexpr double false_positive_rate{0.05}; + +inline benchmark::Counter ibf_size(size_t const bit_size) +{ + return benchmark::Counter(bit_size / 8, benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024); +} + +// This computes how many elements need to be inserted into the IBF to achieve 
the desired false positive rate for the +// given size. +// The `number_of_elements` many generated values are chunked across the bins and only used for constructing the IBF. +static /* cmath not constexpr in libc++ */ size_t number_of_elements = []() +{ + size_t const bits = 8u * total_ibf_size_in_bytes; + double const numerator = -std::log(1 - std::exp(std::log(false_positive_rate) / number_of_hash_functions)) * bits; + return std::ceil(numerator / number_of_hash_functions); +}(); + +static auto get_value(size_t const bins) +{ + size_t const chunk_size = seqan::hibf::divide_and_ceil(number_of_elements, bins); + return seqan::stl::views::chunk(std::views::iota(size_t{}, number_of_elements), chunk_size); +} + +void manual_construct(::benchmark::State & state) +{ + size_t const bins = state.range(0); + size_t const bits = 8u * total_ibf_size_in_bytes / bins; + + auto values = get_value(bins); + + for (auto _ : state) + { + seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{bins}, + seqan::hibf::bin_size{bits}, + seqan::hibf::hash_function_count{number_of_hash_functions}}; + + for (size_t bin_index = 0u; bin_index < bins; ++bin_index) + { + for (auto value : values[bin_index]) + ibf.emplace(value, seqan::hibf::bin_index{bin_index}); + } + + state.counters["IBF_size"] = ibf_size(ibf.bit_size()); + + benchmark::DoNotOptimize(ibf); + } +} + +void config_construct(::benchmark::State & state) +{ + size_t const bins = state.range(0); + + auto values = get_value(bins); + + seqan::hibf::config config{.input_fn = + [&values](size_t const user_bin_id, seqan::hibf::insert_iterator && it) + { + for (auto const value : values[user_bin_id]) + it = value; + }, + .number_of_user_bins = bins, + .number_of_hash_functions = number_of_hash_functions, + .maximum_fpr = false_positive_rate}; + + for (auto _ : state) + { + seqan::hibf::interleaved_bloom_filter ibf{config}; + + state.counters["IBF_size"] = ibf_size(ibf.bit_size()); + + benchmark::DoNotOptimize(ibf); + } +} + +void 
config_and_max_construct(::benchmark::State & state) +{ + size_t const bins = state.range(0); + + auto values = get_value(bins); + size_t const max_bin_size = values[0].size(); + + seqan::hibf::config config{.input_fn = + [&values](size_t const user_bin_id, seqan::hibf::insert_iterator && it) + { + for (auto const value : values[user_bin_id]) + it = value; + }, + .number_of_user_bins = bins, + .number_of_hash_functions = number_of_hash_functions, + .maximum_fpr = false_positive_rate}; + + for (auto _ : state) + { + seqan::hibf::interleaved_bloom_filter ibf{config, max_bin_size}; + + state.counters["IBF_size"] = ibf_size(ibf.bit_size()); + + benchmark::DoNotOptimize(ibf); + } +} + +BENCHMARK(manual_construct)->RangeMultiplier(2)->Range(64, 1024); +BENCHMARK(config_construct)->RangeMultiplier(2)->Range(64, 1024); +BENCHMARK(config_and_max_construct)->RangeMultiplier(2)->Range(64, 1024); + +// This is a hack to add custom context information to the benchmark output. +// The alternative would be to do it in the main(). However, this would require +// not using the BENCHMARK_MAIN macro. +[[maybe_unused]] static bool foo = []() +{ + benchmark::AddCustomContext("IBF size in bytes", std::to_string(total_ibf_size_in_bytes)); + benchmark::AddCustomContext("Number of hash functions", std::to_string(number_of_hash_functions)); + benchmark::AddCustomContext("False positive rate", std::to_string(false_positive_rate)); + benchmark::AddCustomContext("Number of elements", std::to_string(number_of_elements)); + benchmark::AddCustomContext("HIBF_HAS_AVX512", HIBF_HAS_AVX512 ? 
"true" : "false"); + benchmark::AddCustomContext("AVX512 support", +#if __AVX512F__ && __AVX512BW__ + "true"); +#else + "false"); +#endif + return true; +}(); + +BENCHMARK_MAIN(); diff --git a/test/performance/sketch/compute_sketches_benchmark.cpp b/test/performance/sketch/compute_sketches_benchmark.cpp index 7e6c2358..f2cd0f64 100644 --- a/test/performance/sketch/compute_sketches_benchmark.cpp +++ b/test/performance/sketch/compute_sketches_benchmark.cpp @@ -13,6 +13,11 @@ #include // for hyperloglog #include // for minhashes +inline benchmark::Counter elements_per_second(size_t const count) +{ + return benchmark::Counter(count, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1000); +} + enum class sketch : uint8_t { Hyperloglog, @@ -22,30 +27,36 @@ enum class sketch : uint8_t template void compute_sketches(benchmark::State & state) { + static constexpr uint64_t elements_per_bin = 10000; auto create_hashes = [&](size_t const ub_id, seqan::hibf::insert_iterator it) { // 0 = [0, 10000] // 1 = [10000, 20000] // 1 = [20000, 30000] - for (size_t i = ub_id * 10000; i < (ub_id + 1) * 10000; ++i) + for (size_t i = ub_id * elements_per_bin; i < (ub_id + 1) * elements_per_bin; ++i) it = i; }; - [[maybe_unused]] std::vector minhash_sketches; - std::vector hyperloglog_sketches; - seqan::hibf::config config{}; - config.number_of_user_bins = 16; + config.number_of_user_bins = 64; config.input_fn = create_hashes; config.sketch_bits = 12; + [[maybe_unused]] std::vector minhash_sketches; + std::vector hyperloglog_sketches(config.number_of_user_bins, config.sketch_bits); + for (auto _ : state) { if constexpr (sketch_t == sketch::MinHashes) seqan::hibf::sketch::compute_sketches(config, hyperloglog_sketches, minhash_sketches); else seqan::hibf::sketch::compute_sketches(config, hyperloglog_sketches); + + benchmark::DoNotOptimize(hyperloglog_sketches); + benchmark::ClobberMemory(); } + + state.counters["elements"] = elements_per_second(elements_per_bin * 
config.number_of_user_bins); } BENCHMARK_TEMPLATE(compute_sketches, sketch::Hyperloglog);