From 6a67a4881a76d4c8f07d26c74d3b2ceb0c62c944 Mon Sep 17 00:00:00 2001
From: Enrico Seiler <enrico.seiler@hotmail.de>
Date: Wed, 23 Oct 2024 13:30:05 +0200
Subject: [PATCH] [MISC] Use new insert_iterator in more places

---
 src/build/insert_into_ibf.cpp                 |  14 +-
 src/interleaved_bloom_filter.cpp              |   9 +-
 src/sketch/compute_sketches.cpp               |  21 ++-
 test/performance/ibf/CMakeLists.txt           |   1 +
 ...ed_bloom_filter_construction_benchmark.cpp | 153 ++++++++++++++++++
 .../sketch/compute_sketches_benchmark.cpp     |  21 ++-
 6 files changed, 185 insertions(+), 34 deletions(-)
 create mode 100644 test/performance/ibf/interleaved_bloom_filter_construction_benchmark.cpp

diff --git a/src/build/insert_into_ibf.cpp b/src/build/insert_into_ibf.cpp
index 4a5b0784..07b45b78 100644
--- a/src/build/insert_into_ibf.cpp
+++ b/src/build/insert_into_ibf.cpp
@@ -51,20 +51,14 @@ void insert_into_ibf(build_data const & data,
                      layout::layout::user_bin const & record,
                      seqan::hibf::interleaved_bloom_filter & ibf)
 {
-    auto const bin_index = seqan::hibf::bin_index{static_cast<size_t>(record.storage_TB_id)};
-    std::vector<uint64_t> values;
-
     serial_timer local_user_bin_io_timer{};
-    local_user_bin_io_timer.start();
-    data.config.input_fn(record.idx, insert_iterator{values});
-    local_user_bin_io_timer.stop();
-    data.user_bin_io_timer += local_user_bin_io_timer;
-
     serial_timer local_fill_ibf_timer{};
+    local_user_bin_io_timer.start();
     local_fill_ibf_timer.start();
-    for (auto && value : values)
-        ibf.emplace(value, bin_index);
+    data.config.input_fn(record.idx, insert_iterator{ibf, record.storage_TB_id});
+    local_user_bin_io_timer.stop();
     local_fill_ibf_timer.stop();
+    data.user_bin_io_timer += local_user_bin_io_timer;
     data.fill_ibf_timer += local_fill_ibf_timer;
 }
 
diff --git a/src/interleaved_bloom_filter.cpp b/src/interleaved_bloom_filter.cpp
index 195ad487..ab3ae1fd 100644
--- a/src/interleaved_bloom_filter.cpp
+++ b/src/interleaved_bloom_filter.cpp
@@ -80,16 +80,11 @@ interleaved_bloom_filter::interleaved_bloom_filter(config & configuration, size_
 {
     // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
     size_t const chunk_size = std::clamp<size_t>(std::bit_ceil(bin_count() / configuration.threads), 8u, 64u);
-    robin_hood::unordered_flat_set<uint64_t> kmers;
 
-#pragma omp parallel for schedule(dynamic, chunk_size) num_threads(configuration.threads) private(kmers)
+#pragma omp parallel for schedule(dynamic, chunk_size) num_threads(configuration.threads)
     for (size_t i = 0u; i < configuration.number_of_user_bins; ++i)
     {
-        kmers.clear();
-        configuration.input_fn(i, insert_iterator{kmers});
-
-        for (uint64_t const hash : kmers)
-            emplace(hash, seqan::hibf::bin_index{i});
+        configuration.input_fn(i, insert_iterator{*this, i});
     }
 }
 
diff --git a/src/sketch/compute_sketches.cpp b/src/sketch/compute_sketches.cpp
index 5faa299d..cd7d9ab8 100644
--- a/src/sketch/compute_sketches.cpp
+++ b/src/sketch/compute_sketches.cpp
@@ -27,21 +27,18 @@ namespace seqan::hibf::sketch
 void compute_sketches(config const & config, std::vector<sketch::hyperloglog> & hll_sketches)
 {
     // compute hll_sketches
-    hll_sketches.resize(config.number_of_user_bins);
+    hll_sketches.resize(config.number_of_user_bins, config.sketch_bits);
+
+    assert(std::ranges::all_of(hll_sketches,
+                               [bits = config.sketch_bits](hyperloglog const & sketch)
+                               {
+                                   return sketch.data_size() == (1ULL << bits);
+                               }));
 
-    robin_hood::unordered_flat_set<uint64_t> kmers;
-#pragma omp parallel for schedule(dynamic) num_threads(config.threads) private(kmers)
+#pragma omp parallel for schedule(dynamic) num_threads(config.threads)
     for (size_t i = 0; i < config.number_of_user_bins; ++i)
     {
-        seqan::hibf::sketch::hyperloglog hll_sketch(config.sketch_bits);
-
-        kmers.clear();
-        config.input_fn(i, insert_iterator{kmers});
-
-        for (auto k_hash : kmers)
-            hll_sketch.add(k_hash);
-
-        hll_sketches[i] = std::move(hll_sketch);
+        config.input_fn(i, insert_iterator{hll_sketches[i]});
     }
 }
 
diff --git a/test/performance/ibf/CMakeLists.txt b/test/performance/ibf/CMakeLists.txt
index 498044fc..df882bf3 100644
--- a/test/performance/ibf/CMakeLists.txt
+++ b/test/performance/ibf/CMakeLists.txt
@@ -5,3 +5,4 @@
 hibf_benchmark (bit_vector_benchmark.cpp)
 hibf_benchmark (bit_vector_serialisation_benchmark.cpp)
 hibf_benchmark (interleaved_bloom_filter_benchmark.cpp)
+hibf_benchmark (interleaved_bloom_filter_construction_benchmark.cpp)
diff --git a/test/performance/ibf/interleaved_bloom_filter_construction_benchmark.cpp b/test/performance/ibf/interleaved_bloom_filter_construction_benchmark.cpp
new file mode 100644
index 00000000..1d6848a2
--- /dev/null
+++ b/test/performance/ibf/interleaved_bloom_filter_construction_benchmark.cpp
@@ -0,0 +1,153 @@
+// SPDX-FileCopyrightText: 2006-2024, Knut Reinert & Freie Universität Berlin
+// SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include <benchmark/benchmark.h> // for State, Benchmark, AddCustomContext, Counter, BENCHMARK
+
+#include <algorithm>  // for __fn, generate
+#include <cmath>      // for log, ceil, exp
+#include <cstddef>    // for size_t
+#include <functional> // for equal_to
+#include <random>     // for uniform_int_distribution, mt19937_64
+#include <ranges>     // for transform_view, iota_view, __range_adaptor_closure_t, __fn
+#include <string>     // for to_string, basic_string
+#include <tuple>      // for tuple, make_tuple
+#include <utility>    // for move, pair
+#include <vector>     // for vector
+
+#include <hibf/contrib/robin_hood.hpp>              // for hash, unordered_map
+#include <hibf/contrib/std/chunk_view.hpp>          // for chunk, chunk_fn, chunk_view
+#include <hibf/contrib/std/detail/adaptor_base.hpp> // for operator|
+#include <hibf/interleaved_bloom_filter.hpp>        // for bin_index, interleaved_bloom_filter, bin_count, bin_size
+#include <hibf/misc/divide_and_ceil.hpp>            // for divide_and_ceil
+#include <hibf/platform.hpp>                        // for HIBF_HAS_AVX512
+#include <hibf/test/bytes.hpp>                      // for operator""_MiB
+
+using namespace seqan::hibf::test::literals;
+static constexpr size_t total_ibf_size_in_bytes{1_MiB};
+static constexpr size_t number_of_hash_functions{2u};
+static constexpr double false_positive_rate{0.05};
+
+inline benchmark::Counter ibf_size(size_t const bit_size) // Custom counter reporting the IBF size in bytes.
+{
+    return benchmark::Counter(bit_size / 8, benchmark::Counter::kDefaults, benchmark::Counter::OneK::kIs1024);
+}
+
+// This computes how many elements need to be inserted into the IBF to achieve the desired false positive rate for the
+// given size.
+// The `number_of_elements` many generated values are distributed over the bins and used for constructing the IBF.
+static /* cmath not constexpr in libc++ */ size_t const number_of_elements = []()
+{
+    size_t const bits = 8u * total_ibf_size_in_bytes;
+    double const numerator = -std::log(1 - std::exp(std::log(false_positive_rate) / number_of_hash_functions)) * bits;
+    return static_cast<size_t>(std::ceil(numerator / number_of_hash_functions));
+}();
+
+static auto get_value(size_t const bins) // Chunked view over the values [0, number_of_elements).
+{
+    size_t const chunk_size = seqan::hibf::divide_and_ceil(number_of_elements, bins);
+    return seqan::stl::views::chunk(std::views::iota(size_t{}, number_of_elements), chunk_size);
+}
+
+void manual_construct(::benchmark::State & state)
+{
+    // The total size budget is split evenly: every bin gets the same number of bits.
+    size_t const number_of_bins = state.range(0);
+    size_t const bits_per_bin = 8u * total_ibf_size_in_bytes / number_of_bins;
+    auto chunked_values = get_value(number_of_bins);
+
+    for (auto _ : state)
+    {
+        // Construct an empty IBF and insert each chunk of values into its bin one element at a time.
+        seqan::hibf::interleaved_bloom_filter ibf{seqan::hibf::bin_count{number_of_bins},
+                                                  seqan::hibf::bin_size{bits_per_bin},
+                                                  seqan::hibf::hash_function_count{number_of_hash_functions}};
+
+        for (size_t bin = 0u; bin < number_of_bins; ++bin)
+        {
+            for (auto const element : chunked_values[bin])
+                ibf.emplace(element, seqan::hibf::bin_index{bin});
+        }
+
+        state.counters["IBF_size"] = ibf_size(ibf.bit_size());
+        benchmark::DoNotOptimize(ibf);
+    }
+}
+
+void config_construct(::benchmark::State & state)
+{
+    size_t const number_of_bins = state.range(0);
+    auto chunked_values = get_value(number_of_bins);
+
+    // Feeds every value of the chunk belonging to `user_bin_id` to the insert iterator.
+    auto insert_chunk = [&chunked_values](size_t const user_bin_id, seqan::hibf::insert_iterator && it)
+    {
+        for (auto const element : chunked_values[user_bin_id])
+            it = element;
+    };
+
+    seqan::hibf::config config{.input_fn = insert_chunk,
+                               .number_of_user_bins = number_of_bins,
+                               .number_of_hash_functions = number_of_hash_functions,
+                               .maximum_fpr = false_positive_rate};
+
+    for (auto _ : state)
+    {
+        // Construction from the config; the values are inserted through `input_fn`.
+        seqan::hibf::interleaved_bloom_filter ibf{config};
+        state.counters["IBF_size"] = ibf_size(ibf.bit_size());
+        benchmark::DoNotOptimize(ibf);
+    }
+}
+
+void config_and_max_construct(::benchmark::State & state)
+{
+    size_t const number_of_bins = state.range(0);
+    auto chunked_values = get_value(number_of_bins);
+    // No chunk is larger than the first one, so its size is the maximum bin size.
+    size_t const max_bin_size = chunked_values[0].size();
+
+    auto insert_chunk = [&chunked_values](size_t const user_bin_id, seqan::hibf::insert_iterator && it)
+    {
+        for (auto const element : chunked_values[user_bin_id])
+            it = element;
+    };
+
+    seqan::hibf::config config{.input_fn = insert_chunk,
+                               .number_of_user_bins = number_of_bins,
+                               .number_of_hash_functions = number_of_hash_functions,
+                               .maximum_fpr = false_positive_rate};
+
+    for (auto _ : state)
+    {
+        // Passing the maximum bin size explicitly, in contrast to `config_construct`.
+        seqan::hibf::interleaved_bloom_filter ibf{config, max_bin_size};
+        state.counters["IBF_size"] = ibf_size(ibf.bit_size());
+        benchmark::DoNotOptimize(ibf);
+    }
+}
+
+BENCHMARK(manual_construct)->RangeMultiplier(2)->Range(64, 1024);
+BENCHMARK(config_construct)->RangeMultiplier(2)->Range(64, 1024);
+BENCHMARK(config_and_max_construct)->RangeMultiplier(2)->Range(64, 1024);
+
+// Registers custom context information for the benchmark output during static initialisation.
+// Doing this in main() instead would require giving up the BENCHMARK_MAIN macro, hence the
+// immediately invoked lambda whose result is stored in an otherwise unused variable.
+[[maybe_unused]] static bool const custom_context_registered = []()
+{
+#if __AVX512F__ && __AVX512BW__
+    constexpr bool has_avx512_support{true};
+#else
+    constexpr bool has_avx512_support{false};
+#endif
+    benchmark::AddCustomContext("IBF size in bytes", std::to_string(total_ibf_size_in_bytes));
+    benchmark::AddCustomContext("Number of hash functions", std::to_string(number_of_hash_functions));
+    benchmark::AddCustomContext("False positive rate", std::to_string(false_positive_rate));
+    benchmark::AddCustomContext("Number of elements", std::to_string(number_of_elements));
+    benchmark::AddCustomContext("HIBF_HAS_AVX512", HIBF_HAS_AVX512 ? "true" : "false");
+    benchmark::AddCustomContext("AVX512 support", has_avx512_support ? "true" : "false");
+    return true;
+}();
+
+BENCHMARK_MAIN();
diff --git a/test/performance/sketch/compute_sketches_benchmark.cpp b/test/performance/sketch/compute_sketches_benchmark.cpp
index 7e6c2358..f2cd0f64 100644
--- a/test/performance/sketch/compute_sketches_benchmark.cpp
+++ b/test/performance/sketch/compute_sketches_benchmark.cpp
@@ -13,6 +13,11 @@
 #include <hibf/sketch/hyperloglog.hpp>      // for hyperloglog
 #include <hibf/sketch/minhashes.hpp>        // for minhashes
 
+inline benchmark::Counter elements_per_second(size_t const count) // Rate counter; `count` is the work per iteration.
+{
+    return benchmark::Counter(count, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1000);
+}
+
 enum class sketch : uint8_t
 {
     Hyperloglog,
@@ -22,30 +27,36 @@ enum class sketch : uint8_t
 template <sketch sketch_t>
 void compute_sketches(benchmark::State & state)
 {
+    static constexpr uint64_t elements_per_bin = 10000;
     auto create_hashes = [&](size_t const ub_id, seqan::hibf::insert_iterator it)
     {
         // 0 = [0, 10000]
         // 1 = [10000, 20000]
         // 1 = [20000, 30000]
-        for (size_t i = ub_id * 10000; i < (ub_id + 1) * 10000; ++i)
+        for (size_t i = ub_id * elements_per_bin; i < (ub_id + 1) * elements_per_bin; ++i)
             it = i;
     };
 
-    [[maybe_unused]] std::vector<seqan::hibf::sketch::minhashes> minhash_sketches;
-    std::vector<seqan::hibf::sketch::hyperloglog> hyperloglog_sketches;
-
     seqan::hibf::config config{};
-    config.number_of_user_bins = 16;
+    config.number_of_user_bins = 64;
     config.input_fn = create_hashes;
     config.sketch_bits = 12;
 
+    [[maybe_unused]] std::vector<seqan::hibf::sketch::minhashes> minhash_sketches;
+    std::vector<seqan::hibf::sketch::hyperloglog> hyperloglog_sketches(config.number_of_user_bins, config.sketch_bits);
+
     for (auto _ : state)
     {
         if constexpr (sketch_t == sketch::MinHashes)
             seqan::hibf::sketch::compute_sketches(config, hyperloglog_sketches, minhash_sketches);
         else
             seqan::hibf::sketch::compute_sketches(config, hyperloglog_sketches);
+
+        benchmark::DoNotOptimize(hyperloglog_sketches);
+        benchmark::ClobberMemory();
     }
+
+    state.counters["elements"] = elements_per_second(elements_per_bin * config.number_of_user_bins);
 }
 
 BENCHMARK_TEMPLATE(compute_sketches, sketch::Hyperloglog);