Skip to content

Commit

Permalink
Merge pull request #48 from eseiler/misc/strong_type_binsize
Browse files Browse the repository at this point in the history
[MISC] Use a strong type for bin_size_in_bits
  • Loading branch information
eseiler authored Aug 21, 2023
2 parents 6e6f5a2 + 0cb2977 commit 0cce459
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 12 deletions.
15 changes: 10 additions & 5 deletions include/hibf/detail/build/bin_size_in_bits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@
namespace hibf
{

inline size_t bin_size_in_bits(size_t const number_of_kmers_to_be_stored,
size_t const number_of_hash_functions,
double const maximum_false_positive_rate)
struct bin_size_parameters
{
double const numerator{-static_cast<double>(number_of_kmers_to_be_stored * number_of_hash_functions)};
double const denominator{std::log(1 - std::exp(std::log(maximum_false_positive_rate) / number_of_hash_functions))};
double fpr{};
size_t hash_count{};
size_t elements{};
};

// Returns the bin size in bits for the given parameters, evaluated as
//   ceil( -(elements * hash_count) / ln(1 - fpr^(1 / hash_count)) ).
//
// params.fpr        - false positive rate used in the formula.
// params.hash_count - number of hash functions.
// params.elements   - number of elements to be stored.
//
// NOTE(review): the inputs are not validated here; presumably callers pass
// hash_count > 0 and 0 < fpr < 1 - confirm at the call sites.
inline size_t bin_size_in_bits(bin_size_parameters const & params)
{
    // The product is formed in size_t first and only then converted to double.
    double const negated_insertions = -static_cast<double>(params.elements * params.hash_count);

    // exp(log(fpr) / hash_count) is the hash_count-th root of fpr.
    double const fpr_root = std::exp(std::log(params.fpr) / params.hash_count);
    double const log_term = std::log(1 - fpr_root);

    // Round up to a whole number of bits before converting to the integral return type.
    return static_cast<size_t>(std::ceil(negated_insertions / log_term));
}
Expand Down
6 changes: 3 additions & 3 deletions src/detail/build/construct_ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_set<uint
auto & node_data = data.node_map[node];

size_t const kmers_per_bin{static_cast<size_t>(std::ceil(static_cast<double>(kmers.size()) / number_of_bins))};
double const bin_bits{static_cast<double>(bin_size_in_bits(kmers_per_bin,
data.config.number_of_hash_functions,
data.config.maximum_false_positive_rate))};
double const bin_bits{static_cast<double>(bin_size_in_bits({.fpr = data.config.maximum_false_positive_rate,
.hash_count = data.config.number_of_hash_functions,
.elements = kmers_per_bin}))};
hibf::bin_size const bin_size{static_cast<size_t>(std::ceil(bin_bits * data.fpr_correction[number_of_bins]))};
hibf::bin_count const bin_count{node_data.number_of_technical_bins};

Expand Down
9 changes: 5 additions & 4 deletions util/fpr_quality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ void init_parser(sharg::parser & parser, config & cfg)

size_t split_bin_size_in_bits(config const & cfg)
{
return hibf::bin_size_in_bits(cfg.split_elements_per_bin, cfg.hash, cfg.fpr);
return hibf::bin_size_in_bits({.fpr = cfg.fpr, .hash_count = cfg.hash, .elements = cfg.split_elements_per_bin});
}

void print_results(size_t const fp_count, config const & cfg)
Expand All @@ -107,9 +107,10 @@ void print_results(size_t const fp_count, config const & cfg)

void single_tb(config const & cfg)
{
hibf::interleaved_bloom_filter ibf{hibf::bin_count{1u},
hibf::bin_size{hibf::bin_size_in_bits(cfg.elements, cfg.hash, cfg.fpr)},
hibf::hash_function_count{cfg.hash}};
hibf::interleaved_bloom_filter ibf{
hibf::bin_count{1u},
hibf::bin_size{hibf::bin_size_in_bits({.fpr = cfg.fpr, .hash_count = cfg.hash, .elements = cfg.elements})},
hibf::hash_function_count{cfg.hash}};
auto agent = ibf.membership_agent();

// Generate elements many random kmer values.
Expand Down

1 comment on commit 0cce459

@vercel
Copy link

@vercel vercel bot commented on 0cce459 Aug 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

hibf – ./

hibf-git-main-seqan.vercel.app
hibf-seqan.vercel.app
hibf.vercel.app

Please sign in to comment.