Skip to content

Commit

Permalink
[MISC] Rename hibf::membership_agent::bulk_contains -> hibf::membersh…
Browse files Browse the repository at this point in the history
…ip_agent::membership_for
  • Loading branch information
smehringer committed Aug 29, 2023
1 parent 698d34e commit 1f1aef0
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 13 deletions.
40 changes: 31 additions & 9 deletions include/hibf/hierarchical_interleaved_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type

//!\brief Helper for recursive membership querying.
template <std::ranges::forward_range value_range_t>
void bulk_contains_impl(value_range_t && values, int64_t const ibf_idx, size_t const threshold)
void membership_for_impl(value_range_t && values, int64_t const ibf_idx, size_t const threshold)
{
auto agent = hibf_ptr->ibf_vector[ibf_idx].template counting_agent<uint16_t>();
auto & result = agent.bulk_count(values);
Expand All @@ -177,7 +177,7 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type
if (current_filename_index < 0) // merged bin
{
if (sum >= threshold)
bulk_contains_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
membership_for_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
sum = 0u;
}
else if (bin + 1u == result.size() || // last bin
Expand Down Expand Up @@ -209,28 +209,50 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type
{}
//!\}

//!\brief Stores the result of bulk_contains().
//!\brief Stores the result of membership_for().
std::vector<int64_t> result_buffer;

/*!\name Lookup
* \{
*/
/*!\brief Determines set membership of given values, and returns the user bin indices of occurrences.
/*!\brief Determines set membership for all user bins contained in this index, based on `values` and the `threshold`.
* \param[in] values The values to process; must model std::ranges::forward_range.
* \param[in] threshold Report a user bin if there are at least this many hits.
* \returns A vector of user bin ids (index values) with successful set membership query.
*
* \attention The result of this function must always be bound via reference, e.g. `auto &`, to prevent copying.
* \attention Sequential calls to this function invalidate the previously returned reference.
*
* \details
*
* Each value in `values` is queried against the index and all hits are accumulated. If the accumulated sum of hits
* reaches the threshold for a user bin, that user bin (its index value) is returned.
*
* ### Example
*
* Let's assume that the hibf index is built on 3 user bins (UB_A, UB_B, and UB_C) and the user bins contain the
* following hash values:
*
* * 0: UB_A = {4,5,6,11}
* * 1: UB_B = {4,5,11,12}
* * 2: UB_C = {4,5,6,7,9,10}
*
* Then the following query:
* ```cpp
* auto agent = hibf.membership_agent();
* auto & result = agent.membership_for(std::vector<size_t>{4,5,6,7}, 3); // result = {0,2}
* ```
* would return a vector that contains the index values 0 and 2, indicating that UB_A (hits 4,5,6) and
* UB_C (hits 4,5,6,7) reached the threshold of `>= 3` hits. UB_B only counts 2 hits (hits 4,5) and is thus not
* contained in the list of user bins with a successful query.
*
* ### Thread safety
*
* Concurrent invocations of this function are not thread safe. Please create a
* seqan::hibf::hierarchical_interleaved_bloom_filter::membership_agent for each thread.
*/
template <std::ranges::forward_range value_range_t>
[[nodiscard]] std::vector<int64_t> const & bulk_contains(value_range_t && values, size_t const threshold) & noexcept
[[nodiscard]] std::vector<int64_t> const & membership_for(value_range_t && values, size_t const threshold) & noexcept
{
assert(hibf_ptr != nullptr);

Expand All @@ -240,18 +262,18 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type

result_buffer.clear();

bulk_contains_impl(values, 0, threshold);
membership_for_impl(values, 0, threshold);

std::ranges::sort(result_buffer); // TODO: necessary?

return result_buffer;
}

// `bulk_contains` cannot be called on a temporary, since the object the returned reference points to
// `membership_for` cannot be called on a temporary, since the object the returned reference points to
// is immediately destroyed.
template <std::ranges::range value_range_t>
[[nodiscard]] std::vector<int64_t> const & bulk_contains(value_range_t && values,
size_t const threshold) && noexcept = delete;
[[nodiscard]] std::vector<int64_t> const & membership_for(value_range_t && values,
size_t const threshold) && noexcept = delete;
//!\}
};

Expand Down
8 changes: 4 additions & 4 deletions test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ TEST(hibf_test, test_specific_hash_values)
std::vector<size_t> query{1, 2, 3, 4, 5};

auto agent = hibf.membership_agent();
auto result = agent.bulk_contains(query, 2);
auto result = agent.membership_for(query, 2);

EXPECT_RANGE_EQ(result, (std::vector<size_t>{0u, 1u}));
}
Expand Down Expand Up @@ -82,7 +82,7 @@ TEST(hibf_test, build_from_layout)
std::vector<size_t> query{1, 2, 3, 4, 5};

auto agent = hibf.membership_agent();
auto result = agent.bulk_contains(query, 2);
auto result = agent.membership_for(query, 2);

EXPECT_RANGE_EQ(result, (std::vector<size_t>{0u, 1u}));
}
Expand Down Expand Up @@ -119,7 +119,7 @@ TEST(hibf_test, build_from_layout)
// std::vector<seqan::hibf::dna4> query{"AAGG"_dna4};
// auto query_kmers = query | kmer_transformation;

// auto result = agent.bulk_contains(query_kmers, 1);
// auto result = agent.membership_for(query_kmers, 1);

// seqan::hibf::debug_stream << result << std::endl;
// }
Expand Down Expand Up @@ -162,7 +162,7 @@ TEST(hibf_test, build_from_layout)

// std::vector<seqan::hibf::dna4> query{"AAGG"_dna4};

// auto result = agent.bulk_contains(query | transform, 1);
// auto result = agent.membership_for(query | transform, 1);

// seqan::hibf::debug_stream << result << std::endl; // prints [0] since query is found in user bin 0
// }
Expand Down

0 comments on commit 1f1aef0

Please sign in to comment.