Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MISC] Update layout- and add build-subnamespace #58

Merged
merged 2 commits into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/hibf/build/bin_size_in_bits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include <hibf/platform.hpp>

namespace seqan::hibf
namespace seqan::hibf::build
{

struct bin_size_parameters
Expand All @@ -35,4 +35,4 @@ inline size_t bin_size_in_bits(bin_size_parameters const & params)
return result;
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/build_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <hibf/detail/timer.hpp> // for concurrent, timer
#include <hibf/layout/graph.hpp> // for node_data

namespace seqan::hibf
namespace seqan::hibf::build
{

struct build_data
Expand All @@ -40,4 +40,4 @@ struct build_data
}
};

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/chopper_pack_record.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#include <hibf/platform.hpp>

namespace seqan::hibf
namespace seqan::hibf::build
{

struct chopper_pack_record
Expand Down Expand Up @@ -45,4 +45,4 @@ struct chopper_pack_record
}
};

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/compute_kmers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
#include <hibf/contrib/robin_hood.hpp> // for unordered_flat_set
#include <hibf/layout/layout.hpp> // for layout

namespace seqan::hibf
namespace seqan::hibf::build
{

//!\brief Declaration only; the definition lives in src/build/compute_kmers.cpp.
//!\param[in,out] kmers  Hash set of 64-bit values passed by non-const reference.
//!                      NOTE(review): presumably receives the k-mers of the given user bin - confirm in the definition.
//!\param[in]     data   Build state shared across the build helpers.
//!\param[in]     record The user bin of the layout to process.
void compute_kmers(robin_hood::unordered_flat_set<uint64_t> & kmers,
build_data const & data,
layout::layout::user_bin const & record);

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/construct_ibf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter
#include <hibf/layout/graph.hpp>

namespace seqan::hibf
namespace seqan::hibf::build
{

seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_set<uint64_t> & parent_kmers,
Expand All @@ -25,4 +25,4 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s
build_data & data,
bool is_root);

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/insert_into_ibf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter
#include <hibf/layout/layout.hpp> // for layout

namespace seqan::hibf
namespace seqan::hibf::build
{

// automatically does naive splitting if number_of_bins > 1
Expand All @@ -30,4 +30,4 @@ void insert_into_ibf(build_data const & data,
layout::layout::user_bin const & record,
seqan::hibf::interleaved_bloom_filter & ibf);

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/update_parent_kmers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include <hibf/detail/timer.hpp> // for concurrent, timer
#include <hibf/platform.hpp>

namespace seqan::hibf
namespace seqan::hibf::build
{

inline void update_parent_kmers(robin_hood::unordered_flat_set<uint64_t> & parent_kmers,
Expand All @@ -31,4 +31,4 @@ inline void update_parent_kmers(robin_hood::unordered_flat_set<uint64_t> & paren
merge_kmers_timer += local_merge_kmers_timer;
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/update_user_bins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@

#include <hibf/layout/layout.hpp> // for layout

namespace seqan::hibf
namespace seqan::hibf::build
{

//!\brief Writes `record.idx` into `record.number_of_technical_bins` consecutive entries of `filename_indices`,
//!       starting at position `record.storage_TB_id`.
//!\details No bounds checking is done here; `filename_indices` must already be large enough for the written range.
inline void update_user_bins(std::vector<int64_t> & filename_indices, layout::layout::user_bin const & record)
{
    // First entry of `filename_indices` that belongs to this user bin.
    auto const first_bin = filename_indices.begin() + record.storage_TB_id;

    std::fill_n(first_bin, record.number_of_technical_bins, record.idx);
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
6 changes: 3 additions & 3 deletions include/hibf/layout/data_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog

namespace seqan::hibf
namespace seqan::hibf::layout
{

struct data_store
Expand Down Expand Up @@ -41,7 +41,7 @@ struct data_store
double false_positive_rate{};

//!\brief The layout that is built by layout::hierarchical_binning.
layout::layout * hibf_layout; // Will be modified by {simple,hierarchical}_binning.
layout * hibf_layout; // Will be modified by {simple,hierarchical}_binning.

//!\brief The kmer counts associated with the above files used to layout user bin into technical bins.
std::vector<size_t> const * kmer_counts{}; // Pointed to data should not be modified.
Expand Down Expand Up @@ -79,4 +79,4 @@ struct data_store
//!\}
};

} // namespace seqan::hibf
} // namespace seqan::hibf::layout
4 changes: 2 additions & 2 deletions include/hibf/layout/execute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
#include <hibf/config.hpp> // for config
#include <hibf/layout/data_store.hpp> // for data_store

namespace seqan::hibf
namespace seqan::hibf::layout
{

//!\brief Declaration only; the definition lives in src/layout/execute.cpp.
//!\details The visible part of the definition works on a copy of the passed config, stores the result of
//!         compute_fpr_correction() in `data.fpr_correction`, and returns the result of
//!         `hierarchical_binning{data, config_copy}.execute()`.
//!         NOTE(review): part of the definition was not visible when this comment was written - confirm nothing is missing.
size_t execute(config const &, data_store &);

} // namespace seqan::hibf
} // namespace seqan::hibf::layout
4 changes: 2 additions & 2 deletions include/hibf/layout/print_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include <hibf/platform.hpp>

namespace seqan::hibf
namespace seqan::hibf::layout
{

// helper function to print a matrix when debugging
Expand All @@ -28,4 +28,4 @@ void print_matrix(matrix_type const & matrix,
std::cerr << '\n';
}

} // namespace seqan::hibf
} // namespace seqan::hibf::layout
4 changes: 2 additions & 2 deletions src/build/compute_kmers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <hibf/detail/timer.hpp> // for concurrent, timer
#include <hibf/layout/layout.hpp> // for layout

namespace seqan::hibf
namespace seqan::hibf::build
{

void compute_kmers(robin_hood::unordered_flat_set<uint64_t> & kmers,
Expand All @@ -35,4 +35,4 @@ void compute_kmers(robin_hood::unordered_flat_set<uint64_t> & kmers,
data.user_bin_io_timer += local_user_bin_io_timer;
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions src/build/construct_ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter, bin_count, bin_size, hash_fun...
#include <hibf/layout/graph.hpp>

namespace seqan::hibf
namespace seqan::hibf::build
{

seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_set<uint64_t> & parent_kmers,
Expand Down Expand Up @@ -54,4 +54,4 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s
return ibf;
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions src/build/insert_into_ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter, bin_index
#include <hibf/layout/layout.hpp> // for layout

namespace seqan::hibf
namespace seqan::hibf::build
{

// automatically does naive splitting if number_of_bins > 1
Expand Down Expand Up @@ -70,4 +70,4 @@ void insert_into_ibf(build_data const & data,
data.fill_ibf_timer += local_fill_ibf_timer;
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
26 changes: 15 additions & 11 deletions src/hierarchical_interleaved_bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
robin_hood::unordered_flat_set<uint64_t> & parent_kmers,
layout::graph::node const & current_node,
build_data & data,
build::build_data & data,
bool is_root)
{
size_t const ibf_pos{data.request_ibf_idx()};
Expand All @@ -62,8 +62,8 @@
{
// we assume that the max record is at the beginning of the list of remaining records.
auto const & record = current_node.remaining_records[0];
compute_kmers(kmers, data, record);
update_user_bins(filename_indices, record);
build::compute_kmers(kmers, data, record);
build::update_user_bins(filename_indices, record);

return record.number_of_technical_bins;
}
Expand Down Expand Up @@ -117,9 +117,9 @@
size_t const mutex_id{parent_bin_index / 64};
std::lock_guard<std::mutex> guard{local_ibf_mutex[mutex_id]};
ibf_positions[parent_bin_index] = ibf_pos;
insert_into_ibf(kmers, 1, parent_bin_index, ibf, data.fill_ibf_timer);
build::insert_into_ibf(kmers, 1, parent_bin_index, ibf, data.fill_ibf_timer);
if (!is_root)
update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer);
build::update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer);
}
}
};
Expand All @@ -134,17 +134,21 @@

if (is_root && record.number_of_technical_bins == 1) // no splitting needed
{
insert_into_ibf(data, record, ibf);
build::insert_into_ibf(data, record, ibf);

Check warning on line 137 in src/hierarchical_interleaved_bloom_filter.cpp

View check run for this annotation

Codecov / codecov/patch

src/hierarchical_interleaved_bloom_filter.cpp#L137

Added line #L137 was not covered by tests
}
else
{
compute_kmers(kmers, data, record);
insert_into_ibf(kmers, record.number_of_technical_bins, record.storage_TB_id, ibf, data.fill_ibf_timer);
build::insert_into_ibf(kmers,
record.number_of_technical_bins,
record.storage_TB_id,
ibf,
data.fill_ibf_timer);
if (!is_root)
update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer);
build::update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer);

Check warning on line 148 in src/hierarchical_interleaved_bloom_filter.cpp

View check run for this annotation

Codecov / codecov/patch

src/hierarchical_interleaved_bloom_filter.cpp#L148

Added line #L148 was not covered by tests
}

update_user_bins(filename_indices, record);
build::update_user_bins(filename_indices, record);
kmers.clear();
}

Expand All @@ -157,7 +161,7 @@

size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
layout::graph::node const & root_node,
build_data & data)
build::build_data & data)
{
robin_hood::unordered_flat_set<uint64_t> root_kmers{};
return hierarchical_build(hibf, root_kmers, root_node, data, true);
Expand All @@ -174,7 +178,7 @@
hibf.user_bins.set_user_bin_count(hibf_layout.user_bins.size());
hibf.next_ibf_id.resize(number_of_ibfs);

build_data data{.config = config, .ibf_graph = {hibf_layout}};
build::build_data data{.config = config, .ibf_graph = {hibf_layout}};

layout::graph::node const & root_node = data.ibf_graph.root;

Expand Down
2 changes: 1 addition & 1 deletion src/layout/compute_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ compute_layout(config const & config, std::vector<size_t> & kmer_counts, std::ve
.kmer_counts = std::addressof(kmer_counts),
.sketches = std::addressof(sketches)};

size_t const max_hibf_id = seqan::hibf::execute(config, store);
size_t const max_hibf_id = seqan::hibf::layout::execute(config, store);
store.hibf_layout->top_level_max_bin_id = max_hibf_id;

// sort records ascending by the number of bin indices (corresponds to the IBF levels)
Expand Down
12 changes: 6 additions & 6 deletions src/layout/execute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
#include <hibf/layout/hierarchical_binning.hpp> // for hierarchical_binning
#include <hibf/next_multiple_of_64.hpp> // for next_multiple_of_64

namespace seqan::hibf
namespace seqan::hibf::layout
{

size_t execute(seqan::hibf::config const & config, seqan::hibf::data_store & data)
size_t execute(seqan::hibf::config const & config, seqan::hibf::layout::data_store & data)
{
seqan::hibf::config config_copy{config};

Expand All @@ -41,11 +41,11 @@ size_t execute(seqan::hibf::config const & config, seqan::hibf::data_store & dat
}

data.fpr_correction =
layout::compute_fpr_correction({.fpr = config_copy.maximum_false_positive_rate, // prevent clang-format
.hash_count = config_copy.number_of_hash_functions,
.t_max = config_copy.tmax});
compute_fpr_correction({.fpr = config_copy.maximum_false_positive_rate, // prevent clang-format
.hash_count = config_copy.number_of_hash_functions,
.t_max = config_copy.tmax});

return seqan::hibf::layout::hierarchical_binning{data, config_copy}.execute();
}

} // namespace seqan::hibf
} // namespace seqan::hibf::layout
14 changes: 7 additions & 7 deletions test/unit/hibf/layout/hierarchical_binning_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ TEST(hierarchical_binning_test, small_example)
seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{500, 1000, 500, 500, 500, 500, 500, 500};

seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down Expand Up @@ -49,7 +49,7 @@ TEST(hierarchical_binning_test, another_example)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{50, 1000, 1000, 50, 5, 10, 10, 5};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down Expand Up @@ -80,7 +80,7 @@ TEST(hierarchical_binning_test, high_level_max_bin_id_is_0)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{500, 500, 500, 500};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand All @@ -105,7 +105,7 @@ TEST(hierarchical_binning_test, knuts_example)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{60, 600, 1000, 800, 800};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down Expand Up @@ -133,7 +133,7 @@ TEST(hierarchical_binning_test, four_level_hibf)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{11090, 5080, 3040, 1020, 510, 500};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down Expand Up @@ -166,7 +166,7 @@ TEST(hierarchical_binning_test, tb0_is_a_merged_bin)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{500, 500, 500, 500};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down Expand Up @@ -194,7 +194,7 @@ TEST(hierarchical_binning_test, tb0_is_a_merged_bin_and_leads_to_recursive_call)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{500, 500, 500, 500, 500, 500, 500, 500};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down
Loading