Skip to content

Commit

Permalink
Merge pull request #58 from smehringer/subnamespaces
Browse files Browse the repository at this point in the history
[MISC] Update layout- and add build-subnamespace
  • Loading branch information
eseiler authored Sep 1, 2023
2 parents debe6f5 + 72cdc3e commit 5071729
Show file tree
Hide file tree
Showing 20 changed files with 69 additions and 65 deletions.
4 changes: 2 additions & 2 deletions include/hibf/build/bin_size_in_bits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include <hibf/platform.hpp>

namespace seqan::hibf
namespace seqan::hibf::build
{

struct bin_size_parameters
Expand All @@ -35,4 +35,4 @@ inline size_t bin_size_in_bits(bin_size_parameters const & params)
return result;
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/build_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <hibf/detail/timer.hpp> // for concurrent, timer
#include <hibf/layout/graph.hpp> // for node_data

namespace seqan::hibf
namespace seqan::hibf::build
{

struct build_data
Expand All @@ -40,4 +40,4 @@ struct build_data
}
};

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/chopper_pack_record.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#include <hibf/platform.hpp>

namespace seqan::hibf
namespace seqan::hibf::build
{

struct chopper_pack_record
Expand Down Expand Up @@ -45,4 +45,4 @@ struct chopper_pack_record
}
};

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/compute_kmers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
#include <hibf/contrib/robin_hood.hpp> // for unordered_flat_set
#include <hibf/layout/layout.hpp> // for layout

namespace seqan::hibf
namespace seqan::hibf::build
{

void compute_kmers(robin_hood::unordered_flat_set<uint64_t> & kmers,
build_data const & data,
layout::layout::user_bin const & record);

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/construct_ibf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter
#include <hibf/layout/graph.hpp>

namespace seqan::hibf
namespace seqan::hibf::build
{

seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_set<uint64_t> & parent_kmers,
Expand All @@ -25,4 +25,4 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s
build_data & data,
bool is_root);

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/insert_into_ibf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter
#include <hibf/layout/layout.hpp> // for layout

namespace seqan::hibf
namespace seqan::hibf::build
{

// automatically does naive splitting if number_of_bins > 1
Expand All @@ -30,4 +30,4 @@ void insert_into_ibf(build_data const & data,
layout::layout::user_bin const & record,
seqan::hibf::interleaved_bloom_filter & ibf);

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/update_parent_kmers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include <hibf/detail/timer.hpp> // for concurrent, timer
#include <hibf/platform.hpp>

namespace seqan::hibf
namespace seqan::hibf::build
{

inline void update_parent_kmers(robin_hood::unordered_flat_set<uint64_t> & parent_kmers,
Expand All @@ -31,4 +31,4 @@ inline void update_parent_kmers(robin_hood::unordered_flat_set<uint64_t> & paren
merge_kmers_timer += local_merge_kmers_timer;
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions include/hibf/build/update_user_bins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@

#include <hibf/layout/layout.hpp> // for layout

namespace seqan::hibf
namespace seqan::hibf::build
{

// Writes record.idx into record.number_of_technical_bins consecutive entries of
// filename_indices, starting at offset record.storage_TB_id.
// NOTE(review): no bounds check is performed here; presumably the caller has sized
// filename_indices to cover storage_TB_id + number_of_technical_bins entries — confirm.
inline void update_user_bins(std::vector<int64_t> & filename_indices, layout::layout::user_bin const & record)
{
std::fill_n(filename_indices.begin() + record.storage_TB_id, record.number_of_technical_bins, record.idx);
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
6 changes: 3 additions & 3 deletions include/hibf/layout/data_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include <hibf/layout/layout.hpp> // for layout
#include <hibf/sketch/hyperloglog.hpp> // for hyperloglog

namespace seqan::hibf
namespace seqan::hibf::layout
{

struct data_store
Expand Down Expand Up @@ -41,7 +41,7 @@ struct data_store
double false_positive_rate{};

//!\brief The layout that is built by layout::hierarchical_binning.
layout::layout * hibf_layout; // Will be modified by {simple,hierarchical}_binning.
layout * hibf_layout; // Will be modified by {simple,hierarchical}_binning.

//!\brief The kmer counts associated with the above files, used to lay out user bins into technical bins.
std::vector<size_t> const * kmer_counts{}; // Pointed to data should not be modified.
Expand Down Expand Up @@ -79,4 +79,4 @@ struct data_store
//!\}
};

} // namespace seqan::hibf
} // namespace seqan::hibf::layout
4 changes: 2 additions & 2 deletions include/hibf/layout/execute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
#include <hibf/config.hpp> // for config
#include <hibf/layout/data_store.hpp> // for data_store

namespace seqan::hibf
namespace seqan::hibf::layout
{

size_t execute(config const &, data_store &);

} // namespace seqan::hibf
} // namespace seqan::hibf::layout
4 changes: 2 additions & 2 deletions include/hibf/layout/print_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include <hibf/platform.hpp>

namespace seqan::hibf
namespace seqan::hibf::layout
{

// helper function to print a matrix when debugging
Expand All @@ -28,4 +28,4 @@ void print_matrix(matrix_type const & matrix,
std::cerr << '\n';
}

} // namespace seqan::hibf
} // namespace seqan::hibf::layout
4 changes: 2 additions & 2 deletions src/build/compute_kmers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <hibf/detail/timer.hpp> // for concurrent, timer
#include <hibf/layout/layout.hpp> // for layout

namespace seqan::hibf
namespace seqan::hibf::build
{

void compute_kmers(robin_hood::unordered_flat_set<uint64_t> & kmers,
Expand All @@ -35,4 +35,4 @@ void compute_kmers(robin_hood::unordered_flat_set<uint64_t> & kmers,
data.user_bin_io_timer += local_user_bin_io_timer;
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions src/build/construct_ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter, bin_count, bin_size, hash_fun...
#include <hibf/layout/graph.hpp>

namespace seqan::hibf
namespace seqan::hibf::build
{

seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_set<uint64_t> & parent_kmers,
Expand Down Expand Up @@ -54,4 +54,4 @@ seqan::hibf::interleaved_bloom_filter construct_ibf(robin_hood::unordered_flat_s
return ibf;
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
4 changes: 2 additions & 2 deletions src/build/insert_into_ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <hibf/interleaved_bloom_filter.hpp> // for interleaved_bloom_filter, bin_index
#include <hibf/layout/layout.hpp> // for layout

namespace seqan::hibf
namespace seqan::hibf::build
{

// automatically does naive splitting if number_of_bins > 1
Expand Down Expand Up @@ -70,4 +70,4 @@ void insert_into_ibf(build_data const & data,
data.fill_ibf_timer += local_fill_ibf_timer;
}

} // namespace seqan::hibf
} // namespace seqan::hibf::build
26 changes: 15 additions & 11 deletions src/hierarchical_interleaved_bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace seqan::hibf
size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
robin_hood::unordered_flat_set<uint64_t> & parent_kmers,
layout::graph::node const & current_node,
build_data & data,
build::build_data & data,
bool is_root)
{
size_t const ibf_pos{data.request_ibf_idx()};
Expand All @@ -62,8 +62,8 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
{
// we assume that the max record is at the beginning of the list of remaining records.
auto const & record = current_node.remaining_records[0];
compute_kmers(kmers, data, record);
update_user_bins(filename_indices, record);
build::compute_kmers(kmers, data, record);
build::update_user_bins(filename_indices, record);

return record.number_of_technical_bins;
}
Expand Down Expand Up @@ -117,9 +117,9 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
size_t const mutex_id{parent_bin_index / 64};
std::lock_guard<std::mutex> guard{local_ibf_mutex[mutex_id]};
ibf_positions[parent_bin_index] = ibf_pos;
insert_into_ibf(kmers, 1, parent_bin_index, ibf, data.fill_ibf_timer);
build::insert_into_ibf(kmers, 1, parent_bin_index, ibf, data.fill_ibf_timer);
if (!is_root)
update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer);
build::update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer);
}
}
};
Expand All @@ -134,17 +134,21 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,

if (is_root && record.number_of_technical_bins == 1) // no splitting needed
{
insert_into_ibf(data, record, ibf);
build::insert_into_ibf(data, record, ibf);
}
else
{
compute_kmers(kmers, data, record);
insert_into_ibf(kmers, record.number_of_technical_bins, record.storage_TB_id, ibf, data.fill_ibf_timer);
build::insert_into_ibf(kmers,
record.number_of_technical_bins,
record.storage_TB_id,
ibf,
data.fill_ibf_timer);
if (!is_root)
update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer);
build::update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer);
}

update_user_bins(filename_indices, record);
build::update_user_bins(filename_indices, record);
kmers.clear();
}

Expand All @@ -157,7 +161,7 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,

size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
layout::graph::node const & root_node,
build_data & data)
build::build_data & data)
{
robin_hood::unordered_flat_set<uint64_t> root_kmers{};
return hierarchical_build(hibf, root_kmers, root_node, data, true);
Expand All @@ -174,7 +178,7 @@ void build_index(hierarchical_interleaved_bloom_filter & hibf,
hibf.user_bins.set_user_bin_count(hibf_layout.user_bins.size());
hibf.next_ibf_id.resize(number_of_ibfs);

build_data data{.config = config, .ibf_graph = {hibf_layout}};
build::build_data data{.config = config, .ibf_graph = {hibf_layout}};

layout::graph::node const & root_node = data.ibf_graph.root;

Expand Down
2 changes: 1 addition & 1 deletion src/layout/compute_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ compute_layout(config const & config, std::vector<size_t> & kmer_counts, std::ve
.kmer_counts = std::addressof(kmer_counts),
.sketches = std::addressof(sketches)};

size_t const max_hibf_id = seqan::hibf::execute(config, store);
size_t const max_hibf_id = seqan::hibf::layout::execute(config, store);
store.hibf_layout->top_level_max_bin_id = max_hibf_id;

// sort records ascending by the number of bin indices (corresponds to the IBF levels)
Expand Down
12 changes: 6 additions & 6 deletions src/layout/execute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
#include <hibf/layout/hierarchical_binning.hpp> // for hierarchical_binning
#include <hibf/next_multiple_of_64.hpp> // for next_multiple_of_64

namespace seqan::hibf
namespace seqan::hibf::layout
{

size_t execute(seqan::hibf::config const & config, seqan::hibf::data_store & data)
size_t execute(seqan::hibf::config const & config, seqan::hibf::layout::data_store & data)
{
seqan::hibf::config config_copy{config};

Expand All @@ -41,11 +41,11 @@ size_t execute(seqan::hibf::config const & config, seqan::hibf::data_store & dat
}

data.fpr_correction =
layout::compute_fpr_correction({.fpr = config_copy.maximum_false_positive_rate, // prevent clang-format
.hash_count = config_copy.number_of_hash_functions,
.t_max = config_copy.tmax});
compute_fpr_correction({.fpr = config_copy.maximum_false_positive_rate, // prevent clang-format
.hash_count = config_copy.number_of_hash_functions,
.t_max = config_copy.tmax});

return seqan::hibf::layout::hierarchical_binning{data, config_copy}.execute();
}

} // namespace seqan::hibf
} // namespace seqan::hibf::layout
14 changes: 7 additions & 7 deletions test/unit/hibf/layout/hierarchical_binning_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ TEST(hierarchical_binning_test, small_example)
seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{500, 1000, 500, 500, 500, 500, 500, 500};

seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down Expand Up @@ -49,7 +49,7 @@ TEST(hierarchical_binning_test, another_example)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{50, 1000, 1000, 50, 5, 10, 10, 5};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down Expand Up @@ -80,7 +80,7 @@ TEST(hierarchical_binning_test, high_level_max_bin_id_is_0)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{500, 500, 500, 500};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand All @@ -105,7 +105,7 @@ TEST(hierarchical_binning_test, knuts_example)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{60, 600, 1000, 800, 800};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down Expand Up @@ -133,7 +133,7 @@ TEST(hierarchical_binning_test, four_level_hibf)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{11090, 5080, 3040, 1020, 510, 500};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down Expand Up @@ -166,7 +166,7 @@ TEST(hierarchical_binning_test, tb0_is_a_merged_bin)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{500, 500, 500, 500};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down Expand Up @@ -194,7 +194,7 @@ TEST(hierarchical_binning_test, tb0_is_a_merged_bin_and_leads_to_recursive_call)

seqan::hibf::layout::layout hibf_layout{};
std::vector<size_t> kmer_counts{500, 500, 500, 500, 500, 500, 500, 500};
seqan::hibf::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};
seqan::hibf::layout::data_store data{.hibf_layout = &hibf_layout, .kmer_counts = &kmer_counts};

data.fpr_correction =
seqan::hibf::layout::compute_fpr_correction({.fpr = 0.05, .hash_count = 2, .t_max = config.tmax});
Expand Down
Loading

1 comment on commit 5071729

@vercel
Copy link

@vercel vercel bot commented on 5071729 Sep 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

hibf – ./

hibf-git-main-seqan.vercel.app
hibf-seqan.vercel.app
hibf.vercel.app

Please sign in to comment.