From 5ba0ee5105805fd2c92a4c3b02fdc2c4fa987164 Mon Sep 17 00:00:00 2001 From: Ivan Raikov Date: Mon, 3 Jul 2023 12:20:25 -0700 Subject: [PATCH 1/4] WIP: added aggregate append functions to avoid re-opening output file multiple times --- include/cell/cell_attributes.hh | 187 +++++++++++++++++++++++--------- include/hdf5/file_access.hh | 3 +- python/neuroh5/iomodule.cc | 79 ++------------ src/cell/cell_attributes.cc | 165 ++++++++++++++++++++++++++-- src/hdf5/file_access.cc | 22 +++- 5 files changed, 329 insertions(+), 127 deletions(-) diff --git a/include/cell/cell_attributes.hh b/include/cell/cell_attributes.hh index 3cee27cf..d0e23f49 100644 --- a/include/cell/cell_attributes.hh +++ b/include/cell/cell_attributes.hh @@ -153,7 +153,103 @@ namespace neuroh5 size_t numitems = 0 ); + + void append_cell_attribute_maps ( + MPI_Comm comm, + const std::string& file_name, + const std::string& attr_namespace, + const std::string& pop_name, + const CELL_IDX_T& pop_start, + const map >>& attr_values_uint32, + const map >> attr_values_int32, + const map >>& attr_values_uint16, + const map >>& attr_values_int16, + const map >>& attr_values_uint8, + const map >>& attr_values_int8, + const map >>& attr_values_float, + const size_t io_size, + const data::optional_hid data_type, + const CellIndex index_type = IndexOwner, + const CellPtr ptr_type = CellPtr(PtrOwner), + const size_t chunk_size = 4000, + const size_t value_chunk_size = 4000, + const size_t cache_size = 1*1024*1024 + ); + + template + void append_cell_attribute + ( + MPI_Comm comm, + const hid_t& loc, + const std::string& attr_namespace, + const std::string& pop_name, + const CELL_IDX_T& pop_start, + const std::string& attr_name, + const std::vector& index, + const std::vector attr_ptr, + const std::vector& values, + const data::optional_hid data_type, + const CellIndex index_type, + const CellPtr ptr_type, + const size_t chunk_size = 4000, + const size_t value_chunk_size = 4000, + const size_t cache_size = 1*1024*1024 + ) + { + int status; + throw_assert(index.size() == attr_ptr.size()-1, + "append_cell_attribute: mismatch between sizes of cell index and attribute pointer"); + + int size, rank; + throw_assert(MPI_Comm_size(comm, &size) == MPI_SUCCESS, + "append_cell_attribute: unable to obtain MPI communicator size"); + throw_assert(MPI_Comm_rank(comm, &rank) == MPI_SUCCESS, + "append_cell_attribute: unable to obtain MPI communicator rank"); + + string attr_prefix = hdf5::cell_attribute_prefix(attr_namespace, pop_name); + string attr_path = hdf5::cell_attribute_path(attr_namespace, pop_name, attr_name); + + T dummy; + hid_t ftype; + if (data_type.has_value()) + ftype = data_type.value(); + else + ftype = infer_datatype(dummy); + + hid_t file = H5Iget_file_id(loc); + throw_assert(file >= 0, + "append_cell_attribute: invalid file handle"); + + if (!(hdf5::exists_dataset (file, attr_path) > 0)) + { + create_cell_attribute_datasets(file, attr_namespace, pop_name, attr_name, + ftype, index_type, ptr_type, + chunk_size, value_chunk_size + ); + } + + vector rindex; + + for (const CELL_IDX_T& gid: index) + { + throw_assert(gid >= pop_start, + "append_cell_attribute: invalid gid"); + rindex.push_back(gid - pop_start); + } + + hdf5::append_cell_attribute(comm, file, attr_path, rindex, attr_ptr, values, + data_type, index_type, ptr_type); + + throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, + "append_cell_attribute: error in MPI_Barrier"); + + status = H5Fclose(file); + throw_assert(status == 0, "append_cell_attribute: unable to close HDF5 file"); + + } + 
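+      // Hedged usage sketch for the handle-based overload above (illustrative
+      // only: the file name, population "GC", attribute "Weight", the literal
+      // values, and the ATTR_PTR_T offset type are assumptions, not part of
+      // this change). The point of taking an hid_t is that the caller opens
+      // the output file once and reuses the handle across attribute appends:
+      //
+      //   hid_t file = hdf5::open_file(comm, "cells.h5", true, true);
+      //   std::vector<CELL_IDX_T> index    = {10, 11};            // cell gids
+      //   std::vector<ATTR_PTR_T> attr_ptr = {0, 2, 5};           // per-cell value offsets
+      //   std::vector<float>      values   = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f};
+      //   const data::optional_hid dflt_data_type;
+      //   cell::append_cell_attribute<float> (comm, file, "Attributes", "GC", 0,
+      //                                       "Weight", index, attr_ptr, values,
+      //                                       dflt_data_type, IndexOwner,
+      //                                       CellPtr (PtrOwner));
+      //   hdf5::close_file(file);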
+ template void append_cell_attribute ( @@ -241,20 +337,11 @@ namespace neuroh5 throw_assert(file >= 0, "append_cell_attribute: HDF5 file open error"); - vector rindex; - - for (const CELL_IDX_T& gid: index) - { - throw_assert(gid >= pop_start, - "append_cell_attribute: invalid gid"); - rindex.push_back(gid - pop_start); - } - - hdf5::append_cell_attribute(comm, file, attr_path, rindex, attr_ptr, values, - data_type, index_type, ptr_type); - - throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, - "append_cell_attribute: error in MPI_Barrier"); + append_cell_attribute (comm, file, attr_namespace, pop_name, pop_start, + attr_name, index, attr_ptr, values, + data_type, index_type, ptr_type, + chunk_size, value_chunk_size, cache_size); + status = H5Fclose(file); throw_assert(status == 0, "append_cell_attribute: unable to close HDF5 file"); status = H5Pclose(fapl); @@ -270,14 +357,14 @@ namespace neuroh5 void append_cell_attribute_map ( MPI_Comm comm, - const std::string& file_name, + const hid_t& loc, const std::string& attr_namespace, const std::string& pop_name, const CELL_IDX_T& pop_start, const std::string& attr_name, const std::map>& value_map, - const size_t io_size, const data::optional_hid data_type, + const set& io_rank_set, const CellIndex index_type = IndexOwner, const CellPtr ptr_type = CellPtr(PtrOwner), const size_t chunk_size = 4000, @@ -285,8 +372,9 @@ namespace neuroh5 const size_t cache_size = 1*1024*1024 ) { - - int ssize, srank; size_t size, rank; size_t io_size_value=0; + + herr_t status; + int ssize, srank; size_t size, rank; int io_size_value=0; size_t io_size=0; throw_assert(MPI_Comm_size(comm, &ssize) == MPI_SUCCESS, "error in MPI_Comm_size"); throw_assert(MPI_Comm_rank(comm, &srank) == MPI_SUCCESS, "error in MPI_Comm_rank"); throw_assert(ssize > 0, "invalid MPI comm size"); @@ -294,24 +382,34 @@ namespace neuroh5 rank = srank; size = ssize; - if (size < io_size) - { - io_size_value = size; - } - else - { - io_size_value = io_size; - } + hid_t file = H5Iget_file_id(loc); + throw_assert(file >= 0, + "append_cell_attribute_map: invalid file handle"); + + hid_t fapl; + throw_assert((fapl = H5Fget_access_plist(file)) >= 0, + "append_cell_attribute_map: error in H5Fget_access_plist"); + + MPI_Comm io_comm; + MPI_Info io_comm_info; + + throw_assert(H5Pget_fapl_mpio(fapl, &io_comm, &io_comm_info) >= 0, + "append_cell_attribute_map: error in H5Pget_fapl_mpio"); + + throw_assert(MPI_Comm_size(io_comm, &io_size_value) == MPI_SUCCESS, "error in MPI_Comm_size"); + throw_assert(io_size_value >= 0, "invalid io_size"); + io_size = io_size_value; + throw_assert(io_size <= size, "invalid io_size"); + + throw_assert(H5Pclose(fapl) == 0, + "append_cell_attribute_map: error in H5Pclose"); - set io_rank_set; - data::range_sample(size, io_size_value, io_rank_set); bool is_io_rank = false; if (io_rank_set.find(rank) != io_rank_set.end()) is_io_rank = true; - vector< pair > ranges; - mpi::rank_ranges(size, io_size_value, ranges); + mpi::rank_ranges(size, io_size, ranges); // Determine I/O ranks to which to send the values vector io_dests(size); @@ -422,39 +520,28 @@ namespace neuroh5 throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "append_cell_attribute_map: error in MPI_Barrier"); - // MPI Communicator for I/O ranks - MPI_Comm io_comm; - // MPI group color value used for I/O ranks - int io_color = 1, color; - - // Am I an I/O rank? 
if (is_io_rank) { - color = io_color; - } - else - { - color = 0; - } - throw_assert(MPI_Comm_split(comm,color,rank,&io_comm) == MPI_SUCCESS, - "append_cell_attribute_map: error in MPI_Comm_split"); - MPI_Comm_set_errhandler(io_comm, MPI_ERRORS_RETURN); - - if (is_io_rank) - { - append_cell_attribute(io_comm, file_name, + append_cell_attribute(io_comm, file, attr_namespace, pop_name, pop_start, attr_name, gid_recvbuf, attr_ptr, value_recvbuf, data_type, index_type, ptr_type, chunk_size, value_chunk_size, cache_size); } - + + throw_assert(MPI_Barrier(io_comm) == MPI_SUCCESS, "append_cell_attribute_map: error in MPI_Barrier"); throw_assert(MPI_Comm_free(&io_comm) == MPI_SUCCESS, "append_cell_attribute_map: error in MPI_Comm_free"); + throw_assert(MPI_Info_free(&io_comm_info) == MPI_SUCCESS, + "append_cell_attribute_map: error in MPI_Info_free"); throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "append_cell_attribute_map: error in MPI_Barrier"); + + status = H5Fclose(file); + throw_assert(status == 0, "append_cell_attribute_map: unable to close HDF5 file"); + } diff --git a/include/hdf5/file_access.hh b/include/hdf5/file_access.hh index 3ae66665..a3a5fd93 100644 --- a/include/hdf5/file_access.hh +++ b/include/hdf5/file_access.hh @@ -18,7 +18,8 @@ namespace neuroh5 MPI_Comm comm, const std::string& file_name, const bool collective = false, - const bool rdwr = false + const bool rdwr = false, + const size_t cache_size = 1*1024*1024 ); herr_t close_file diff --git a/python/neuroh5/iomodule.cc b/python/neuroh5/iomodule.cc index 9b01b8d2..5d5acd61 100644 --- a/python/neuroh5/iomodule.cc +++ b/python/neuroh5/iomodule.cc @@ -6912,74 +6912,17 @@ extern "C" all_attr_values_int8, all_attr_values_float); - if (access( file_name.c_str(), F_OK ) != 0) - { - vector groups; - groups.push_back (hdf5::POPULATIONS); - status = hdf5::create_file_toplevel (data_comm, file_name, groups); - } - else - { - status = 0; - } - throw_assert(status == 0, - "py_append_cell_attributes: unable to create toplevel groups in file"); - - MPI_Barrier(data_comm); - - const data::optional_hid dflt_data_type; - - for(auto it = all_attr_values_float.cbegin(); it != all_attr_values_float.cend(); ++it) - { - const string& attr_name = it->first; - cell::append_cell_attribute_map (data_comm, file_name, attr_namespace, pop_name, pop_start, - attr_name, it->second, io_size, dflt_data_type, - chunk_size, value_chunk_size, cache_size); - } - for(auto it = all_attr_values_uint32.cbegin(); it != all_attr_values_uint32.cend(); ++it) - { - const string& attr_name = it->first; - cell::append_cell_attribute_map (data_comm, file_name, attr_namespace, pop_name, pop_start, - attr_name, it->second, io_size, dflt_data_type, - chunk_size, value_chunk_size, cache_size); - } - for(auto it = all_attr_values_uint16.cbegin(); it != all_attr_values_uint16.cend(); ++it) - { - const string& attr_name = it->first; - cell::append_cell_attribute_map (data_comm, file_name, attr_namespace, pop_name, pop_start, - attr_name, it->second, io_size, dflt_data_type, - chunk_size, value_chunk_size, cache_size); - } - for(auto it = all_attr_values_uint8.cbegin(); it != all_attr_values_uint8.cend(); ++it) - { - const string& attr_name = it->first; - cell::append_cell_attribute_map (data_comm, file_name, attr_namespace, pop_name, pop_start, - attr_name, it->second, io_size, dflt_data_type, - chunk_size, value_chunk_size, cache_size); - } - for(auto it = all_attr_values_int32.cbegin(); it != all_attr_values_int32.cend(); ++it) - { - const string& attr_name = it->first; - 
cell::append_cell_attribute_map (data_comm, file_name, attr_namespace, pop_name, pop_start, - attr_name, it->second, io_size, dflt_data_type, - chunk_size, value_chunk_size, cache_size); - } - for(auto it = all_attr_values_int16.cbegin(); it != all_attr_values_int16.cend(); ++it) - { - const string& attr_name = it->first; - cell::append_cell_attribute_map (data_comm, file_name, attr_namespace, pop_name, pop_start, - attr_name, it->second, io_size, dflt_data_type, - chunk_size, value_chunk_size, cache_size); - } - for(auto it = all_attr_values_int8.cbegin(); it != all_attr_values_int8.cend(); ++it) - { - const string& attr_name = it->first; - cell::append_cell_attribute_map (data_comm, file_name, attr_namespace, pop_name, pop_start, - attr_name, it->second, io_size, dflt_data_type, - chunk_size, value_chunk_size, cache_size); - } - - + append_cell_attribute_maps (data_comm, file_name, + attr_namespace, pop_name, pop_start, + all_attr_values_uint32, + all_attr_values_uint16, + all_attr_values_uint8, + all_attr_values_int32, + all_attr_values_int16, + all_attr_values_int8, + all_attr_values_float, + io_size, dflt_data_type, + chunk_size, value_chunk_size, cache_size); } throw_assert(MPI_Barrier(data_comm) == MPI_SUCCESS, diff --git a/src/cell/cell_attributes.cc b/src/cell/cell_attributes.cc index 4b2ce3bf..19866000 100644 --- a/src/cell/cell_attributes.cc +++ b/src/cell/cell_attributes.cc @@ -4,14 +4,17 @@ /// /// Routines for manipulation of scalar and vector attributes associated with a cell id. /// -/// Copyright (C) 2016-2021 Project NeuroH5. +/// Copyright (C) 2016-2023 Project NeuroH5. //============================================================================== #include "neuroh5_types.hh" #include "path_names.hh" #include "read_template.hh" #include "write_template.hh" +#include "file_access.hh" +#include "cell_attributes.hh" #include "hdf5_cell_attributes.hh" +#include "create_file_toplevel.hh" #include "exists_dataset.hh" #include "dataset_num_elements.hh" #include "create_group.hh" @@ -31,6 +34,7 @@ #include #include +#include #include #include #include @@ -919,8 +923,7 @@ namespace neuroh5 throw_assert_nomsg(status == 0); status = H5Pclose(fapl); throw_assert_nomsg(status == 0); - - + } @@ -938,8 +941,8 @@ namespace neuroh5 data::NamedAttrMap &attr_map, // if positive, these arguments specify offset and number of entries to read // from the entries available to the current rank - size_t offset = 0, - size_t numitems = 0 + size_t offset, + size_t numitems ) { int srank, ssize; size_t rank, size; @@ -1108,8 +1111,8 @@ namespace neuroh5 const string& pop_name, const CELL_IDX_T& pop_start, data::NamedAttrMap& attr_map, - size_t offset = 0, - size_t numitems = 0 + size_t offset, + size_t numitems ) { herr_t status; @@ -1784,6 +1787,154 @@ namespace neuroh5 } recvbuf.clear(); } + + void append_cell_attribute_maps ( + MPI_Comm comm, + const std::string& file_name, + const std::string& attr_namespace, + const std::string& pop_name, + const CELL_IDX_T& pop_start, + const map >>& attr_values_uint32, + const map >> attr_values_int32, + const map >>& attr_values_uint16, + const map >>& attr_values_int16, + const map >>& attr_values_uint8, + const map >>& attr_values_int8, + const map >>& attr_values_float, + const size_t io_size, + const data::optional_hid data_type, + const CellIndex index_type, + const CellPtr ptr_type, + const size_t chunk_size, + const size_t value_chunk_size, + const size_t cache_size + ) + { + herr_t status; + int ssize, srank; size_t size, rank; size_t 
io_size_value=0; + throw_assert(MPI_Comm_size(comm, &ssize) == MPI_SUCCESS, "error in MPI_Comm_size"); + throw_assert(MPI_Comm_rank(comm, &srank) == MPI_SUCCESS, "error in MPI_Comm_rank"); + throw_assert(ssize > 0, "invalid MPI comm size"); + throw_assert(srank >= 0, "invalid MPI comm rank"); + rank = srank; + size = ssize; + + if (size < io_size) + { + io_size_value = size; + } + else + { + io_size_value = io_size; + } + + set io_rank_set; + data::range_sample(size, io_size_value, io_rank_set); + bool is_io_rank = false; + if (io_rank_set.find(rank) != io_rank_set.end()) + is_io_rank = true; + throw_assert(io_rank_set.size() > 0, "invalid I/O rank set"); + + + // MPI Communicator for I/O ranks + MPI_Comm io_comm; + // MPI group color value used for I/O ranks + int io_color = 1, color; + + // Am I an I/O rank? + if (is_io_rank) + { + color = io_color; + } + else + { + color = 0; + } + MPI_Comm_split(comm,color,rank,&io_comm); + MPI_Comm_set_errhandler(io_comm, MPI_ERRORS_RETURN); + + if (access( file_name.c_str(), F_OK ) != 0) + { + vector groups; + groups.push_back (hdf5::POPULATIONS); + status = hdf5::create_file_toplevel (io_comm, file_name, groups); + } + else + { + status = 0; + } + throw_assert(status == 0, + "append_cell_attribute_maps: unable to create toplevel groups in file"); + + throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "error in MPI_Barrier"); + + hid_t file = hdf5::open_file(comm, file_name, true, true, cache_size); + + for(auto it = attr_values_float.cbegin(); it != attr_values_float.cend(); ++it) + { + const string& attr_name = it->first; + cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, + attr_name, it->second, data_type, io_rank_set, + index_type, ptr_type, + chunk_size, value_chunk_size, cache_size); + } + for(auto it = attr_values_uint32.cbegin(); it != attr_values_uint32.cend(); ++it) + { + const string& attr_name = it->first; + cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, + attr_name, it->second, data_type, io_rank_set, + index_type, ptr_type, + chunk_size, value_chunk_size, cache_size); + } + for(auto it = attr_values_uint16.cbegin(); it != attr_values_uint16.cend(); ++it) + { + const string& attr_name = it->first; + cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, + attr_name, it->second, data_type, io_rank_set, + index_type, ptr_type, + chunk_size, value_chunk_size, cache_size); + } + for(auto it = attr_values_uint8.cbegin(); it != attr_values_uint8.cend(); ++it) + { + const string& attr_name = it->first; + cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, + attr_name, it->second, data_type, io_rank_set, + index_type, ptr_type, + chunk_size, value_chunk_size, cache_size); + } + for(auto it = attr_values_int32.cbegin(); it != attr_values_int32.cend(); ++it) + { + const string& attr_name = it->first; + cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, + attr_name, it->second, data_type, io_rank_set, + index_type, ptr_type, + chunk_size, value_chunk_size, cache_size); + } + for(auto it = attr_values_int16.cbegin(); it != attr_values_int16.cend(); ++it) + { + const string& attr_name = it->first; + cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, + attr_name, it->second, data_type, io_rank_set, + index_type, ptr_type, + chunk_size, value_chunk_size, cache_size); + } + for(auto it = attr_values_int8.cbegin(); it != attr_values_int8.cend(); ++it) + { + const string& 
attr_name = it->first; + cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, + attr_name, it->second, data_type, io_rank_set, + index_type, ptr_type, + chunk_size, value_chunk_size, cache_size); + } + + hdf5::close_file(file); + + throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "error in MPI_Barrier"); + throw_assert(MPI_Comm_free(&io_comm) == MPI_SUCCESS, + "append_cell_attribute_maps: error in MPI_Comm_free"); + + } + } diff --git a/src/hdf5/file_access.cc b/src/hdf5/file_access.cc index e382f550..ab82a503 100644 --- a/src/hdf5/file_access.cc +++ b/src/hdf5/file_access.cc @@ -17,11 +17,28 @@ namespace neuroh5 MPI_Comm comm, const std::string& file_name, const bool collective = false, - const bool rdwr = false + const bool rdwr = false, + const size_t cache_size = 1*1024*1024 ) { hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); throw_assert_nomsg(fapl >= 0); + + /* Cache parameters: */ + int nelemts; /* Dummy parameter in API, no longer used */ + size_t nslots; /* Number of slots in the + hash table */ + size_t nbytes; /* Size of chunk cache in bytes */ + double w0; /* Chunk preemption policy */ + /* Retrieve default cache parameters */ + throw_assert(H5Pget_cache(fapl, &nelemts, &nslots, &nbytes, &w0) >=0, + "error in H5Pget_cache"); + /* Set cache size and instruct the cache to discard the fully read chunk */ + nbytes = cache_size; w0 = 1.; + throw_assert(H5Pset_cache(fapl, nelemts, nslots, nbytes, w0)>= 0, + "error in H5Pset_cache"); + + #ifdef HDF5_IS_PARALLEL if (collective) { @@ -41,6 +58,9 @@ namespace neuroh5 } throw_assert_nomsg(file >= 0); + + throw_assert(H5Pclose(fapl) == 0, + "open_file: unable to close HDF5 file properties list"); return file; } From 3953e46d0e590d3c1fe865605aa678db383ae874 Mon Sep 17 00:00:00 2001 From: Ivan Raikov Date: Wed, 5 Jul 2023 08:43:23 -0700 Subject: [PATCH 2/4] added missing logic for two-phase I/O in append_trees --- include/cell/append_tree.hh | 127 +------------- include/cell/cell_attributes.hh | 217 +++++++++++++++++++----- include/cell/cell_index.hh | 19 +++ python/neuroh5/iomodule.cc | 29 ++-- src/cell/append_tree.cc | 289 +++++++++++++++++++++++++++++++- src/cell/cell_attributes.cc | 77 +++++---- src/cell/cell_index.cc | 126 +++++++++++++- 7 files changed, 669 insertions(+), 215 deletions(-) diff --git a/include/cell/append_tree.hh b/include/cell/append_tree.hh index 68a7a367..b987ba68 100644 --- a/include/cell/append_tree.hh +++ b/include/cell/append_tree.hh @@ -62,132 +62,20 @@ namespace neuroh5 /***************************************************************************** * Save tree data structures to HDF5 *****************************************************************************/ - template int append_trees ( MPI_Comm comm, - const std::string& file_name, + MPI_Comm io_comm, + hid_t loc, const std::string& pop_name, const CELL_IDX_T& pop_start, std::forward_list &tree_list, + const set &io_rank_set, CellPtr ptr_type = CellPtr(PtrOwner), const size_t chunk_size = 4000, const size_t value_chunk_size = 4000 - ) - { - herr_t status; - - unsigned int rank, size; - throw_assert_nomsg(MPI_Comm_size(comm, (int*)&size) == MPI_SUCCESS); - throw_assert_nomsg(MPI_Comm_rank(comm, (int*)&rank) == MPI_SUCCESS); - - - std::vector sec_ptr; - std::vector topo_ptr; - std::vector attr_ptr; - - std::vector all_index_vector; - std::vector all_src_vector, all_dst_vector; - std::vector all_xcoords, all_ycoords, all_zcoords; // coordinates of nodes - std::vector all_radiuses; // Radius - std::vector all_layers; // Layer - std::vector 
all_sections; // Section - std::vector all_parents; // Parent - std::vector all_swc_types; // SWC Types - - if (ptr_type.type == PtrNone) - { - status = build_singleton_tree_datasets(comm, - tree_list, - all_src_vector, all_dst_vector, - all_xcoords, all_ycoords, all_zcoords, - all_radiuses, all_layers, all_sections, - all_parents, all_swc_types); - } - else - { - status = build_tree_datasets(comm, - tree_list, - sec_ptr, topo_ptr, attr_ptr, - all_index_vector, all_src_vector, all_dst_vector, - all_xcoords, all_ycoords, all_zcoords, - all_radiuses, all_layers, all_sections, - all_parents, all_swc_types); - throw_assert_nomsg(status >= 0); - } - - - throw_assert_nomsg(append_cell_index (comm, file_name, pop_name, pop_start, - hdf5::TREES, all_index_vector) == 0); - - const data::optional_hid dflt_data_type; - const data::optional_hid coord_data_type(COORD_H5_NATIVE_T); - const data::optional_hid layer_data_type(LAYER_IDX_H5_NATIVE_T); - const data::optional_hid parent_node_data_type(PARENT_NODE_IDX_H5_NATIVE_T); - const data::optional_hid section_data_type(SECTION_IDX_H5_NATIVE_T); - const data::optional_hid swc_data_type(SWC_TYPE_H5_NATIVE_T); - - string attr_ptr_owner_path = hdf5::cell_attribute_path(hdf5::TREES, pop_name, hdf5::X_COORD) + "/" + hdf5::ATTR_PTR; - string sec_ptr_owner_path = hdf5::cell_attribute_path(hdf5::TREES, pop_name, hdf5::SRCSEC) + "/" + hdf5::SEC_PTR; - - append_cell_attribute (comm, file_name, hdf5::TREES, pop_name, pop_start, hdf5::X_COORD, - all_index_vector, attr_ptr, all_xcoords, - coord_data_type, IndexShared, - CellPtr (PtrOwner, hdf5::ATTR_PTR), - chunk_size, value_chunk_size); - append_cell_attribute (comm, file_name, hdf5::TREES, pop_name, pop_start, hdf5::Y_COORD, - all_index_vector, attr_ptr, all_ycoords, - coord_data_type, IndexShared, - CellPtr (PtrShared, attr_ptr_owner_path), - chunk_size, value_chunk_size); - append_cell_attribute (comm, file_name, hdf5::TREES, pop_name, pop_start, hdf5::Z_COORD, - all_index_vector, attr_ptr, all_zcoords, - coord_data_type, IndexShared, - CellPtr (PtrShared, attr_ptr_owner_path), - chunk_size, value_chunk_size); - append_cell_attribute (comm, file_name, hdf5::TREES, pop_name, pop_start, hdf5::RADIUS, - all_index_vector, attr_ptr, all_radiuses, - dflt_data_type, IndexShared, - CellPtr (PtrShared, attr_ptr_owner_path), - chunk_size, value_chunk_size); - append_cell_attribute (comm, file_name, hdf5::TREES, pop_name, pop_start, hdf5::LAYER, - all_index_vector, attr_ptr, all_layers, - layer_data_type, IndexShared, - CellPtr (PtrShared, attr_ptr_owner_path), - chunk_size, value_chunk_size); - append_cell_attribute (comm, file_name, hdf5::TREES, pop_name, pop_start, hdf5::PARENT, - all_index_vector, attr_ptr, all_parents, - parent_node_data_type, IndexShared, - CellPtr (PtrShared, attr_ptr_owner_path), - chunk_size, value_chunk_size); - - append_cell_attribute (comm, file_name, hdf5::TREES, pop_name, pop_start, hdf5::SWCTYPE, - all_index_vector, attr_ptr, all_swc_types, - swc_data_type, IndexShared, - CellPtr (PtrShared, attr_ptr_owner_path), - chunk_size, value_chunk_size); - append_cell_attribute (comm, file_name, hdf5::TREES, pop_name, pop_start, hdf5::SRCSEC, - all_index_vector, topo_ptr, all_src_vector, - section_data_type, IndexShared, - CellPtr (PtrOwner, hdf5::SEC_PTR), - chunk_size, value_chunk_size); - append_cell_attribute (comm, file_name, hdf5::TREES, pop_name, pop_start, hdf5::DSTSEC, - all_index_vector, topo_ptr, all_dst_vector, - section_data_type, IndexShared, - CellPtr (PtrShared, sec_ptr_owner_path), - 
chunk_size, value_chunk_size); - - append_cell_attribute (comm, file_name, hdf5::TREES, pop_name, pop_start, hdf5::SECTION, - all_index_vector, sec_ptr, all_sections, - section_data_type, IndexShared, - CellPtr (PtrOwner, hdf5::SEC_PTR), - chunk_size, value_chunk_size); - - - return 0; - } + ); - template int append_trees ( MPI_Comm comm, @@ -195,13 +83,10 @@ namespace neuroh5 const std::string& pop_name, const CELL_IDX_T& pop_start, std::forward_list &tree_list, + size_t io_size, const size_t chunk_size, const size_t value_chunk_size - ) - { - return append_trees(comm, file_name, pop_name, pop_start, tree_list, - CellPtr(PtrOwner), chunk_size, value_chunk_size); - } + ); } } diff --git a/include/cell/cell_attributes.hh b/include/cell/cell_attributes.hh index d0e23f49..5454dfa7 100644 --- a/include/cell/cell_attributes.hh +++ b/include/cell/cell_attributes.hh @@ -180,7 +180,6 @@ namespace neuroh5 template void append_cell_attribute ( - MPI_Comm comm, const hid_t& loc, const std::string& attr_namespace, const std::string& pop_name, @@ -201,11 +200,6 @@ namespace neuroh5 throw_assert(index.size() == attr_ptr.size()-1, "append_cell_attribute: mismatch between sizes of cell index and attribute pointer"); - int size, rank; - throw_assert(MPI_Comm_size(comm, &size) == MPI_SUCCESS, - "append_cell_attribute: unable to obtain MPI communicator size"); - throw_assert(MPI_Comm_rank(comm, &rank) == MPI_SUCCESS, - "append_cell_attribute: unable to obtain MPI communicator rank"); string attr_prefix = hdf5::cell_attribute_prefix(attr_namespace, pop_name); string attr_path = hdf5::cell_attribute_path(attr_namespace, pop_name, attr_name); @@ -221,6 +215,26 @@ namespace neuroh5 throw_assert(file >= 0, "append_cell_attribute: invalid file handle"); + hid_t fapl; + throw_assert((fapl = H5Fget_access_plist(file)) >= 0, + "append_cell_attribute_map: error in H5Fget_access_plist"); + + MPI_Comm comm; + MPI_Info comm_info; + + throw_assert(H5Pget_fapl_mpio(fapl, &comm, &comm_info) >= 0, + "append_cell_attribute: error in H5Pget_fapl_mpio"); + + int size, rank; + throw_assert(MPI_Comm_size(comm, &size) == MPI_SUCCESS, + "append_cell_attribute: unable to obtain MPI communicator size"); + throw_assert(MPI_Comm_rank(comm, &rank) == MPI_SUCCESS, + "append_cell_attribute: unable to obtain MPI communicator rank"); + + throw_assert(H5Pclose(fapl) == 0, + "append_cell_attribute: error in H5Pclose"); + + if (!(hdf5::exists_dataset (file, attr_path) > 0)) { create_cell_attribute_datasets(file, attr_namespace, pop_name, attr_name, @@ -337,7 +351,7 @@ namespace neuroh5 throw_assert(file >= 0, "append_cell_attribute: HDF5 file open error"); - append_cell_attribute (comm, file, attr_namespace, pop_name, pop_start, + append_cell_attribute (file, attr_namespace, pop_name, pop_start, attr_name, index, attr_ptr, values, data_type, index_type, ptr_type, chunk_size, value_chunk_size, cache_size); @@ -374,39 +388,44 @@ namespace neuroh5 { herr_t status; - int ssize, srank; size_t size, rank; int io_size_value=0; size_t io_size=0; + int ssize=0, srank=0; size_t size=0, rank=0; size_t io_size=io_rank_set.size(); throw_assert(MPI_Comm_size(comm, &ssize) == MPI_SUCCESS, "error in MPI_Comm_size"); throw_assert(MPI_Comm_rank(comm, &srank) == MPI_SUCCESS, "error in MPI_Comm_rank"); throw_assert(ssize > 0, "invalid MPI comm size"); throw_assert(srank >= 0, "invalid MPI rank"); rank = srank; size = ssize; - - hid_t file = H5Iget_file_id(loc); - throw_assert(file >= 0, - "append_cell_attribute_map: invalid file handle"); + throw_assert(io_size 
<= size, "invalid io_size"); + + bool is_io_rank = false; + if (io_rank_set.find(rank) != io_rank_set.end()) + is_io_rank = true; hid_t fapl; - throw_assert((fapl = H5Fget_access_plist(file)) >= 0, - "append_cell_attribute_map: error in H5Fget_access_plist"); - + hid_t file; MPI_Comm io_comm; MPI_Info io_comm_info; - throw_assert(H5Pget_fapl_mpio(fapl, &io_comm, &io_comm_info) >= 0, - "append_cell_attribute_map: error in H5Pget_fapl_mpio"); - - throw_assert(MPI_Comm_size(io_comm, &io_size_value) == MPI_SUCCESS, "error in MPI_Comm_size"); - throw_assert(io_size_value >= 0, "invalid io_size"); - io_size = io_size_value; - throw_assert(io_size <= size, "invalid io_size"); - - throw_assert(H5Pclose(fapl) == 0, - "append_cell_attribute_map: error in H5Pclose"); + if (is_io_rank) + { + int io_size_value=0; + + file = H5Iget_file_id(loc); + throw_assert(file >= 0, + "append_cell_attribute_map: invalid file handle"); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; + throw_assert((fapl = H5Fget_access_plist(file)) >= 0, + "append_cell_attribute_map: error in H5Fget_access_plist"); + + throw_assert(H5Pget_fapl_mpio(fapl, &io_comm, &io_comm_info) >= 0, + "append_cell_attribute_map: error in H5Pget_fapl_mpio"); + + throw_assert(MPI_Comm_size(io_comm, &io_size_value) == MPI_SUCCESS, "error in MPI_Comm_size"); + throw_assert(io_size_value == io_size, "io_size mismatch"); + + throw_assert(H5Pclose(fapl) == 0, + "append_cell_attribute_map: error in H5Pclose"); + } vector< pair > ranges; mpi::rank_ranges(size, io_size, ranges); @@ -522,29 +541,149 @@ namespace neuroh5 if (is_io_rank) { - append_cell_attribute(io_comm, file, + append_cell_attribute(file, attr_namespace, pop_name, pop_start, attr_name, gid_recvbuf, attr_ptr, value_recvbuf, data_type, index_type, ptr_type, chunk_size, value_chunk_size, cache_size); } - - throw_assert(MPI_Barrier(io_comm) == MPI_SUCCESS, - "append_cell_attribute_map: error in MPI_Barrier"); - throw_assert(MPI_Comm_free(&io_comm) == MPI_SUCCESS, - "append_cell_attribute_map: error in MPI_Comm_free"); - throw_assert(MPI_Info_free(&io_comm_info) == MPI_SUCCESS, - "append_cell_attribute_map: error in MPI_Info_free"); + if (is_io_rank) + { + throw_assert(MPI_Barrier(io_comm) == MPI_SUCCESS, + "append_cell_attribute_map: error in MPI_Barrier"); + throw_assert(MPI_Comm_free(&io_comm) == MPI_SUCCESS, + "append_cell_attribute_map: error in MPI_Comm_free"); + + if (io_comm_info != MPI_INFO_NULL) + { + throw_assert(MPI_Info_free(&io_comm_info) == MPI_SUCCESS, + "append_cell_attribute_map: error in MPI_Info_free"); + } + status = H5Fclose(file); + throw_assert(status == 0, "append_cell_attribute_map: unable to close HDF5 file"); + } + throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "append_cell_attribute_map: error in MPI_Barrier"); - status = H5Fclose(file); - throw_assert(status == 0, "append_cell_attribute_map: unable to close HDF5 file"); - } + template + void append_cell_attribute_map + ( + MPI_Comm comm, + const std::string& file_name, + const std::string& attr_namespace, + const std::string& pop_name, + const CELL_IDX_T& pop_start, + const std::string& attr_name, + const std::map>& value_map, + const size_t io_size, + const data::optional_hid data_type, + const CellIndex index_type = IndexOwner, + const CellPtr ptr_type = CellPtr(PtrOwner), + const size_t chunk_size = 4000, + const size_t value_chunk_size = 4000, + const size_t cache_size = 1*1024*1024 + ) + { + herr_t status; + int size, rank; + throw_assert(MPI_Comm_size(comm, 
&size) == MPI_SUCCESS, + "append_cell_attribute: unable to obtain MPI communicator size"); + throw_assert(MPI_Comm_rank(comm, &rank) == MPI_SUCCESS, + "append_cell_attribute: unable to obtain MPI communicator rank"); + + hid_t fapl = H5Pcreate(H5P_FILE_ACCESS); + + set io_rank_set; + data::range_sample(size, io_size, io_rank_set); + bool is_io_rank = false; + if (io_rank_set.find(rank) != io_rank_set.end()) + is_io_rank = true; + throw_assert(io_rank_set.size() > 0, "invalid I/O rank set"); + + int io_color = 1, color; + MPI_Comm io_comm; + + // Am I an I/O rank? + if (is_io_rank) + { + color = io_color; + } + else + { + color = 0; + } + MPI_Comm_split(comm,color,rank,&io_comm); + MPI_Comm_set_errhandler(io_comm, MPI_ERRORS_RETURN); + + throw_assert(H5Pset_fapl_mpio(fapl, io_comm, MPI_INFO_NULL) >= 0, + "append_cell_attribute_map: HDF5 mpio error"); + + /* Cache parameters: */ + int nelemts; /* Dummy parameter in API, no longer used */ + size_t nslots; /* Number of slots in the + hash table */ + size_t nbytes; /* Size of chunk cache in bytes */ + double w0; /* Chunk preemption policy */ + /* Retrieve default cache parameters */ + throw_assert(H5Pget_cache(fapl, &nelemts, &nslots, &nbytes, &w0) >=0, + "error in H5Pget_cache"); + /* Set cache size and instruct the cache to discard the fully read chunk */ + nbytes = cache_size; w0 = 1.; + throw_assert(H5Pset_cache(fapl, nelemts, nslots, nbytes, w0)>= 0, + "error in H5Pset_cache"); + + string attr_path = hdf5::cell_attribute_path(attr_namespace, pop_name, attr_name); + + if (rank == 0) + { + hid_t file = H5Fopen(file_name.c_str(), H5F_ACC_RDWR, H5P_DEFAULT); + throw_assert(file >= 0, + "append_cell_attribute: HDF5 file open error"); + + T dummy; + hid_t ftype; + if (data_type.has_value()) + { + ftype = data_type.value(); + } + else + ftype = infer_datatype(dummy); + throw_assert(ftype >= 0, + "append_cell_attribute: unable to infer HDF5 data type"); + + + if (!(hdf5::exists_dataset (file, attr_path) > 0)) + { + create_cell_attribute_datasets(file, attr_namespace, pop_name, attr_name, + ftype, index_type, ptr_type, + chunk_size, value_chunk_size + ); + } + status = H5Fclose(file); + throw_assert(status == 0, "append_cell_attribute: unable to close HDF5 file"); + } + throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, + "append_cell_attribute: error in MPI_Barrier"); + + hid_t file = H5Fopen(file_name.c_str(), H5F_ACC_RDWR, fapl); + throw_assert(file >= 0, + "append_cell_attribute: HDF5 file open error"); + + append_cell_attribute_map(comm, file, attr_namespace, pop_name, pop_start, attr_name, value_map, + io_size, data_type, IndexOwner, CellPtr(PtrOwner), + chunk_size, value_chunk_size, cache_size); + + throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "error in MPI_Barrier"); + throw_assert(MPI_Comm_free(&io_comm) == MPI_SUCCESS, + "append_cell_attribute_map: error in MPI_Comm_free"); + + } + template void append_cell_attribute_map ( diff --git a/include/cell/cell_index.hh b/include/cell/cell_index.hh index 33b527fa..d8aa43bf 100644 --- a/include/cell/cell_index.hh +++ b/include/cell/cell_index.hh @@ -25,6 +25,15 @@ namespace neuroh5 const string& attr_name_space, const size_t chunk_size = 1000 ); + + herr_t create_cell_index + ( + MPI_Comm comm, + hid_t loc, + const string& pop_name, + const string& attr_name_space, + const size_t chunk_size = 1000 + ); herr_t read_cell_index ( @@ -44,6 +53,16 @@ namespace neuroh5 const string& attr_name_space, const vector& cell_index ); + + herr_t append_cell_index + ( + MPI_Comm comm, + hid_t loc, + const string& 
pop_name, + const CELL_IDX_T& pop_start, + const string& attr_name_space, + const vector& cell_index + ); herr_t link_cell_index ( diff --git a/python/neuroh5/iomodule.cc b/python/neuroh5/iomodule.cc index 5d5acd61..06764d30 100644 --- a/python/neuroh5/iomodule.cc +++ b/python/neuroh5/iomodule.cc @@ -6903,6 +6903,8 @@ extern "C" map >> all_attr_values_int8; map >> all_attr_values_float; + const data::optional_hid dflt_data_type; + build_cell_attr_value_maps(idx_values, all_attr_values_uint32, all_attr_values_uint16, @@ -6912,17 +6914,18 @@ extern "C" all_attr_values_int8, all_attr_values_float); - append_cell_attribute_maps (data_comm, file_name, - attr_namespace, pop_name, pop_start, - all_attr_values_uint32, - all_attr_values_uint16, - all_attr_values_uint8, - all_attr_values_int32, - all_attr_values_int16, - all_attr_values_int8, - all_attr_values_float, - io_size, dflt_data_type, - chunk_size, value_chunk_size, cache_size); + cell::append_cell_attribute_maps (data_comm, file_name, + attr_namespace, pop_name, pop_start, + all_attr_values_uint32, + all_attr_values_int32, + all_attr_values_uint16, + all_attr_values_int16, + all_attr_values_uint8, + all_attr_values_int8, + all_attr_values_float, + io_size, dflt_data_type, + IndexOwner, CellPtr(PtrOwner), + chunk_size, value_chunk_size, cache_size); } throw_assert(MPI_Barrier(data_comm) == MPI_SUCCESS, @@ -7172,8 +7175,8 @@ extern "C" ++layer_it, ++swc_type_it; } - throw_assert(cell::append_trees (data_comm, file_name, pop_name, pop_start, tree_list, - chunk_size, value_chunk_size) >= 0, + throw_assert(cell::append_trees (data_comm, file_name, pop_name, pop_start, tree_list, + io_size, chunk_size, value_chunk_size) >= 0, "py_append_cell_trees: unable to append trees"); } throw_assert(MPI_Barrier(data_comm) == MPI_SUCCESS, diff --git a/src/cell/append_tree.cc b/src/cell/append_tree.cc index 2d0245fd..e284ef42 100644 --- a/src/cell/append_tree.cc +++ b/src/cell/append_tree.cc @@ -4,25 +4,30 @@ /// /// Append tree structures to NeuroH5 file. /// -/// Copyright (C) 2016-2021 Project NeuroH5. +/// Copyright (C) 2016-2023 Project NeuroH5. 
//============================================================================== #include #include +#include #include #include #include #include "neuroh5_types.hh" +#include "append_tree.hh" #include "file_access.hh" #include "rank_range.hh" +#include "range_sample.hh" #include "dataset_num_elements.hh" #include "exists_dataset.hh" #include "enum_type.hh" #include "path_names.hh" +#include "serialize_tree.hh" #include "cell_index.hh" #include "cell_attributes.hh" +#include "create_file_toplevel.hh" #include "compact_optional.hh" #include "optional_value.hh" #include "throw_assert.hh" @@ -269,6 +274,288 @@ namespace neuroh5 return 0; } + + /***************************************************************************** + * Save tree data structures to HDF5 + *****************************************************************************/ + int append_trees + ( + MPI_Comm comm, + MPI_Comm io_comm, + hid_t loc, + const std::string& pop_name, + const CELL_IDX_T& pop_start, + std::forward_list &tree_list, + const set &io_rank_set, + CellPtr ptr_type, + const size_t chunk_size, + const size_t value_chunk_size + ) + { + herr_t status=0; + size_t io_size=0; + + size_t rank, size; + throw_assert_nomsg(MPI_Comm_size(comm, (int*)&size) == MPI_SUCCESS); + throw_assert_nomsg(MPI_Comm_rank(comm, (int*)&rank) == MPI_SUCCESS); + + size_t io_comm_size; + throw_assert_nomsg(MPI_Comm_size(io_comm, (int*)&io_comm_size) == MPI_SUCCESS); + + + bool is_io_rank = false; + if (io_rank_set.find(rank) != io_rank_set.end()) + is_io_rank = true; + throw_assert(io_rank_set.size() > 0, "invalid I/O rank set"); + io_size = io_rank_set.size(); + throw_assert(io_comm_size == io_size, "mismatch between io_size and io_comm size"); + + vector< pair > ranges; + mpi::rank_ranges(size, io_size, ranges); + + // Determine I/O ranks to which to send the values + vector io_dests(size); + for (size_t r=0; r=0; i--) + { + if (ranges[i].first <= r) + { + io_dests[r] = *std::next(io_rank_set.begin(), i); + break; + } + } + } + + std::forward_list local_tree_list; + { + std::vector sendbuf; + std::vector sendcounts, sdispls; + vector recvcounts, rdispls; + vector recvbuf; + + rank_t dst_rank = io_dests[rank]; + map > rank_tree_map; + for_each(tree_list.cbegin(), + tree_list.cend(), + [&] (const neurotree_t& tree) + { + set dst_rank_set; + + const CELL_IDX_T &idx = get<0>(tree); + CELL_IDX_T gid = idx; + + map &tree_map = rank_tree_map[dst_rank]; + tree_map.insert(make_pair(gid, tree)); + }); + + data::serialize_rank_tree_map (size, rank, rank_tree_map, sendcounts, sendbuf, sdispls); + + throw_assert_nomsg(mpi::alltoallv_vector(comm, MPI_CHAR, sendcounts, sdispls, sendbuf, + recvcounts, rdispls, recvbuf) >= 0); + sendbuf.clear(); + sendbuf.shrink_to_fit(); + + data::deserialize_rank_tree_list (size, recvbuf, recvcounts, rdispls, + local_tree_list); + } + + + std::vector sec_ptr; + std::vector topo_ptr; + std::vector attr_ptr; + + std::vector all_index_vector; + std::vector all_src_vector, all_dst_vector; + std::vector all_xcoords, all_ycoords, all_zcoords; // coordinates of nodes + std::vector all_radiuses; // Radius + std::vector all_layers; // Layer + std::vector all_sections; // Section + std::vector all_parents; // Parent + std::vector all_swc_types; // SWC Types + + if (ptr_type.type == PtrNone) + { + status = build_singleton_tree_datasets(comm, + local_tree_list, + all_src_vector, all_dst_vector, + all_xcoords, all_ycoords, all_zcoords, + all_radiuses, all_layers, all_sections, + all_parents, all_swc_types); + } + else + { + status = 
build_tree_datasets(comm, + local_tree_list, + sec_ptr, topo_ptr, attr_ptr, + all_index_vector, all_src_vector, all_dst_vector, + all_xcoords, all_ycoords, all_zcoords, + all_radiuses, all_layers, all_sections, + all_parents, all_swc_types); + throw_assert_nomsg(status >= 0); + } + + local_tree_list.clear(); + + + const data::optional_hid dflt_data_type; + const data::optional_hid coord_data_type(COORD_H5_NATIVE_T); + const data::optional_hid layer_data_type(LAYER_IDX_H5_NATIVE_T); + const data::optional_hid parent_node_data_type(PARENT_NODE_IDX_H5_NATIVE_T); + const data::optional_hid section_data_type(SECTION_IDX_H5_NATIVE_T); + const data::optional_hid swc_data_type(SWC_TYPE_H5_NATIVE_T); + + string attr_ptr_owner_path = hdf5::cell_attribute_path(hdf5::TREES, pop_name, hdf5::X_COORD) + "/" + hdf5::ATTR_PTR; + string sec_ptr_owner_path = hdf5::cell_attribute_path(hdf5::TREES, pop_name, hdf5::SRCSEC) + "/" + hdf5::SEC_PTR; + + + hid_t file; + + if (is_io_rank) + { + + hid_t file = H5Iget_file_id(loc); + throw_assert(file >= 0, + "append_trees: invalid file handle"); + + append_cell_index (io_comm, file, pop_name, pop_start, + hdf5::TREES, all_index_vector); + + append_cell_attribute (file, hdf5::TREES, pop_name, pop_start, hdf5::X_COORD, + all_index_vector, attr_ptr, all_xcoords, + coord_data_type, IndexShared, + CellPtr (PtrOwner, hdf5::ATTR_PTR), + chunk_size, value_chunk_size); + append_cell_attribute (file, hdf5::TREES, pop_name, pop_start, hdf5::Y_COORD, + all_index_vector, attr_ptr, all_ycoords, + coord_data_type, IndexShared, + CellPtr (PtrShared, attr_ptr_owner_path), + chunk_size, value_chunk_size); + append_cell_attribute (file, hdf5::TREES, pop_name, pop_start, hdf5::Z_COORD, + all_index_vector, attr_ptr, all_zcoords, + coord_data_type, IndexShared, + CellPtr (PtrShared, attr_ptr_owner_path), + chunk_size, value_chunk_size); + append_cell_attribute (file, hdf5::TREES, pop_name, pop_start, hdf5::RADIUS, + all_index_vector, attr_ptr, all_radiuses, + dflt_data_type, IndexShared, + CellPtr (PtrShared, attr_ptr_owner_path), + chunk_size, value_chunk_size); + append_cell_attribute (file, hdf5::TREES, pop_name, pop_start, hdf5::LAYER, + all_index_vector, attr_ptr, all_layers, + layer_data_type, IndexShared, + CellPtr (PtrShared, attr_ptr_owner_path), + chunk_size, value_chunk_size); + append_cell_attribute (file, hdf5::TREES, pop_name, pop_start, hdf5::PARENT, + all_index_vector, attr_ptr, all_parents, + parent_node_data_type, IndexShared, + CellPtr (PtrShared, attr_ptr_owner_path), + chunk_size, value_chunk_size); + + append_cell_attribute (file, hdf5::TREES, pop_name, pop_start, hdf5::SWCTYPE, + all_index_vector, attr_ptr, all_swc_types, + swc_data_type, IndexShared, + CellPtr (PtrShared, attr_ptr_owner_path), + chunk_size, value_chunk_size); + append_cell_attribute (file, hdf5::TREES, pop_name, pop_start, hdf5::SRCSEC, + all_index_vector, topo_ptr, all_src_vector, + section_data_type, IndexShared, + CellPtr (PtrOwner, hdf5::SEC_PTR), + chunk_size, value_chunk_size); + append_cell_attribute (file, hdf5::TREES, pop_name, pop_start, hdf5::DSTSEC, + all_index_vector, topo_ptr, all_dst_vector, + section_data_type, IndexShared, + CellPtr (PtrShared, sec_ptr_owner_path), + chunk_size, value_chunk_size); + + append_cell_attribute (file, hdf5::TREES, pop_name, pop_start, hdf5::SECTION, + all_index_vector, sec_ptr, all_sections, + section_data_type, IndexShared, + CellPtr (PtrOwner, hdf5::SEC_PTR), + chunk_size, value_chunk_size); + + + status = H5Fclose(file); + throw_assert(status == 0, 
"append_trees: unable to close HDF5 file"); + + } + + + throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "error in MPI_Barrier"); + + return 0; + } + + int append_trees + ( + MPI_Comm comm, + const std::string& file_name, + const std::string& pop_name, + const CELL_IDX_T& pop_start, + std::forward_list &tree_list, + size_t io_size, + const size_t chunk_size, + const size_t value_chunk_size + ) + { + herr_t status; + + size_t rank, size; + throw_assert_nomsg(MPI_Comm_size(comm, (int*)&size) == MPI_SUCCESS); + throw_assert_nomsg(MPI_Comm_rank(comm, (int*)&rank) == MPI_SUCCESS); + + set io_rank_set; + data::range_sample(size, io_size, io_rank_set); + + bool is_io_rank = false; + if (io_rank_set.find(rank) != io_rank_set.end()) + is_io_rank = true; + + // MPI Communicator for I/O ranks + MPI_Comm io_comm; + // MPI group color value used for I/O ranks + int io_color = 1, color; + + // Am I an I/O rank? + if (is_io_rank) + { + color = io_color; + } + else + { + color = 0; + } + MPI_Comm_split(comm,color,rank,&io_comm); + MPI_Comm_set_errhandler(io_comm, MPI_ERRORS_RETURN); + + if (access( file_name.c_str(), F_OK ) != 0) + { + vector groups; + groups.push_back (hdf5::POPULATIONS); + status = hdf5::create_file_toplevel (io_comm, file_name, groups); + } + else + { + status = 0; + } + throw_assert(status == 0, + "append_trees: unable to create toplevel groups in file"); + + throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "error in MPI_Barrier"); + + hid_t file = hdf5::open_file(io_comm, file_name, true, true); + + status = append_trees(comm, io_comm, file, pop_name, pop_start, tree_list, + io_rank_set, CellPtr(PtrOwner), chunk_size, value_chunk_size); + throw_assert_nomsg(status >= 0); + + hdf5::close_file(file); + + throw_assert(MPI_Comm_free(&io_comm) == MPI_SUCCESS, + "append_trees: error in MPI_Comm_free"); + + return 0; + } } diff --git a/src/cell/cell_attributes.cc b/src/cell/cell_attributes.cc index 19866000..a0c9526f 100644 --- a/src/cell/cell_attributes.cc +++ b/src/cell/cell_attributes.cc @@ -1863,45 +1863,49 @@ namespace neuroh5 { status = 0; } - throw_assert(status == 0, - "append_cell_attribute_maps: unable to create toplevel groups in file"); + throw_assert(status == 0, + "append_cell_attribute_maps: unable to create toplevel groups in file"); + + throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "error in MPI_Barrier"); + + hid_t file; - throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "error in MPI_Barrier"); - - hid_t file = hdf5::open_file(comm, file_name, true, true, cache_size); - - for(auto it = attr_values_float.cbegin(); it != attr_values_float.cend(); ++it) - { - const string& attr_name = it->first; - cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, + if (is_io_rank) { + file = hdf5::open_file(io_comm, file_name, true, true, cache_size); + } + + for(auto it = attr_values_float.cbegin(); it != attr_values_float.cend(); ++it) + { + const string& attr_name = it->first; + cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, + attr_name, it->second, data_type, io_rank_set, + index_type, ptr_type, + chunk_size, value_chunk_size, cache_size); + } + for(auto it = attr_values_uint32.cbegin(); it != attr_values_uint32.cend(); ++it) + { + const string& attr_name = it->first; + cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, + attr_name, it->second, data_type, io_rank_set, + index_type, ptr_type, + chunk_size, value_chunk_size, cache_size); + } + for(auto it = attr_values_uint16.cbegin(); it != 
attr_values_uint16.cend(); ++it) + { + const string& attr_name = it->first; + cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, + attr_name, it->second, data_type, io_rank_set, + index_type, ptr_type, + chunk_size, value_chunk_size, cache_size); + } + for(auto it = attr_values_uint8.cbegin(); it != attr_values_uint8.cend(); ++it) + { + const string& attr_name = it->first; + cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, attr_name, it->second, data_type, io_rank_set, index_type, ptr_type, chunk_size, value_chunk_size, cache_size); } - for(auto it = attr_values_uint32.cbegin(); it != attr_values_uint32.cend(); ++it) - { - const string& attr_name = it->first; - cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, - attr_name, it->second, data_type, io_rank_set, - index_type, ptr_type, - chunk_size, value_chunk_size, cache_size); - } - for(auto it = attr_values_uint16.cbegin(); it != attr_values_uint16.cend(); ++it) - { - const string& attr_name = it->first; - cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, - attr_name, it->second, data_type, io_rank_set, - index_type, ptr_type, - chunk_size, value_chunk_size, cache_size); - } - for(auto it = attr_values_uint8.cbegin(); it != attr_values_uint8.cend(); ++it) - { - const string& attr_name = it->first; - cell::append_cell_attribute_map (comm, file, attr_namespace, pop_name, pop_start, - attr_name, it->second, data_type, io_rank_set, - index_type, ptr_type, - chunk_size, value_chunk_size, cache_size); - } for(auto it = attr_values_int32.cbegin(); it != attr_values_int32.cend(); ++it) { const string& attr_name = it->first; @@ -1927,7 +1931,10 @@ namespace neuroh5 chunk_size, value_chunk_size, cache_size); } - hdf5::close_file(file); + if (is_io_rank) + { + hdf5::close_file(file); + } throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "error in MPI_Barrier"); throw_assert(MPI_Comm_free(&io_comm) == MPI_SUCCESS, diff --git a/src/cell/cell_index.cc b/src/cell/cell_index.cc index c9d463ce..e9cb61a5 100644 --- a/src/cell/cell_index.cc +++ b/src/cell/cell_index.cc @@ -4,7 +4,7 @@ /// /// Functions for reading and writing cell indices from an HDF5 file. /// -/// Copyright (C) 2016-2020 Project NeuroH5. +/// Copyright (C) 2016-2023 Project NeuroH5. 
//============================================================================== @@ -16,6 +16,7 @@ #include "neuroh5_types.hh" #include "dataset_num_elements.hh" #include "exists_dataset.hh" +#include "cell_index.hh" #include "file_access.hh" #include "create_group.hh" #include "path_names.hh" @@ -38,7 +39,7 @@ namespace neuroh5 const string& file_name, const string& pop_name, const string& attr_name_space, - const size_t chunk_size = 1000 + const size_t chunk_size ) { herr_t ierr = 0; @@ -112,6 +113,86 @@ namespace neuroh5 return ierr; } + + + herr_t create_cell_index + ( + MPI_Comm comm, + hid_t loc, + const string& pop_name, + const string& attr_name_space, + const size_t chunk_size + ) + { + herr_t ierr = 0; + int srank, ssize; size_t rank, size; + + throw_assert_nomsg(MPI_Comm_size(comm, &ssize) == MPI_SUCCESS); + throw_assert_nomsg(MPI_Comm_rank(comm, &srank) == MPI_SUCCESS); + throw_assert_nomsg(srank >= 0); + throw_assert_nomsg(ssize > 0); + + rank = (size_t)srank; + size = (size_t)ssize; + + string attr_prefix = hdf5::cell_attribute_prefix(attr_name_space, pop_name); + string attr_path = hdf5::cell_attribute_path(attr_name_space, pop_name, hdf5::CELL_INDEX); + + bool has_group=false, has_index=false; + + hid_t file = H5Iget_file_id(loc); + throw_assert(file >= 0, + "create_cell_index: invalid file handle"); + + has_group = hdf5::exists_dataset (file, attr_prefix) > 0; + if (!has_group) + { + ierr = hdf5::create_group (file, attr_prefix); + throw_assert_nomsg(ierr == 0); + } + else + { + has_index = hdf5::exists_dataset (file, attr_path) > 0; + } + + if (!has_index) + { + + hsize_t maxdims[1] = {H5S_UNLIMITED}; + hsize_t cdims[1] = {chunk_size}; /* chunking dimensions */ + hsize_t initial_size = 0; + + hid_t plist = H5Pcreate (H5P_DATASET_CREATE); + ierr = H5Pset_chunk(plist, 1, cdims); + throw_assert_nomsg(ierr == 0); + + ierr = H5Pset_alloc_time(plist, H5D_ALLOC_TIME_EARLY); + throw_assert_nomsg(ierr == 0); + +#ifdef H5_HAS_PARALLEL_DEFLATE + ierr = H5Pset_deflate(plist, 9); + throw_assert_nomsg(ierr == 0); +#endif + + hid_t lcpl = H5Pcreate(H5P_LINK_CREATE); + throw_assert_nomsg(lcpl >= 0); + throw_assert_nomsg(H5Pset_create_intermediate_group(lcpl, 1) >= 0); + + hid_t mspace = H5Screate_simple(1, &initial_size, maxdims); + throw_assert_nomsg(mspace >= 0); + hid_t dset = H5Dcreate2(file, attr_path.c_str(), CELL_IDX_H5_FILE_T, + mspace, lcpl, plist, H5P_DEFAULT); + throw_assert_nomsg(H5Dclose(dset) >= 0); + throw_assert_nomsg(H5Sclose(mspace) >= 0); + } + + ierr = H5Fclose (file); + throw_assert_nomsg(ierr == 0); + + throw_assert_nomsg(MPI_Barrier(comm) == MPI_SUCCESS); + + return ierr; + } herr_t read_cell_index ( @@ -188,6 +269,42 @@ namespace neuroh5 throw_assert_nomsg(srank >= 0); throw_assert_nomsg(ssize > 0); + rank = (size_t)srank; + size = (size_t)ssize; + + hid_t file = hdf5::open_file(comm, file_name, true, true); + throw_assert_nomsg(file >= 0); + + ierr = append_cell_index(comm, file, pop_name, pop_start, attr_name_space, cell_index); + throw_assert_nomsg(ierr == 0); + hdf5::close_file(file); + + return 0; + } + + + herr_t append_cell_index + ( + MPI_Comm comm, + hid_t loc, + const string& pop_name, + const CELL_IDX_T& pop_start, + const string& attr_name_space, + const vector& cell_index + ) + { + herr_t ierr = 0; + int srank, ssize; size_t rank, size; + + hid_t file = H5Iget_file_id(loc); + throw_assert(file >= 0, + "append_cell_index: invalid file handle"); + + throw_assert_nomsg(MPI_Comm_size(comm, &ssize) == MPI_SUCCESS); + throw_assert_nomsg(MPI_Comm_rank(comm, 
&srank) == MPI_SUCCESS); + throw_assert_nomsg(srank >= 0); + throw_assert_nomsg(ssize > 0); + rank = (size_t)srank; size = (size_t)ssize; @@ -217,12 +334,10 @@ namespace neuroh5 global_index_size = global_index_size + index_size_vector[i]; } - ierr = create_cell_index(comm, file_name, pop_name, attr_name_space); + ierr = create_cell_index(comm, file, pop_name, attr_name_space); #ifdef NEUROH5_DEBUG throw_assert_nomsg(MPI_Barrier(comm) == MPI_SUCCESS); #endif - hid_t file = hdf5::open_file(comm, file_name, true, true); - throw_assert_nomsg(file >= 0); /* Create property list for collective dataset write. */ hid_t wapl; @@ -247,7 +362,6 @@ namespace neuroh5 return ierr; } - herr_t link_cell_index ( From 9cf77938669089591de57e5e5393d1f58081cbbf Mon Sep 17 00:00:00 2001 From: Ivan Raikov Date: Thu, 6 Jul 2023 01:34:26 -0500 Subject: [PATCH 3/4] various bug fixes related to changes to append_tree and is_io_rank logic --- include/cell/cell_attributes.hh | 12 +- include/mpi/alltoallv_template.hh | 40 ++++--- src/cell/append_tree.cc | 103 ++++++++++-------- src/cell/cell_attributes.cc | 12 +- src/cell/scatter_read_tree.cc | 6 +- src/graph/append_graph.cc | 4 +- src/graph/scatter_read_projection.cc | 4 +- .../scatter_read_projection_selection.cc | 4 +- tests/test_bcast_gj.py | 15 ++- 9 files changed, 101 insertions(+), 99 deletions(-) diff --git a/include/cell/cell_attributes.hh b/include/cell/cell_attributes.hh index 5454dfa7..2456b3ef 100644 --- a/include/cell/cell_attributes.hh +++ b/include/cell/cell_attributes.hh @@ -397,9 +397,7 @@ namespace neuroh5 size = ssize; throw_assert(io_size <= size, "invalid io_size"); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); hid_t fapl; hid_t file; @@ -600,9 +598,7 @@ namespace neuroh5 set io_rank_set; data::range_sample(size, io_size, io_rank_set); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); throw_assert(io_rank_set.size() > 0, "invalid I/O rank set"); int io_color = 1, color; @@ -861,9 +857,7 @@ namespace neuroh5 set io_rank_set; data::range_sample(size, io_size_value, io_rank_set); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); throw_assert(io_rank_set.size() > 0, "invalid I/O rank set"); vector< pair > ranges; diff --git a/include/mpi/alltoallv_template.hh b/include/mpi/alltoallv_template.hh index 04394c94..783ffa81 100644 --- a/include/mpi/alltoallv_template.hh +++ b/include/mpi/alltoallv_template.hh @@ -61,12 +61,18 @@ namespace neuroh5 { int status; - status = MPI_Alltoall(&sendcounts[0], 1, MPI_INT, - &recvcounts[0], 1, MPI_INT, comm); + MPI_Request request; + status = MPI_Ialltoall(&sendcounts[0], 1, MPI_INT, + &recvcounts[0], 1, MPI_INT, comm, &request); throw_assert(status == MPI_SUCCESS, "alltoallv: error in MPI_Alltoallv: status: " << status); - throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, - "alltoallv: error in MPI_Barrier"); + + status = MPI_Wait(&request, MPI_STATUS_IGNORE); + throw_assert(status == MPI_SUCCESS, + "alltoallv: error in MPI_Wait: status: " << status); + + //throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, + // "alltoallv: error in MPI_Barrier"); } // 2. 
Each rank accumulates the vector sizes and allocates @@ -85,23 +91,31 @@ namespace neuroh5 size_t global_recvbuf_size=0; { int status; - status = MPI_Allreduce(&recvbuf_size, &global_recvbuf_size, 1, MPI_SIZE_T, MPI_SUM, - comm); - throw_assert (status == MPI_SUCCESS, "error in MPI_Allreduce: status = " << status); - throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, - "alltoallv: error in MPI_Barrier"); + MPI_Request request; + status = MPI_Iallreduce(&recvbuf_size, &global_recvbuf_size, 1, MPI_SIZE_T, MPI_SUM, + comm, &request); + throw_assert (status == MPI_SUCCESS, "error in MPI_Iallreduce: status = " << status); + status = MPI_Wait(&request, MPI_STATUS_IGNORE); + throw_assert(status == MPI_SUCCESS, + "alltoallv: error in MPI_Wait: status: " << status); + //throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, + // "alltoallv: error in MPI_Barrier"); } if (global_recvbuf_size > 0) { int status; + MPI_Request request; // 3. Each ALL_COMM rank participates in the MPI_Alltoallv - status = MPI_Alltoallv(&sendbuf[0], &sendcounts[0], &sdispls[0], datatype, + status = MPI_Ialltoallv(&sendbuf[0], &sendcounts[0], &sdispls[0], datatype, &recvbuf[0], &recvcounts[0], &rdispls[0], datatype, - comm); + comm, &request); throw_assert (status == MPI_SUCCESS, "error in MPI_Alltoallv: status = " << status); - throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, - "alltoallv: error in MPI_Barrier"); + status = MPI_Wait(&request, MPI_STATUS_IGNORE); + throw_assert(status == MPI_SUCCESS, + "alltoallv: error in MPI_Wait: status: " << status); + //throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, + // "alltoallv: error in MPI_Barrier"); } return 0; diff --git a/src/cell/append_tree.cc b/src/cell/append_tree.cc index e284ef42..4511ec88 100644 --- a/src/cell/append_tree.cc +++ b/src/cell/append_tree.cc @@ -295,27 +295,29 @@ namespace neuroh5 herr_t status=0; size_t io_size=0; - size_t rank, size; + size_t rank=0, size=0; throw_assert_nomsg(MPI_Comm_size(comm, (int*)&size) == MPI_SUCCESS); throw_assert_nomsg(MPI_Comm_rank(comm, (int*)&rank) == MPI_SUCCESS); - size_t io_comm_size; - throw_assert_nomsg(MPI_Comm_size(io_comm, (int*)&io_comm_size) == MPI_SUCCESS); - - - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; throw_assert(io_rank_set.size() > 0, "invalid I/O rank set"); + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); io_size = io_rank_set.size(); - throw_assert(io_comm_size == io_size, "mismatch between io_size and io_comm size"); + + size_t io_comm_size=0; + if (is_io_rank) + { + throw_assert_nomsg(MPI_Comm_size(io_comm, (int*)&io_comm_size) == MPI_SUCCESS); + throw_assert(io_comm_size == io_size, "mismatch between io_size and io_comm size"); + }; vector< pair > ranges; mpi::rank_ranges(size, io_size, ranges); // Determine I/O ranks to which to send the values - vector io_dests(size); - for (size_t r=0; r io_dests; + io_dests.resize(size); + + for (rank_t r=0; r=0; i--) { @@ -326,13 +328,13 @@ namespace neuroh5 } } } - + std::forward_list local_tree_list; { std::vector sendbuf; std::vector sendcounts, sdispls; - vector recvcounts, rdispls; - vector recvbuf; + std::vector recvcounts, rdispls; + std::vector recvbuf; rank_t dst_rank = io_dests[rank]; map > rank_tree_map; @@ -359,8 +361,7 @@ namespace neuroh5 data::deserialize_rank_tree_list (size, recvbuf, recvcounts, rdispls, local_tree_list); } - - + std::vector sec_ptr; std::vector topo_ptr; std::vector attr_ptr; @@ -374,28 +375,31 @@ namespace neuroh5 std::vector all_parents; // Parent std::vector 
all_swc_types; // SWC Types - if (ptr_type.type == PtrNone) - { - status = build_singleton_tree_datasets(comm, - local_tree_list, - all_src_vector, all_dst_vector, - all_xcoords, all_ycoords, all_zcoords, - all_radiuses, all_layers, all_sections, - all_parents, all_swc_types); - } - else + if (is_io_rank) { - status = build_tree_datasets(comm, - local_tree_list, - sec_ptr, topo_ptr, attr_ptr, - all_index_vector, all_src_vector, all_dst_vector, - all_xcoords, all_ycoords, all_zcoords, - all_radiuses, all_layers, all_sections, - all_parents, all_swc_types); - throw_assert_nomsg(status >= 0); + if (ptr_type.type == PtrNone) + { + status = build_singleton_tree_datasets(io_comm, + local_tree_list, + all_src_vector, all_dst_vector, + all_xcoords, all_ycoords, all_zcoords, + all_radiuses, all_layers, all_sections, + all_parents, all_swc_types); + } + else + { + status = build_tree_datasets(io_comm, + local_tree_list, + sec_ptr, topo_ptr, attr_ptr, + all_index_vector, all_src_vector, all_dst_vector, + all_xcoords, all_ycoords, all_zcoords, + all_radiuses, all_layers, all_sections, + all_parents, all_swc_types); + throw_assert_nomsg(status >= 0); + } + + local_tree_list.clear(); } - - local_tree_list.clear(); const data::optional_hid dflt_data_type; @@ -500,17 +504,16 @@ namespace neuroh5 { herr_t status; - size_t rank, size; + size_t rank=0, size=0; throw_assert_nomsg(MPI_Comm_size(comm, (int*)&size) == MPI_SUCCESS); throw_assert_nomsg(MPI_Comm_rank(comm, (int*)&rank) == MPI_SUCCESS); set io_rank_set; data::range_sample(size, io_size, io_rank_set); + throw_assert(io_rank_set.size() == io_size, "invalid I/O rank set"); + + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; - // MPI Communicator for I/O ranks MPI_Comm io_comm; // MPI group color value used for I/O ranks @@ -523,8 +526,9 @@ namespace neuroh5 } else { - color = 0; + color = MPI_UNDEFINED; } + MPI_Comm_split(comm,color,rank,&io_comm); MPI_Comm_set_errhandler(io_comm, MPI_ERRORS_RETURN); @@ -543,16 +547,23 @@ namespace neuroh5 throw_assert(MPI_Barrier(comm) == MPI_SUCCESS, "error in MPI_Barrier"); - hid_t file = hdf5::open_file(io_comm, file_name, true, true); + hid_t file; + if (is_io_rank) + { + file = hdf5::open_file(io_comm, file_name, true, true); + } status = append_trees(comm, io_comm, file, pop_name, pop_start, tree_list, io_rank_set, CellPtr(PtrOwner), chunk_size, value_chunk_size); throw_assert_nomsg(status >= 0); - hdf5::close_file(file); + if (is_io_rank) + { + hdf5::close_file(file); - throw_assert(MPI_Comm_free(&io_comm) == MPI_SUCCESS, - "append_trees: error in MPI_Comm_free"); + throw_assert(MPI_Comm_free(&io_comm) == MPI_SUCCESS, + "append_trees: error in MPI_Comm_free"); + } return 0; } diff --git a/src/cell/cell_attributes.cc b/src/cell/cell_attributes.cc index a0c9526f..dd50aa14 100644 --- a/src/cell/cell_attributes.cc +++ b/src/cell/cell_attributes.cc @@ -968,9 +968,7 @@ namespace neuroh5 set io_rank_set; data::range_sample(size, io_size, io_rank_set); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); if (is_io_rank) { @@ -1637,9 +1635,7 @@ namespace neuroh5 set io_rank_set; data::range_sample(size, io_data_size, io_rank_set); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); 
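    // The hunks above and below apply the same simplification used throughout this
    // patch: membership of the current rank in the I/O rank set is evaluated once as
    // a boolean expression instead of through a mutable flag. A minimal sketch of the
    // pattern as used here (the set element type and the io_size value are assumptions
    // for illustration; the surrounding code supplies the real ones):
    //
    //   std::set<size_t> io_rank_set;
    //   data::range_sample(size, io_size, io_rank_set);   // choose io_size I/O ranks out of size
    //   bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end());
    //   if (is_io_rank)
    //     {
    //       // only I/O ranks open the HDF5 file and take part in collective writes
    //     }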
// I/O rank with lowest data rank size_t io_root_data_rank = *(io_rank_set.begin()); @@ -1830,9 +1826,7 @@ namespace neuroh5 set io_rank_set; data::range_sample(size, io_size_value, io_rank_set); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); throw_assert(io_rank_set.size() > 0, "invalid I/O rank set"); diff --git a/src/cell/scatter_read_tree.cc b/src/cell/scatter_read_tree.cc index 15519268..0c54ec52 100644 --- a/src/cell/scatter_read_tree.cc +++ b/src/cell/scatter_read_tree.cc @@ -82,11 +82,7 @@ namespace neuroh5 set io_rank_set; data::range_sample(size, io_size, io_rank_set); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - { - is_io_rank = true; - } + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); // Am I an I/O rank? if (is_io_rank) diff --git a/src/graph/append_graph.cc b/src/graph/append_graph.cc index baaac250..7e4c843e 100644 --- a/src/graph/append_graph.cc +++ b/src/graph/append_graph.cc @@ -147,9 +147,7 @@ namespace neuroh5 set io_rank_set; data::range_sample(size, io_size, io_rank_set); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); // A vector that maps nodes to compute ranks map< NODE_IDX_T, rank_t > node_rank_map; diff --git a/src/graph/scatter_read_projection.cc b/src/graph/scatter_read_projection.cc index 16647b68..1d6487d2 100644 --- a/src/graph/scatter_read_projection.cc +++ b/src/graph/scatter_read_projection.cc @@ -71,9 +71,7 @@ namespace neuroh5 set io_rank_set; data::range_sample(size, io_size, io_rank_set); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); size_t io_rank_root = 0; if (io_rank_set.size() > 0) diff --git a/src/graph/scatter_read_projection_selection.cc b/src/graph/scatter_read_projection_selection.cc index 786cedbb..bb2c677d 100644 --- a/src/graph/scatter_read_projection_selection.cc +++ b/src/graph/scatter_read_projection_selection.cc @@ -73,9 +73,7 @@ namespace neuroh5 set io_rank_set; data::range_sample(size, io_size, io_rank_set); - bool is_io_rank = false; - if (io_rank_set.find(rank) != io_rank_set.end()) - is_io_rank = true; + bool is_io_rank = (io_rank_set.find(rank) != io_rank_set.end()); // Am I an I/O rank? 
if (is_io_rank) diff --git a/tests/test_bcast_gj.py b/tests/test_bcast_gj.py index cb1fbd5b..fc10cd58 100644 --- a/tests/test_bcast_gj.py +++ b/tests/test_bcast_gj.py @@ -1,16 +1,15 @@ from mpi4py import MPI -from neuroh5.io import scatter_read_graph, bcast_graph +from neuroh5.io import bcast_graph import numpy as np comm = MPI.COMM_WORLD -print "rank = ", comm.Get_rank() -print "size = ", comm.Get_size() +gapjunctions_file_path="/scratch1/03320/iraikov/striped2/dentate/Full_Scale_Control/DG_gapjunctions_20230114.h5" -(g, a) = bcast_graph("/oasis/scratch/comet/iraikov/temp_project/dentate/Full_Scale_Control/dentate_Full_Scale_Control_gapjunctions.h5", - attributes=True) +(graph, a) = bcast_graph(gapjunctions_file_path, + namespaces=['Coupling strength', 'Location'], + comm=comm) -if comm.Get_rank() == 0: - print a - print g.keys() +if comm.rank == 0: + print(graph['AAC']) From 767b08e3817fbf6e583521128c9d426db58eb3e4 Mon Sep 17 00:00:00 2001 From: Ivan Raikov Date: Fri, 7 Jul 2023 13:04:26 -0500 Subject: [PATCH 4/4] pass chunk_size argument to append_edge_attributes --- include/graph/edge_attributes.hh | 6 ++++-- src/graph/append_projection.cc | 14 +++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/graph/edge_attributes.hh b/include/graph/edge_attributes.hh index 070b14f5..66e59d94 100644 --- a/include/graph/edge_attributes.hh +++ b/include/graph/edge_attributes.hh @@ -348,7 +348,8 @@ namespace neuroh5 const string &src_pop_name, const string &dst_pop_name, const map & edge_attr_map, - const std::map >& edge_attr_index) + const std::map >& edge_attr_index, + const size_t chunk_size = 4000) { for (auto const& iter : edge_attr_map) @@ -367,7 +368,8 @@ namespace neuroh5 size_t i = attr_index.attr_index(attr_name); graph::append_edge_attribute(comm, file, src_pop_name, dst_pop_name, attr_namespace, attr_name, - edge_attr_values.attr_vec(i)); + edge_attr_values.attr_vec(i), + chunk_size); } } else diff --git a/src/graph/append_projection.cc b/src/graph/append_projection.cc index 56ac65e0..255fbbcb 100644 --- a/src/graph/append_projection.cc +++ b/src/graph/append_projection.cc @@ -789,19 +789,19 @@ namespace neuroh5 throw_assert_nomsg(MPI_Barrier(comm) == MPI_SUCCESS); append_edge_attribute_map(comm, file, src_pop_name, dst_pop_name, - edge_attr_map, edge_attr_index); + edge_attr_map, edge_attr_index, chunk_size); append_edge_attribute_map(comm, file, src_pop_name, dst_pop_name, - edge_attr_map, edge_attr_index); + edge_attr_map, edge_attr_index, chunk_size); append_edge_attribute_map(comm, file, src_pop_name, dst_pop_name, - edge_attr_map, edge_attr_index); + edge_attr_map, edge_attr_index, chunk_size); append_edge_attribute_map(comm, file, src_pop_name, dst_pop_name, - edge_attr_map, edge_attr_index); + edge_attr_map, edge_attr_index, chunk_size); append_edge_attribute_map(comm, file, src_pop_name, dst_pop_name, - edge_attr_map, edge_attr_index); + edge_attr_map, edge_attr_index, chunk_size); append_edge_attribute_map(comm, file, src_pop_name, dst_pop_name, - edge_attr_map, edge_attr_index); + edge_attr_map, edge_attr_index, chunk_size); append_edge_attribute_map(comm, file, src_pop_name, dst_pop_name, - edge_attr_map, edge_attr_index); + edge_attr_map, edge_attr_index, chunk_size); // clean-up throw_assert_nomsg(MPI_Barrier(comm) == MPI_SUCCESS);
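// A sketch of the resulting call path (illustrative, not taken from the patch):
// append_projection now forwards its chunk_size to each append_edge_attribute_map
// instantiation, which in turn passes it to graph::append_edge_attribute, so the
// attribute datasets of a projection are created with one consistent chunk size
// instead of relying on the callee's default. The population names and template
// argument below are assumptions for illustration only:
//
//   const size_t chunk_size = 4000;
//   append_edge_attribute_map<float>(comm, file, "GC", "MC",
//                                    edge_attr_map, edge_attr_index,
//                                    chunk_size);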