diff --git a/include/openPMD/CustomHierarchy.hpp b/include/openPMD/CustomHierarchy.hpp index b238ff0efe..10ad959b13 100644 --- a/include/openPMD/CustomHierarchy.hpp +++ b/include/openPMD/CustomHierarchy.hpp @@ -65,15 +65,12 @@ namespace internal std::vector const &particles); MeshesParticlesPath(Series const &); - [[nodiscard]] ContainedType determineType( - std::vector const &path, - std::string const &name) const; - [[nodiscard]] bool isParticle( - std::vector const &path, - std::string const &name) const; - [[nodiscard]] bool isMesh( - std::vector const &path, - std::string const &name) const; + [[nodiscard]] ContainedType + determineType(std::vector const &path) const; + [[nodiscard]] bool + isParticleContainer(std::vector const &path) const; + [[nodiscard]] bool + isMeshContainer(std::vector const &path) const; }; struct CustomHierarchyData : ContainerData diff --git a/src/CustomHierarchy.cpp b/src/CustomHierarchy.cpp index fb790ce538..d6d621d755 100644 --- a/src/CustomHierarchy.cpp +++ b/src/CustomHierarchy.cpp @@ -43,11 +43,18 @@ #include #include #include +#include #include #include #include #include +// @todo add handselected choice of [:punct:] characters to this +// using a macro here to make string interpolation simpler +#define OPENPMD_LEGAL_IDENTIFIER_CHARS "[:alnum:]_" +#define OPENPMD_SINGLE_GLOBBING_CHAR "%" +#define OPENPMD_DOUBLE_GLOBBING_CHAR "%%" + namespace { template @@ -84,7 +91,9 @@ void setDefaultMeshesParticlesPath( std::vector const &particles, OutParam &writeTarget) { - std::regex is_default_path_specification("[[:alnum:]_]+/", regex_flags); + std::regex is_default_path_specification( + "[" OPENPMD_LEGAL_IDENTIFIER_CHARS "]+/", + regex_flags | std::regex_constants::optimize); constexpr char const *default_default_mesh = "meshes"; constexpr char const *default_default_particle = "particles"; for (auto [vec, defaultPath, default_default] : @@ -116,42 +125,10 @@ void setDefaultMeshesParticlesPath( } } -bool anyPathRegexMatches( - 
std::regex regex, - std::vector const &path, - std::string const &name) +bool anyPathRegexMatches(std::regex regex, std::vector const &path) { - /* - * /group/meshes/E is a mesh if the meshes path contains: - * - * 1) '/group/meshes/' (absolute path to mesh container) - * 2) '/group/meshes/E' (absolute path to mesh itself) - * 3) 'meshes/' (relative path to mesh container) - * - * The potential fourth option 'E' (relative path to mesh itself) - * is not supported. ("Anything that is named 'E' is a mesh" is not - * really a semantic that we want to explicitly support.) - * '/' is never a valid meshes path. - * - * All this analogously for particles path. - */ - std::vector pathsToMatch = { - /* option 2) from above */ - "/" + (path.empty() ? "" : concatWithSep(path, "/") + "/") + name}; - if (!path.empty()) - { - // option 1) from above - pathsToMatch.emplace_back("/" + concatWithSep(path, "/") + "/"); - - // option 3 from above - pathsToMatch.emplace_back(*path.rbegin() + "/"); - } - return std::any_of( - pathsToMatch.begin(), - pathsToMatch.end(), - [&regex](std::string const &candidate_path) { - return std::regex_match(candidate_path, regex); - }); + std::string pathToMatch = '/' + concatWithSep(path, "/") + '/'; + return std::regex_match(pathToMatch, regex); } } // namespace @@ -159,28 +136,83 @@ namespace openPMD { namespace internal { + namespace + { + std::string globToRegexLongForm(std::string const &glob) + { + return auxiliary::replace_all( + auxiliary::replace_all( + glob, + OPENPMD_DOUBLE_GLOBBING_CHAR, + "([" OPENPMD_LEGAL_IDENTIFIER_CHARS "/]*)"), + OPENPMD_SINGLE_GLOBBING_CHAR, + "([" OPENPMD_LEGAL_IDENTIFIER_CHARS "]*)"); + } + + std::string globToRegexShortForm(std::string const &glob) + { + return "[" OPENPMD_LEGAL_IDENTIFIER_CHARS "/]*/" + glob; + } + } // namespace + MeshesParticlesPath::MeshesParticlesPath( std::vector const &meshes, std::vector const &particles) { - std::regex is_default_path_specification("[[:alnum:]_]+/", regex_flags); + /* + * 
/group/meshes/E is a mesh if the meshes path contains: + * + * 1) '/group/meshes/' (absolute path to mesh container) + * 2) 'meshes/' (relative path to mesh container) + * + * All this analogously for particles path. + */ + + // regex for detecting option 1) + // e.g. '/path/to/meshes/': The path to the meshes. Mandatory slashes at + // beginning and end, possibly slashes in + // between. Mandatory slash at beginning might + // be replaced with '%%' to enable paths like + // '%%/path/to/meshes/'. + // resolves to: `(/|%%)[[:alnum:]_%/]+/` + std::regex is_legal_long_path_specification( + "(/|" OPENPMD_DOUBLE_GLOBBING_CHAR + ")[" OPENPMD_LEGAL_IDENTIFIER_CHARS OPENPMD_SINGLE_GLOBBING_CHAR + "/]+/", + regex_flags | std::regex_constants::optimize); + + // Regex for detecting option 2) + // e.g. 'meshes/': The name without path. One single mandatory slash + // at the end, no slashes otherwise. + // resolves to `[[:alnum:]_]+/` + std::regex is_legal_short_path_specification( + "[" OPENPMD_LEGAL_IDENTIFIER_CHARS "]+/", + regex_flags | std::regex_constants::optimize); + for (auto [target_regex, vec] : {std::make_tuple(&this->meshRegex, &meshes), std::make_tuple(&this->particleRegex, &particles)}) { - if (vec->empty()) - { - *target_regex = std::regex( - /* does not match anything */ "a^", - regex_flags | std::regex_constants::optimize); - continue; - } - auto begin = vec->begin(); std::stringstream build_regex; - build_regex << '(' << *begin++ << ')'; - for (; begin != vec->end(); ++begin) + // neutral element: empty language, regex doesn't match anything + build_regex << "(a^)"; + for (auto const &entry : *vec) { - build_regex << "|(" << *begin << ')'; + if (std::regex_match(entry, is_legal_short_path_specification)) + { + build_regex << "|(" << globToRegexShortForm(entry) << ')'; + } + else if (std::regex_match( + entry, is_legal_long_path_specification)) + { + build_regex << "|(" << globToRegexLongForm(entry) << ')'; + } + else + { + std::cerr + << "[WARNING] Not a legal 
meshes-/particles-path: '" + << entry << "'. Will skip." << std::endl; + } } auto regex_string = build_regex.str(); // std::cout << "Using regex string: " << regex_string << std::endl; @@ -192,13 +224,13 @@ namespace internal } ContainedType MeshesParticlesPath::determineType( - std::vector const &path, std::string const &name) const + std::vector const &path) const { - if (isMesh(path, name)) + if (isMeshContainer(path)) { return ContainedType::Mesh; } - else if (isParticle(path, name)) + else if (isParticleContainer(path)) { return ContainedType::Particle; } @@ -208,15 +240,15 @@ namespace internal } } - bool MeshesParticlesPath::isParticle( - std::vector const &path, std::string const &name) const + bool MeshesParticlesPath::isParticleContainer( + std::vector const &path) const { - return anyPathRegexMatches(particleRegex, path, name); + return anyPathRegexMatches(particleRegex, path); } - bool MeshesParticlesPath::isMesh( - std::vector const &path, std::string const &name) const + bool MeshesParticlesPath::isMeshContainer( + std::vector const &path) const { - return anyPathRegexMatches(meshRegex, path, name); + return anyPathRegexMatches(meshRegex, path); } CustomHierarchyData::CustomHierarchyData() @@ -367,7 +399,7 @@ void CustomHierarchy::read( EraseStaleParticles particlesMap(data.m_embeddedParticles); for (auto const &path : *pList.paths) { - switch (mpp.determineType(currentPath, path)) + switch (mpp.determineType(currentPath)) { case internal::ContainedType::Group: { Parameter pOpen; @@ -433,7 +465,7 @@ void CustomHierarchy::read( } for (auto const &path : *dList.datasets) { - switch (mpp.determineType(currentPath, path)) + switch (mpp.determineType(currentPath)) { // Group is a bit of an internal misnomer here, it just means that // it matches neither meshes nor particles path @@ -528,7 +560,7 @@ void CustomHierarchy::flush_internal( } for (auto &[name, mesh] : data.m_embeddedMeshes) { - if (!mpp.isMesh(currentPath, name)) + if 
(!mpp.isMeshContainer(currentPath)) { std::string extend_meshes_path; // Check if this can be covered by shorthand notation @@ -544,8 +576,7 @@ void CustomHierarchy::flush_internal( extend_meshes_path = "/" + (currentPath.empty() ? "" - : concatWithSep(currentPath, "/") + "/") + - name; + : concatWithSep(currentPath, "/") + "/"); } mpp.collectNewMeshesPaths.emplace(std::move(extend_meshes_path)); } @@ -553,7 +584,7 @@ void CustomHierarchy::flush_internal( } for (auto &[name, particleSpecies] : data.m_embeddedParticles) { - if (!mpp.isParticle(currentPath, name)) + if (!mpp.isParticleContainer(currentPath)) { std::string extend_particles_path; if (!currentPath.empty() && @@ -569,8 +600,8 @@ void CustomHierarchy::flush_internal( extend_particles_path = "/" + (currentPath.empty() ? "" - : concatWithSep(currentPath, "/") + "/") + - name; + : concatWithSep(currentPath, "/") + "/"); + ; } mpp.collectNewParticlesPaths.emplace( std::move(extend_particles_path)); @@ -829,3 +860,7 @@ Series &CustomHierarchy::getBufferedSeries() return *data.m_bufferedSeries; } } // namespace openPMD + +#undef OPENPMD_LEGAL_IDENTIFIER_CHARS +#undef OPENPMD_SINGLE_GLOBBING_CHAR +#undef OPENPMD_DOUBLE_GLOBBING_CHAR diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp index db31f25e1c..a1a617feaf 100644 --- a/test/CoreTest.cpp +++ b/test/CoreTest.cpp @@ -212,7 +212,7 @@ TEST_CASE("custom_hierarchies", "[core]") auto meshesViaAlias = write.iterations[0].meshes; meshesViaAlias["E"]["x"].makeEmpty(2); - write.setMeshesPath(std::vector{"fields/", ".*/meshes/"}); + write.setMeshesPath(std::vector{"fields/", "%%/meshes/"}); auto meshesManually = write.iterations[0]["fields"].asContainerOf(); REQUIRE(meshesManually.contains("E")); @@ -341,9 +341,9 @@ TEST_CASE("custom_hierarchies", "[core]") TEST_CASE("custom_hierarchies_no_rw", "[core]") { - std::string filePath = "../samples/custom_hierarchies_no_rw.json"; + std::string filePath = "../samples/custom_hierarchies_no_rw.bp"; Series write(filePath, 
Access::CREATE); - write.setMeshesPath(std::vector{".*/meshes/"}); + write.setMeshesPath(std::vector{"%%/meshes/"}); write.iterations[0]["custom"]["hierarchy"]; write.iterations[0]["custom"].setAttribute("string", "attribute"); write.iterations[0]["custom"]["hierarchy"].setAttribute("number", 3);