From d55a0b2325e392eef6283d11d16e9f511f6e6b6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 13 Oct 2023 14:40:40 +0200 Subject: [PATCH] Replace Regexes with Globbing TODO: Since meshes/particles can no longer be directly addressed with this, maybe adapt the class hierarchy to disallow mixed groups that contain meshes, particles, groups and datasets at the same time. Only maybe though.. --- include/openPMD/CustomHierarchy.hpp | 15 +-- src/CustomHierarchy.cpp | 165 +++++++++++++++++----------- test/CoreTest.cpp | 6 +- 3 files changed, 109 insertions(+), 77 deletions(-) diff --git a/include/openPMD/CustomHierarchy.hpp b/include/openPMD/CustomHierarchy.hpp index b238ff0efe..10ad959b13 100644 --- a/include/openPMD/CustomHierarchy.hpp +++ b/include/openPMD/CustomHierarchy.hpp @@ -65,15 +65,12 @@ namespace internal std::vector const &particles); MeshesParticlesPath(Series const &); - [[nodiscard]] ContainedType determineType( - std::vector const &path, - std::string const &name) const; - [[nodiscard]] bool isParticle( - std::vector const &path, - std::string const &name) const; - [[nodiscard]] bool isMesh( - std::vector const &path, - std::string const &name) const; + [[nodiscard]] ContainedType + determineType(std::vector const &path) const; + [[nodiscard]] bool + isParticleContainer(std::vector const &path) const; + [[nodiscard]] bool + isMeshContainer(std::vector const &path) const; }; struct CustomHierarchyData : ContainerData diff --git a/src/CustomHierarchy.cpp b/src/CustomHierarchy.cpp index fb790ce538..d6d621d755 100644 --- a/src/CustomHierarchy.cpp +++ b/src/CustomHierarchy.cpp @@ -43,11 +43,18 @@ #include #include #include +#include #include #include #include #include +// @todo add handselected choice of [:punct:] characters to this +// using a macro here to make string interpolation simpler +#define OPENPMD_LEGAL_IDENTIFIER_CHARS "[:alnum:]_" +#define OPENPMD_SINGLE_GLOBBING_CHAR "%" +#define OPENPMD_DOUBLE_GLOBBING_CHAR "%%" + namespace { template @@ -84,7 +91,9 @@ void setDefaultMeshesParticlesPath( std::vector const &particles, OutParam &writeTarget) { - std::regex is_default_path_specification("[[:alnum:]_]+/", regex_flags); + std::regex is_default_path_specification( + "[" OPENPMD_LEGAL_IDENTIFIER_CHARS "]+/", + regex_flags | std::regex_constants::optimize); constexpr char const *default_default_mesh = "meshes"; constexpr char const *default_default_particle = "particles"; for (auto [vec, defaultPath, default_default] : @@ -116,42 +125,10 @@ void setDefaultMeshesParticlesPath( } } -bool anyPathRegexMatches( - std::regex regex, - std::vector const &path, - std::string const &name) +bool anyPathRegexMatches(std::regex regex, std::vector const &path) { - /* - * /group/meshes/E is a mesh if the meshes path contains: - * - * 1) '/group/meshes/' (absolute path to mesh container) - * 2) '/group/meshes/E' (absolute path to mesh itself) - * 3) 'meshes/' (relative path to mesh container) - * - * The potential fourth option 'E' (relative path to mesh itself) - * is not supported. ("Anything that is named 'E' is a mesh" is not - * really a semantic that we want to explicitly support.) - * '/' is never a valid meshes path. - * - * All this analogously for particles path. - */ - std::vector pathsToMatch = { - /* option 2) from above */ - "/" + (path.empty() ? "" : concatWithSep(path, "/") + "/") + name}; - if (!path.empty()) - { - // option 1) from above - pathsToMatch.emplace_back("/" + concatWithSep(path, "/") + "/"); - - // option 3 from above - pathsToMatch.emplace_back(*path.rbegin() + "/"); - } - return std::any_of( - pathsToMatch.begin(), - pathsToMatch.end(), - [®ex](std::string const &candidate_path) { - return std::regex_match(candidate_path, regex); - }); + std::string pathToMatch = '/' + concatWithSep(path, "/") + '/'; + return std::regex_match(pathToMatch, regex); } } // namespace @@ -159,28 +136,83 @@ namespace openPMD { namespace internal { + namespace + { + std::string globToRegexLongForm(std::string const &glob) + { + return auxiliary::replace_all( + auxiliary::replace_all( + glob, + OPENPMD_DOUBLE_GLOBBING_CHAR, + "([" OPENPMD_LEGAL_IDENTIFIER_CHARS "/]*)"), + OPENPMD_SINGLE_GLOBBING_CHAR, + "([" OPENPMD_LEGAL_IDENTIFIER_CHARS "]*)"); + } + + std::string globToRegexShortForm(std::string const &glob) + { + return "[" OPENPMD_LEGAL_IDENTIFIER_CHARS "/]*/" + glob; + } + } // namespace + MeshesParticlesPath::MeshesParticlesPath( std::vector const &meshes, std::vector const &particles) { - std::regex is_default_path_specification("[[:alnum:]_]+/", regex_flags); + /* + * /group/meshes/E is a mesh if the meshes path contains: + * + * 1) '/group/meshes/' (absolute path to mesh container) + * 2) 'meshes/' (relative path to mesh container) + * + * All this analogously for particles path. + */ + + // regex for detecting option 1) + // e.g. '/path/to/meshes/': The path to the meshes. Mandatory slashes at + // beginning and end, possibly slashes in + // between. Mandatory slash at beginning might + // be replaced with '%%' to enable paths like + // '%%/path/to/meshes'. + // resolves to: `(/|%%)[[:alnum:]_%/]+/` + std::regex is_legal_long_path_specification( + "(/|" OPENPMD_DOUBLE_GLOBBING_CHAR + ")[" OPENPMD_LEGAL_IDENTIFIER_CHARS OPENPMD_SINGLE_GLOBBING_CHAR + "/]+/", + regex_flags | std::regex_constants::optimize); + + // Regex for detecting option 2) + // e.g. 'meshes/': The name without path. One single mandatory slash + // at the end, no slashes otherwise. + // resolves to `[[:alnum:]_]+/` + std::regex is_legal_short_path_specification( + "[" OPENPMD_LEGAL_IDENTIFIER_CHARS "]+/", + regex_flags | std::regex_constants::optimize); + for (auto [target_regex, vec] : {std::make_tuple(&this->meshRegex, &meshes), std::make_tuple(&this->particleRegex, &particles)}) { - if (vec->empty()) - { - *target_regex = std::regex( - /* does not match anything */ "a^", - regex_flags | std::regex_constants::optimize); - continue; - } - auto begin = vec->begin(); std::stringstream build_regex; - build_regex << '(' << *begin++ << ')'; - for (; begin != vec->end(); ++begin) + // neutral element: empty language, regex doesn't match anything + build_regex << "(a^)"; + for (auto const &entry : *vec) { - build_regex << "|(" << *begin << ')'; + if (std::regex_match(entry, is_legal_short_path_specification)) + { + build_regex << "|(" << globToRegexShortForm(entry) << ')'; + } + else if (std::regex_match( + entry, is_legal_long_path_specification)) + { + build_regex << "|(" << globToRegexLongForm(entry) << ')'; + } + else + { + std::cerr + << "[WARNING] Not a legal meshes-/particles-path: '" + << entry << "'. Will skip." << std::endl; + } } auto regex_string = build_regex.str(); // std::cout << "Using regex string: " << regex_string << std::endl; @@ -192,13 +224,13 @@ namespace internal } ContainedType MeshesParticlesPath::determineType( - std::vector const &path, std::string const &name) const + std::vector const &path) const { - if (isMesh(path, name)) + if (isMeshContainer(path)) { return ContainedType::Mesh; } - else if (isParticle(path, name)) + else if (isParticleContainer(path)) { return ContainedType::Particle; } @@ -208,15 +240,15 @@ namespace internal } } - bool MeshesParticlesPath::isParticle( - std::vector const &path, std::string const &name) const + bool MeshesParticlesPath::isParticleContainer( + std::vector const &path) const { - return anyPathRegexMatches(particleRegex, path, name); + return anyPathRegexMatches(particleRegex, path); } - bool MeshesParticlesPath::isMesh( - std::vector const &path, std::string const &name) const + bool MeshesParticlesPath::isMeshContainer( + std::vector const &path) const { - return anyPathRegexMatches(meshRegex, path, name); + return anyPathRegexMatches(meshRegex, path); } CustomHierarchyData::CustomHierarchyData() @@ -367,7 +399,7 @@ void CustomHierarchy::read( EraseStaleParticles particlesMap(data.m_embeddedParticles); for (auto const &path : *pList.paths) { - switch (mpp.determineType(currentPath, path)) + switch (mpp.determineType(currentPath)) { case internal::ContainedType::Group: { Parameter pOpen; @@ -433,7 +465,7 @@ void CustomHierarchy::read( } for (auto const &path : *dList.datasets) { - switch (mpp.determineType(currentPath, path)) + switch (mpp.determineType(currentPath)) { // Group is a bit of an internal misnomer here, it just means that // it matches neither meshes nor particles path @@ -528,7 +560,7 @@ void CustomHierarchy::flush_internal( } for (auto &[name, mesh] : data.m_embeddedMeshes) { - if (!mpp.isMesh(currentPath, name)) + if (!mpp.isMeshContainer(currentPath)) { std::string extend_meshes_path; // Check if this can be covered by shorthand notation @@ -544,8 +576,7 @@ void CustomHierarchy::flush_internal( extend_meshes_path = "/" + (currentPath.empty() ? "" - : concatWithSep(currentPath, "/") + "/") + - name; + : concatWithSep(currentPath, "/") + "/"); } mpp.collectNewMeshesPaths.emplace(std::move(extend_meshes_path)); } @@ -553,7 +584,7 @@ void CustomHierarchy::flush_internal( } for (auto &[name, particleSpecies] : data.m_embeddedParticles) { - if (!mpp.isParticle(currentPath, name)) + if (!mpp.isParticleContainer(currentPath)) { std::string extend_particles_path; if (!currentPath.empty() && @@ -569,8 +600,8 @@ void CustomHierarchy::flush_internal( extend_particles_path = "/" + (currentPath.empty() ? "" - : concatWithSep(currentPath, "/") + "/") + - name; + : concatWithSep(currentPath, "/") + "/"); + ; } mpp.collectNewParticlesPaths.emplace( std::move(extend_particles_path)); @@ -829,3 +860,7 @@ Series &CustomHierarchy::getBufferedSeries() return *data.m_bufferedSeries; } } // namespace openPMD + +#undef OPENPMD_LEGAL_IDENTIFIER_CHARS +#undef OPENPMD_SINGLE_GLOBBING_CHAR +#undef OPENPMD_DOUBLE_GLOBBING_CHAR diff --git a/test/CoreTest.cpp b/test/CoreTest.cpp index db31f25e1c..a1a617feaf 100644 --- a/test/CoreTest.cpp +++ b/test/CoreTest.cpp @@ -212,7 +212,7 @@ TEST_CASE("custom_hierarchies", "[core]") auto meshesViaAlias = write.iterations[0].meshes; meshesViaAlias["E"]["x"].makeEmpty(2); - write.setMeshesPath(std::vector{"fields/", ".*/meshes/"}); + write.setMeshesPath(std::vector{"fields/", "%%/meshes/"}); auto meshesManually = write.iterations[0]["fields"].asContainerOf(); REQUIRE(meshesManually.contains("E")); @@ -341,9 +341,9 @@ TEST_CASE("custom_hierarchies", "[core]") TEST_CASE("custom_hierarchies_no_rw", "[core]") { - std::string filePath = "../samples/custom_hierarchies_no_rw.json"; + std::string filePath = "../samples/custom_hierarchies_no_rw.bp"; Series write(filePath, Access::CREATE); - write.setMeshesPath(std::vector{".*/meshes/"}); + write.setMeshesPath(std::vector{"%%/meshes/"}); write.iterations[0]["custom"]["hierarchy"]; write.iterations[0]["custom"].setAttribute("string", "attribute"); write.iterations[0]["custom"]["hierarchy"].setAttribute("number", 3);