Skip to content

Commit

Permalink
WIP: Add support for reading OpenPMD files
Browse files Browse the repository at this point in the history
Co-authored-by: Krishiv Bhatia <[email protected]>
Co-authored-by: Gabriele Bozzola <[email protected]>"
  • Loading branch information
krishivbhatia authored and Sbozzolo committed May 26, 2024
1 parent 970c574 commit 11e75ef
Show file tree
Hide file tree
Showing 5 changed files with 575 additions and 617 deletions.
2 changes: 1 addition & 1 deletion docs/features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ Units
Grid Data
---------

- Read 1D, 2D, and 3D ASCII and HDF5 files as ``HierarchicalGridData``, which supports:
- Read 1D, 2D, and 3D ASCII, HDF5 and OpenPMD files as ``HierarchicalGridData``, which supports:

- working with multiple components and refinement levels;
- handling ghost-zones;
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ For a full list of available features, see the :doc:`features page <features>`.
(:py:mod:`~.tensor`).
- Work with 1D, 2D, and 3D grid functions (:py:mod:`~.grid_data`,
:py:mod:`~.cactus_grid_functions`) as output by ``CarpetIOHDF5`` or
``CarpetIOASCII``.
``CarpetIOASCII``, or in OpenPMD format.
- Work with horizon data from (:py:mod:`~.cactus_horizons`) as output by
``QuasiLocalMeasures`` and ``AHFinderDirect``.
- Perform common operations with horizons (:py:mod:`~.hor_utils`).
Expand Down
199 changes: 102 additions & 97 deletions kuibit/cactus_grid_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,7 +1004,7 @@ def _parse_file(self, path):
# file. We keep a collection of all these in the variable self.alldata
try:
with h5py.File(path, "r") as f:
for group in f.keys():
for group in f:
matched = self.rx_group_name.match(group)
# If this is not an interesting group, just skip it
if not matched:
Expand Down Expand Up @@ -1246,7 +1246,7 @@ def clear_cache(self):
for filename, file_reader in self.alldata.items():
for iteration, iteration_reader in file_reader.items():
for ref_level, ref_level_reader in iteration_reader.items():
for component in ref_level_reader.keys():
for component in ref_level_reader:
self.alldata[filename][iteration][ref_level][
component
] = None
Expand Down Expand Up @@ -1301,37 +1301,39 @@ class OneGridFunctionOpenPMD(BaseOneGridFunction):
This class is derived from :py:class:`~.BaseOneGridFunction` and implements
the reading facilities.
"""

# This class implements the details on how to read the data for OpenPMD
# files, most of the functionalities of the class are in
# OneGridFunctionBase.

# Let's unpack the regex
#
# 1. ^ $ match the beginning and end of string
# 2. (\w+) match a word (the variable name)
# 3. _ is the literal underscore
# 4. (?:rl|lev) is a non-capturing group that matches the literals
# rl or lev
# 5. (\d+) matches a number, the refinement level
_pattern_group_name = r"^(\w+)_(?:rl|lev)(\d+)$"

def __init__(self, allfiles, var_name: str, dimension):
def __init__(self, allfiles, var_name: str, dimension, mesh_basename: str):
"""Constructor.
:param allfiles: Paths of files associated to the variable.
:type allfiles: list of str
:param var_name: Variable name.
:type var_name: str
:param mesh_basename: Basename of the OpenPMD mesh. Typically, <thornname>_<groupname>
:type mesh_basename: str
"""
self.dimension = dimension
self.mesh_basename = mesh_basename
self.map = None

self._iterations_to_times = {}

# Let's unpack the regex
#
# 1. ^ $ match the beginning and end of string
# 2. {mesh_basename} is substituted with the prefix
# 3. _ is the literal underscore
# 4. (?:rl|lev) is a non-capturing group that matches the literals
# rl or lev
# 5. (\d+) matches a number, the refinement level
self._pattern_mesh_name = rf"^{mesh_basename}_(?:rl|lev)(\d+)$"

super().__init__(allfiles, var_name)

# super() will fill the other variables that we need for dataset_format
Expand All @@ -1341,19 +1343,16 @@ def __init__(self, allfiles, var_name: str, dimension):
# OpenPMD data does not contain ghost zones
self.are_ghostzones_in_files = False

def _parse_file(self, path):
def _parse_file(self, path: str):
"""Read the content of the given file (without reading the data).
For OpenPMD files, a "file" is really a directory.
:param path: Path of the file.
:type path: str
"""
rx_mesh = re.compile(self._pattern_group_name)

# TODO: (Performance)
#
# Directly find the name of the relevant mesh, instead of looping over
# everything.
rx_mesh = re.compile(self._pattern_mesh_name)

with openpmd_series(path) as series:
iter_open_pmd = series.iterations
Expand All @@ -1362,35 +1361,27 @@ def _parse_file(self, path):
all_meshes = iteration_obj.meshes
for mesh_name, mesh_obj in all_meshes.items():
matched = rx_mesh.match(mesh_name)
if matched is None:
raise RuntimeError(f"Could not parse mesh {mesh_name}")
ref_level = int(matched.group(2))

# Here is where we prepare are nested alldata dictionary
alldata_file = self.alldata.setdefault(path, {})
alldata_iteration = alldata_file.setdefault(
int(iteration), {}
)
alldata_ref_level = alldata_iteration.setdefault(
ref_level, {}
)

# Loop through all variables in specific mesh and index all
# the variables and chunks (ie, components)
for variable in mesh_obj.items():
var_name = variable[0]
self.mesh_name = mesh_name
chunks = mesh_obj[var_name].available_chunks()
chunk_no = 0
for _chunk in chunks:
component = chunk_no
if matched is not None:
ref_level = matched.group(1)
# Here is where we prepare are nested alldata dictionary
alldata_file = self.alldata.setdefault(path, {})
alldata_iteration = alldata_file.setdefault(
int(iteration), {}
)
alldata_ref_level = alldata_iteration.setdefault(
int(ref_level), {}
)
component = 0
for _chunk in mesh_obj[
self.var_name
].available_chunks():
# We set the actual data to None, and we will read it in
# _read_component_as_uniform_grid_data upon request
alldata_ref_level.setdefault(component, None)
chunk_no += 1
component += 1

def _read_component_as_uniform_grid_data(
self, path, iteration, ref_level, component
self, path: str, iteration: int, ref_level: int, component: int
):
"""Return the component at the given iteration, refinement level, and component.
Expand All @@ -1407,51 +1398,36 @@ def _read_component_as_uniform_grid_data(
:rtype: :py:class:`~.UniformGridData`
"""
rx_mesh = re.compile(self._pattern_group_name)
# ref_level is an integer like 0, 1, 2, 3, 4, 5.
# So it is formatted to 2 digits to give 00, 01, 04, 10, 11
mesh_name = f"{self.mesh_basename}_lev{ref_level:02d}"

if self.alldata[path][iteration][ref_level][component] is None:
with openpmd_series(path) as series:
time = series.iterations[iteration].time
mesh = series.iterations[iteration].meshes
for mesh_name, mesh_obj in mesh.items():
matched = rx_mesh.match(mesh_name)
if matched is None:
raise RuntimeError(f"Could not parse mesh {mesh_name}")
ref_level_matched = int(matched.group(2))

if ref_level == ref_level_matched:
origin = np.array(mesh_obj.grid_global_offset)
dx = np.array(mesh_obj.grid_spacing)
for variable, variable_obj in mesh_obj.items():
if variable == self.var_name:
chunk = variable_obj.available_chunks()[
component
]

offset = np.array(chunk.offset)
shape = chunk.extent

# Do the actual reading
data = variable_obj.load_chunk(
chunk.offset, chunk.extent
)
series.flush()

grid = grid_data.UniformGrid(
shape,
x0=(origin + offset * dx),
dx=dx,
ref_level=ref_level,
num_ghost=[0, 0, 0],
time=time,
iteration=iteration,
component=component,
)

self.alldata[path][iteration][ref_level][
component
] = grid_data.UniformGridData(grid, data)

mesh_obj = series.iterations[iteration].meshes[mesh_name]
origin = np.array(mesh_obj.grid_global_offset)
dx = np.array(mesh_obj.grid_spacing)
mrc = mesh_obj[self.var_name]
chunk = mrc.available_chunks()[component]
offset = np.array(chunk.offset)
shape = chunk.extent
# Do the actual reading
data = mrc.load_chunk(chunk.offset, chunk.extent)
series.flush()
grid = grid_data.UniformGrid(
shape,
x0=(origin + offset * dx),
dx=dx,
ref_level=ref_level,
num_ghost=[0, 0, 0],
time=time,
iteration=iteration,
component=component,
)
self.alldata[path][iteration][ref_level][component] = (
grid_data.UniformGridData(grid, data)
)
return self.alldata[path][iteration][ref_level][component]

def clear_cache(self):
Expand All @@ -1466,12 +1442,12 @@ def clear_cache(self):
for filename, file_reader in self.alldata.items():
for iteration, iteration_reader in file_reader.items():
for ref_level, ref_level_reader in iteration_reader.items():
for component in ref_level_reader.keys():
for component in ref_level_reader:
self.alldata[filename][iteration][ref_level][
component
] = None

def time_at_iteration(self, iteration):
def time_at_iteration(self, iteration: int):
"""Return the time corresponding to the provided iteration.
:param iteration: Iteration.
Expand Down Expand Up @@ -1533,6 +1509,21 @@ class AllGridFunctions:
(0, 1, 2): "xyz",
}

# regex to match mesh_name like z4c_allc_lev05, admbasex_curv_lev00,
# admbasex_lapse_lev05
# Let's uinpack this regex:
# 1. ^ and $ mean that we are matching the entire string
# 2. (\w+) is a capturing group that matches a word. This is thorn_groupname
# For example, wavetoy_state
# 3. _ matches the literal underscore
# 4. (?: ) is a non-capturing group, rl|lev means that we match one of the two
# 5. (\d+) matches numbers
_mesh_name_pattern = r"^(\w+)_(?:rl|lev)(\d+)$"

# Mapping between a variable name and the prefix for the mesh name in the
# OpenPMD file. Typically, thorname_groupname.
_openpmd_mesh_basenames = {}

# The oddball case is with OpenPMD files. OpenPMD files are actually folders
# with extension .bp4 and they are always 3D.

Expand Down Expand Up @@ -1613,6 +1604,7 @@ def __init__(self, allfiles, dimension, num_ghost=None):
rx_h5 = re.compile(h5_pattern)
rx_ascii = re.compile(ascii_pattern)
rx_openpmd = re.compile(openpmd_pattern)
rx_mesh = re.compile(self._mesh_name_pattern)

# Here we scan all the files and find those with a name that match
# one of our regular expressions.
Expand Down Expand Up @@ -1659,7 +1651,7 @@ def __init__(self, allfiles, dimension, num_ghost=None):
try:
with h5py.File(f, "r") as h5f:
# Here group is in the sense of HDF5 group
for group in h5f.keys():
for group in h5f:
group_matched = rx_group_name.match(group)
# If this is not an interesting group, just skip it
if not group_matched:
Expand Down Expand Up @@ -1726,7 +1718,7 @@ def __init__(self, allfiles, dimension, num_ghost=None):
opener=opener,
opener_mode=opener_mode,
)
for variable_name in column_description.keys():
for variable_name in column_description:
var_list = self._vars_ascii_files.setdefault(
variable_name, set()
)
Expand All @@ -1739,16 +1731,25 @@ def __init__(self, allfiles, dimension, num_ghost=None):
# bp4 file.
dir_path = os.path.split(f)[0]
with openpmd_series(dir_path) as series:
iterations = series.iterations
for _iteration, iteration_obj in iterations.items():
all_meshes = iteration_obj.meshes
for _mesh_name, mesh_obj in all_meshes.items():
for mesh_var in mesh_obj.items():
variable_name = mesh_var[0]
for _iteration, iteration_obj in series.iterations.items():
for (
mesh_name,
mesh_obj,
) in iteration_obj.meshes.items():
matched = rx_mesh.match(mesh_name)
if matched is None:
raise RuntimeError(
f"Could not parse mesh {mesh_name}"
)
mesh_basename = matched.group(1)
for variable_name in mesh_obj:
var_list = self._vars_openpmd_files.setdefault(
variable_name, set()
)
var_list.add(dir_path)
self._openpmd_mesh_basenames[variable_name] = (
mesh_basename
)

# What pythonize_name_dict does is to make the various variables
# accessible as attributes, e.g. self.fields.rho
Expand All @@ -1771,6 +1772,10 @@ def __getitem__(self, key):
self._vars_openpmd_files[var_name],
var_name,
self.dimension,
# Get mesh_basenames corresponding to var name from dict.
# mesh_basename is thornname_groupname, where groupname is the
# name of the variable group that contains var_name
self._openpmd_mesh_basenames[var_name],
)
elif var_name in self._vars_ascii_files:
if self.num_ghost is None:
Expand Down Expand Up @@ -1989,7 +1994,7 @@ def total_filesize(self, unit="MB"):
"""
# First we find all the unique files
allfiles = set()
for dim in self._all_griddata.keys():
for dim in self._all_griddata:
allfiles.update(self[dim].allfiles)
# OpenPMD "files" are really directories, so if we want to measure their
# size accurately we have to open them up
Expand Down
Loading

0 comments on commit 11e75ef

Please sign in to comment.