Documentation
franzpoeschel committed Dec 6, 2024
1 parent 3a48b47 commit 9e14e5e
Showing 1 changed file with 89 additions and 8 deletions.
97 changes: 89 additions & 8 deletions mala/datahandling/data_shuffler.py
@@ -288,9 +288,59 @@ def __init__(self):
self.rank = 0
self.size = 1

# Takes the `extent` of an n-dimensional grid and the coordinates of a start
# point `x` and an end point `y`.
# When the grid is interpreted as a flattened row-major array, `x` and `y`
# span a contiguous slice from `x` to `y`.
# !!! Both `x` and `y` are **inclusive** ends: defining the next exclusive
# upper limit would be unnecessarily complex in n dimensions, especially
# since the function recurses over the number of dimensions. !!!
# This slice is not in general an n-dimensional cuboid within the grid, but
# may be a non-convex form composed of multiple cuboid chunks.
#
# This function returns a list of cuboid sub-chunks of the n-dimensional
# grid such that:
#
# 1. The union of those chunks is equal to the contiguous slice spanned
# between `x` and `y`, both ends inclusive.
# 2. The chunks do not overlap.
# 3. Each single chunk is a contiguous slice within the row-major grid.
# 4. The chunks are sorted in ascending order of their position within the
# row-major grid.
#
# Row-major within the context of this function means that the indexes go
# from the slowest-running dimension at the left end to the fastest-running
# dimension at the right end.
#
# The output is a list of chunks as defined in openPMD, i.e. a start offset
# and the extent, counted from this offset (not from the grid origin!).
#
# Example: From the below 2D grid with extent [8, 10], the slice marked by
# the X-es should be loaded. The slice is defined by its first item
# at [2, 3] and its last item at [6, 6].
# The result would be a list of three chunks:
#
# 1. ([2, 3], [1, 7])
# 2. ([3, 0], [3, 10])
# 3. ([6, 0], [1, 7])
#
# OOOOOOOOOO
# OOOOOOOOOO
# OOOXXXXXXX
# XXXXXXXXXX
# XXXXXXXXXX
# XXXXXXXXXX
# XXXXXXXOOO
# OOOOOOOOOO
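#
# A minimal doctest-style sketch of this example (illustration only;
# assuming, as the class-qualified recursive call below suggests, that the
# helper is a private member of DataShuffler and is therefore reached via
# its name-mangled form):
#
# >>> from mala.datahandling.data_shuffler import DataShuffler
# >>> blocks = DataShuffler._DataShuffler__contiguous_slice_within_ndim_grid_as_blocks
# >>> blocks([8, 10], [2, 3], [6, 6])
# [([2, 3], [1, 7]), ([3, 0], [3, 10]), ([6, 0], [1, 7])]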

def __contiguous_slice_within_ndim_grid_as_blocks(
extent: list[int], x: list[int], y: list[int]
):
# Used for converting a block defined by inclusive lower and upper
# coordinate to a chunk as defined by openPMD.
# The openPMD extent is defined by (to-from)+1 (to make up for the
# inclusive upper end).
def get_extent(from_, to_):
res = [upper + 1 - lower for (lower, upper) in zip(from_, to_)]
if any(x < 1 for x in res):
@@ -299,8 +349,6 @@ def get_extent(from_, to_):
)
return res
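# E.g. (illustration only): get_extent([3, 2], [3, 6]) == [1, 5], i.e. one
# row and five columns spanned by the two inclusive corners.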

if len(extent) != len(x):
raise RuntimeError(
f"__shuffle_openpmd: Internal indexing error extent={extent} and x={x} must have the same length. This is a bug."
@@ -318,54 +366,87 @@ def get_extent(from_, to_):
elif x[0:-1] == y[0:-1]:
return [(x.copy(), get_extent(x, y))]
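# E.g. (illustration only): x == [3, 2] and y == [3, 6] lie in the same
# row, so the result is the single chunk [([3, 2], [1, 5])].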

# Take the frontside and backside one-dimensional slices with extent
# defined by the last dimension, process the remainder recursively.

# The offset of the front slice is equal to x.
front_slice = (x.copy(), [1 for _ in range(len(x))])
# The chunk extent in the last dimension is the distance from the x
# coordinate to the grid extent in the last dimension.
# As the slice is one-dimensional, the extent is 1 in all other
# dimensions.
front_slice[1][-1] = extent[-1] - x[-1]
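# In the [8, 10] example above: front_slice == ([2, 3], [1, 7]), i.e. the
# X-es in the third row of the sketch.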

# Similar for the back slice.
# The offset of the back slice is 0 in the last dimension, equal to y
# in all other dimensions.
back_slice = (y.copy(), [1 for _ in range(len(y))])
back_slice[0][-1] = 0
# The extent is equal to the coordinate of y+1 (to convert inclusive to
# exclusive upper end) in the last dimension, 1 otherwise.
back_slice[1][-1] = y[-1] + 1
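# In the example: back_slice == ([6, 0], [1, 7]), i.e. the X-es in the
# seventh row of the sketch.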

# Strip the last (i.e. fast-running) dimension for a recursive call with
# one dimensionality reduced.
recursive_x = x[0:-1]
recursive_y = y[0:-1]

# Since the first and last row are dealt with above, those rows are now
# stripped from the second-to-last dimension for the recursive call
# (in which they will be the last dimension).
recursive_x[-1] += 1
recursive_y[-1] -= 1
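# In the example: recursive_x == [3] and recursive_y == [5], selecting the
# three fully covered rows 3 to 5 of the grid.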

rec_res = DataShuffler.__contiguous_slice_within_ndim_grid_as_blocks(
extent[0:-1], recursive_x, recursive_y
)

# Add the last dimension again. The last dimension is covered from start
# to end since the leftovers have been dealt with by the front and back
# slice.
rec_res = [(xx + [0], yy + [extent[-1]]) for (xx, yy) in rec_res]
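# In the example: the recursive result [([3], [3])] becomes
# [([3, 0], [3, 10])], i.e. three full rows of width 10.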

return [front_slice] + rec_res + [back_slice]

# Interpreting `ndim_extent` as the extents of an n-dimensional grid,
# returns the n-dimensional coordinate of the `idx`th item in the row-major
# representation of the grid.
def __resolve_flattened_index_into_ndim(idx: int, ndim_extent: list[int]):
if not ndim_extent:
raise RuntimeError("Cannot index into a zero-dimensional array.")
strides = []
current_stride = 1
for ext in reversed(ndim_extent):
strides = [current_stride] + strides
# sic!, the last stride is ignored, as it's just the entire grid
# extent.
current_stride *= ext
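# E.g. (illustration only): ndim_extent == [8, 10] yields
# strides == [10, 1] after the loop.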

def worker(inner_idx, inner_strides):
if not inner_strides:
if inner_idx != 0:
raise RuntimeError("Bug")
raise RuntimeError(
"This cannot happen. There is bug somewhere.")
else:
return []
div, mod = divmod(inner_idx, inner_strides[0])
return [div] + worker(mod, inner_strides[1:])

return worker(idx, strides)
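# A minimal doctest-style sketch (illustration only; the private helper is
# reached via its name-mangled form):
#
# >>> from mala.datahandling.data_shuffler import DataShuffler
# >>> resolve = DataShuffler._DataShuffler__resolve_flattened_index_into_ndim
# >>> resolve(37, [8, 10])
# [3, 7]
# >>> 3 * 10 + 7  # row-major: 3 full rows of 10, plus 7
# 37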

# Load a chunk from the openPMD record `mesh` into the buffer at `arr`.
# The parameters `offset` and `extent` are scalar values that address the
# n-dimensional record as if it were reshaped into a flattened, row-major
# one-dimensional array.
# The function internally splits this 1-dimensional slice into a sequence
# of n-dimensional block load operations.
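# For instance (continuing the [8, 10] example from above): offset=23 and
# extent=44 resolve to the inclusive n-dimensional corners [2, 3] and
# [6, 6], since 23 == 2 * 10 + 3 and 23 + 44 - 1 == 66 == 6 * 10 + 6.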
def __load_chunk_1D(mesh, arr, offset, extent):
start_idx = DataShuffler.__resolve_flattened_index_into_ndim(
offset, mesh.shape
)
# Inclusive upper end. See the documentation in
# __contiguous_slice_within_ndim_grid_as_blocks() for the reason why
# we work with inclusive upper ends.
end_idx = DataShuffler.__resolve_flattened_index_into_ndim(
offset + extent - 1, mesh.shape
)