From c4d4325811e52cb7d5b8ca3160e96a4e27012ebe Mon Sep 17 00:00:00 2001
From: Tom Nicholas <tom@cworthy.org>
Date: Tue, 18 Jun 2024 14:22:58 -0600
Subject: [PATCH] Use 3 numpy arrays for manifest internally (#107)

* change entries property to a structured array, add from_dict

* fix validation

* equals method

* re-implemented concatenation through concatenation of the wrapped structured array

* fixed manifest.from_kerchunk_dict

* fixed kerchunk tests

* change private attributes to 3 numpy arrays

* add from_arrays method

* to and from dict working again

* fix dtype comparisons

* depend on numpy release candidate

* get concatenation and stacking working

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* remove manifest-level tests of concatenation

* generalized create_manifestarray fixture

* added tests of broadcasting

* made basic broadcasting tests pass by calling np.broadcast_to on underlying numpy arrays

* generalize fixture for creating scalar ManifestArrays

* improve regression test for expanding scalar ManifestArray

* remove now-unneeded scalar broadcasting logic

* rewrite __eq__ method on ManifestArray

* remove unused import

* reinstate the ChunkManifest.__init__ method to accept dictionaries

* hopefully fix broadcasting bug

* add backwards compatibility for pre-numpy2.0

* depend on numpy>=2.0.0

* rewrote broadcast_to shape logic

* remove faulty hypothesis strategies

* remove hypothesis from dependencies

* ignore remaining mypy errors

* release notes

* update dependencies in CI test env

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 ci/environment.yml                            |   4 +-
 docs/releases.rst                             |   4 +
 pyproject.toml                                |   4 +-
 virtualizarr/kerchunk.py                      |   4 +-
 virtualizarr/manifests/__init__.py            |   7 +-
 virtualizarr/manifests/array.py               |  26 +-
 virtualizarr/manifests/array_api.py           | 176 ++++-----
 virtualizarr/manifests/manifest.py            | 334 ++++++++++--------
 virtualizarr/tests/test_kerchunk.py           |  12 +-
 .../tests/test_manifests/test_array.py        |  49 ++-
 .../tests/test_manifests/test_manifest.py     |  78 +---
 virtualizarr/tests/test_zarr.py               |   6 +-
 virtualizarr/zarr.py                          |  28 +-
 13 files changed, 399 insertions(+), 333 deletions(-)

diff --git a/ci/environment.yml b/ci/environment.yml
index d8af6bf5..0385ea5a 100644
--- a/ci/environment.yml
+++ b/ci/environment.yml
@@ -7,10 +7,10 @@ dependencies:
   - h5py
   - hdf5
   - netcdf4
-  - xarray>=2024.5.0
+  - xarray>=2024.6.0
   - kerchunk>=0.2.5
   - pydantic
-  - numpy
+  - numpy>=2.0.0
   - ujson
   - packaging
   - universal_pathlib
diff --git a/docs/releases.rst b/docs/releases.rst
index 2255ac10..7228ca66 100644
--- a/docs/releases.rst
+++ b/docs/releases.rst
@@ -13,6 +13,8 @@ New Features
 Breaking changes
 ~~~~~~~~~~~~~~~~
 
+- Requires numpy 2.0 (for :pull:`107`).
+  By `Tom Nicholas <https://github.com/TomNicholas>`_.
 
 Deprecations
 ~~~~~~~~~~~~
@@ -29,6 +31,8 @@ Documentation
 Internal Changes
 ~~~~~~~~~~~~~~~~
 
+- Refactor `ChunkManifest` class to store chunk references internally using numpy arrays.
+  (:pull:`107`) By `Tom Nicholas <https://github.com/TomNicholas>`_.
 - Mark tests which require network access so that they are only run when `--run-network-tests` is passed a command-line argument to pytest.
   (:pull:`144`) By `Tom Nicholas <https://github.com/TomNicholas>`_.
 
diff --git a/pyproject.toml b/pyproject.toml
index 9e1bde4e..075059cc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,11 +21,11 @@ classifiers = [
 requires-python = ">=3.10"
 dynamic = ["version"]
 dependencies = [
-    "xarray>=2024.5.0",
+    "xarray>=2024.06.0",
     "kerchunk>=0.2.5",
     "h5netcdf",
     "pydantic",
-    "numpy",
+    "numpy>=2.0.0",
     "ujson",
     "packaging",
     "universal-pathlib",
diff --git a/virtualizarr/kerchunk.py b/virtualizarr/kerchunk.py
index 9424ce40..ded76e3e 100644
--- a/virtualizarr/kerchunk.py
+++ b/virtualizarr/kerchunk.py
@@ -241,8 +241,8 @@ def variable_to_kerchunk_arr_refs(var: xr.Variable, var_name: str) -> KerchunkAr
         marr = var.data
 
         arr_refs: dict[str, str | list[str | int]] = {
-            str(chunk_key): chunk_entry.to_kerchunk()
-            for chunk_key, chunk_entry in marr.manifest.entries.items()
+            str(chunk_key): [entry["path"], entry["offset"], entry["length"]]
+            for chunk_key, entry in marr.manifest.dict().items()
         }
 
         zarray = marr.zarray
diff --git a/virtualizarr/manifests/__init__.py b/virtualizarr/manifests/__init__.py
index 39f60c59..c317ed6a 100644
--- a/virtualizarr/manifests/__init__.py
+++ b/virtualizarr/manifests/__init__.py
@@ -2,9 +2,4 @@
 # This is just to avoid conflicting with some type of file called manifest that .gitignore recommends ignoring.
 
 from .array import ManifestArray  # type: ignore # noqa
-from .manifest import (  # type: ignore # noqa
-    ChunkEntry,
-    ChunkManifest,
-    concat_manifests,
-    stack_manifests,
-)
+from .manifest import ChunkEntry, ChunkManifest  # type: ignore # noqa
diff --git a/virtualizarr/manifests/array.py b/virtualizarr/manifests/array.py
index cfc15cca..ed07d8e8 100644
--- a/virtualizarr/manifests/array.py
+++ b/virtualizarr/manifests/array.py
@@ -50,9 +50,12 @@ def __init__(
             _chunkmanifest = ChunkManifest(entries=chunkmanifest)
         else:
             raise TypeError(
-                f"chunkmanifest arg must be of type ChunkManifest, but got type {type(chunkmanifest)}"
+                f"chunkmanifest arg must be of type ChunkManifest or dict, but got type {type(chunkmanifest)}"
             )
 
+        # TODO check that the zarray shape and chunkmanifest shape are consistent with one another
+        # TODO also cover the special case of scalar arrays
+
         self._zarray = _zarray
         self._manifest = _chunkmanifest
 
@@ -140,7 +143,7 @@ def __eq__(  # type: ignore[override]
 
         Returns a numpy array of booleans.
         """
-        if isinstance(other, (int, float, bool)):
+        if isinstance(other, (int, float, bool, np.ndarray)):
             # TODO what should this do when comparing against numpy arrays?
             return np.full(shape=self.shape, fill_value=False, dtype=np.dtype(bool))
         elif not isinstance(other, ManifestArray):
@@ -164,9 +167,22 @@ def __eq__(  # type: ignore[override]
                     UserWarning,
                 )
 
-                # TODO do chunk-wise comparison
-                # TODO expand it into an element-wise result
-                return np.full(shape=self.shape, fill_value=False, dtype=np.dtype(bool))
+                # do chunk-wise comparison
+                equal_chunk_paths = self.manifest._paths == other.manifest._paths
+                equal_chunk_offsets = self.manifest._offsets == other.manifest._offsets
+                equal_chunk_lengths = self.manifest._lengths == other.manifest._lengths
+
+                equal_chunks = (
+                    equal_chunk_paths & equal_chunk_offsets & equal_chunk_lengths
+                )
+
+                if not equal_chunks.all():
+                    # TODO expand chunk-wise comparison into an element-wise result instead of just returning all False
+                    return np.full(
+                        shape=self.shape, fill_value=False, dtype=np.dtype(bool)
+                    )
+                else:
+                    raise RuntimeWarning("Should not be possible to get here")
 
     def astype(self, dtype: np.dtype, /, *, copy: bool = True) -> "ManifestArray":
         """Cannot change the dtype, but needed because xarray will call this even when it's a no-op."""
diff --git a/virtualizarr/manifests/array_api.py b/virtualizarr/manifests/array_api.py
index 8dac514e..0ecdc023 100644
--- a/virtualizarr/manifests/array_api.py
+++ b/virtualizarr/manifests/array_api.py
@@ -1,11 +1,10 @@
-import itertools
-from collections.abc import Callable, Iterable
-from typing import TYPE_CHECKING, cast
+from typing import TYPE_CHECKING, Callable, Iterable
 
 import numpy as np
 
-from ..zarr import Codec, ZArray
-from .manifest import concat_manifests, stack_manifests
+from virtualizarr.zarr import Codec, ceildiv
+
+from .manifest import ChunkManifest
 
 if TYPE_CHECKING:
     from .array import ManifestArray
@@ -125,21 +124,28 @@ def concatenate(
     new_shape = list(first_shape)
     new_shape[axis] = new_length_along_concat_axis
 
-    concatenated_manifest = concat_manifests(
-        [arr.manifest for arr in arrays],
+    # do concatenation of entries in manifest
+    concatenated_paths = np.concatenate(
+        [arr.manifest._paths for arr in arrays],
+        axis=axis,
+    )
+    concatenated_offsets = np.concatenate(
+        [arr.manifest._offsets for arr in arrays],
+        axis=axis,
+    )
+    concatenated_lengths = np.concatenate(
+        [arr.manifest._lengths for arr in arrays],
         axis=axis,
     )
+    concatenated_manifest = ChunkManifest.from_arrays(
+        paths=concatenated_paths,
+        offsets=concatenated_offsets,
+        lengths=concatenated_lengths,
+    )
 
-    new_zarray = ZArray(
-        chunks=first_arr.chunks,
-        compressor=first_arr.zarray.compressor,
-        dtype=first_arr.dtype,
-        fill_value=first_arr.zarray.fill_value,
-        filters=first_arr.zarray.filters,
-        shape=new_shape,
-        # TODO presumably these things should be checked for consistency across arrays too?
-        order=first_arr.zarray.order,
-        zarr_format=first_arr.zarray.zarr_format,
+    # chunk shape has not changed, there are just now more chunks along the concatenation axis
+    new_zarray = first_arr.zarray.replace(
+        shape=tuple(new_shape),
     )
 
     return ManifestArray(chunkmanifest=concatenated_manifest, zarray=new_zarray)
@@ -210,26 +216,33 @@ def stack(
     new_shape = list(first_shape)
     new_shape.insert(axis, length_along_new_stacked_axis)
 
-    stacked_manifest = stack_manifests(
-        [arr.manifest for arr in arrays],
+    # do stacking of entries in manifest
+    stacked_paths = np.stack(
+        [arr.manifest._paths for arr in arrays],
+        axis=axis,
+    )
+    stacked_offsets = np.stack(
+        [arr.manifest._offsets for arr in arrays],
+        axis=axis,
+    )
+    stacked_lengths = np.stack(
+        [arr.manifest._lengths for arr in arrays],
         axis=axis,
     )
+    stacked_manifest = ChunkManifest.from_arrays(
+        paths=stacked_paths,
+        offsets=stacked_offsets,
+        lengths=stacked_lengths,
+    )
 
-    # chunk size has changed because a length-1 axis has been inserted
+    # chunk shape has changed because a length-1 axis has been inserted
     old_chunks = first_arr.chunks
     new_chunks = list(old_chunks)
     new_chunks.insert(axis, 1)
 
-    new_zarray = ZArray(
-        chunks=new_chunks,
-        compressor=first_arr.zarray.compressor,
-        dtype=first_arr.dtype,
-        fill_value=first_arr.zarray.fill_value,
-        filters=first_arr.zarray.filters,
-        shape=new_shape,
-        # TODO presumably these things should be checked for consistency across arrays too?
-        order=first_arr.zarray.order,
-        zarr_format=first_arr.zarray.zarr_format,
+    new_zarray = first_arr.zarray.replace(
+        chunks=tuple(new_chunks),
+        shape=tuple(new_shape),
     )
 
     return ManifestArray(chunkmanifest=stacked_manifest, zarray=new_zarray)
@@ -254,74 +267,65 @@ def expand_dims(x: "ManifestArray", /, *, axis: int = 0) -> "ManifestArray":
 @implements(np.broadcast_to)
 def broadcast_to(x: "ManifestArray", /, shape: tuple[int, ...]) -> "ManifestArray":
     """
-    Broadcasts an array to a specified shape, by either manipulating chunk keys or copying chunk manifest entries.
+    Broadcasts a ManifestArray to a specified shape, by either adjusting chunk keys or copying chunk manifest entries.
     """
 
-    if len(x.shape) > len(shape):
-        raise ValueError("input operand has more dimensions than allowed")
-
-    # numpy broadcasting algorithm requires us to start by comparing the length of the final axes and work backwards
-    result = x
-    for axis, d, d_requested in itertools.zip_longest(
-        reversed(range(len(shape))), reversed(x.shape), reversed(shape), fillvalue=None
-    ):
-        # len(shape) check above ensures this can't be type None
-        d_requested = cast(int, d_requested)
-
-        if d == d_requested:
-            pass
-        elif d is None:
-            if result.shape == ():
-                # scalars are a special case because their manifests already have a chunk key with one dimension
-                # see https://github.com/TomNicholas/VirtualiZarr/issues/100#issuecomment-2097058282
-                result = _broadcast_scalar(result, new_axis_length=d_requested)
-            else:
-                # stack same array upon itself d_requested number of times, which inserts a new axis at axis=0
-                result = stack([result] * d_requested, axis=0)
-        elif d == 1:
-            # concatenate same array upon itself d_requested number of times along existing axis
-            result = concatenate([result] * d_requested, axis=axis)
-        else:
-            raise ValueError(
-                f"Array with shape {x.shape} cannot be broadcast to shape {shape}"
-            )
-
-    return result
-
-
-def _broadcast_scalar(x: "ManifestArray", new_axis_length: int) -> "ManifestArray":
-    """
-    Add an axis to a scalar ManifestArray, but without adding a new axis to the keys of the chunk manifest.
+    from .array import ManifestArray
 
-    This is not the same as concatenation, because there is no existing axis along which to concatenate.
-    It's also not the same as stacking, because we don't want to insert a new axis into the chunk keys.
+    new_shape = shape
 
-    Scalars are a special case because their manifests still have a chunk key with one dimension.
-    See https://github.com/TomNicholas/VirtualiZarr/issues/100#issuecomment-2097058282
-    """
+    # check its actually possible to broadcast to this new shape
+    mutually_broadcastable_shape = np.broadcast_shapes(x.shape, new_shape)
+    if mutually_broadcastable_shape != new_shape:
+        # we're not trying to broadcast both shapes to a third shape
+        raise ValueError(
+            f"array of shape {x.shape} cannot be broadcast to shape {new_shape}"
+        )
 
-    from .array import ManifestArray
+    # new chunk_shape is old chunk_shape with singleton dimensions pre-pended
+    # (chunk shape can never change by more than adding length-1 axes because each chunk represents a fixed number of array elements)
+    old_chunk_shape = x.chunks
+    new_chunk_shape = _prepend_singleton_dimensions(
+        old_chunk_shape, ndim=len(new_shape)
+    )
 
-    new_shape = (new_axis_length,)
-    new_chunks = (new_axis_length,)
+    # find new chunk grid shape by dividing new array shape by new chunk shape
+    new_chunk_grid_shape = tuple(
+        ceildiv(axis_length, chunk_length)
+        for axis_length, chunk_length in zip(new_shape, new_chunk_shape)
+    )
 
-    concatenated_manifest = concat_manifests(
-        [x.manifest] * new_axis_length,
-        axis=0,
+    # do broadcasting of entries in manifest
+    broadcasted_paths = np.broadcast_to(
+        x.manifest._paths,
+        shape=new_chunk_grid_shape,
+    )
+    broadcasted_offsets = np.broadcast_to(
+        x.manifest._offsets,
+        shape=new_chunk_grid_shape,
+    )
+    broadcasted_lengths = np.broadcast_to(
+        x.manifest._lengths,
+        shape=new_chunk_grid_shape,
+    )
+    broadcasted_manifest = ChunkManifest.from_arrays(
+        paths=broadcasted_paths,
+        offsets=broadcasted_offsets,
+        lengths=broadcasted_lengths,
     )
 
-    new_zarray = ZArray(
-        chunks=new_chunks,
-        compressor=x.zarray.compressor,
-        dtype=x.dtype,
-        fill_value=x.zarray.fill_value,
-        filters=x.zarray.filters,
+    new_zarray = x.zarray.replace(
+        chunks=new_chunk_shape,
         shape=new_shape,
-        order=x.zarray.order,
-        zarr_format=x.zarray.zarr_format,
     )
 
-    return ManifestArray(chunkmanifest=concatenated_manifest, zarray=new_zarray)
+    return ManifestArray(chunkmanifest=broadcasted_manifest, zarray=new_zarray)
+
+
+def _prepend_singleton_dimensions(shape: tuple[int, ...], ndim: int) -> tuple[int, ...]:
+    """Prepend as many new length-1 axes to shape as necessary such that the result has ndim number of axes."""
+    n_prepended_dims = ndim - len(shape)
+    return tuple([1] * n_prepended_dims + list(shape))
 
 
 # TODO broadcast_arrays, squeeze, permute_dims
diff --git a/virtualizarr/manifests/manifest.py b/virtualizarr/manifests/manifest.py
index c0a95c67..530ce17c 100644
--- a/virtualizarr/manifests/manifest.py
+++ b/virtualizarr/manifests/manifest.py
@@ -1,13 +1,12 @@
-import itertools
 import json
 import re
-from collections.abc import Iterable, Iterator, Mapping
-from typing import Any, cast
+from collections.abc import Iterable, Iterator
+from typing import Any, NewType, Tuple, Union, cast
 
 import numpy as np
-from pydantic import BaseModel, ConfigDict, field_validator
+from pydantic import BaseModel, ConfigDict
 
-from ..types import ChunkKey
+from virtualizarr.types import ChunkKey
 
 _INTEGER = (
     r"([1-9]+\d*|0)"  # matches 0 or an unsigned integer that does not begin with zero
@@ -16,6 +15,9 @@
 _CHUNK_KEY = rf"^{_INTEGER}+({_SEPARATOR}{_INTEGER})*$"  # matches 1 integer, optionally followed by more integers each separated by a separator (i.e. a period)
 
 
+ChunkDict = NewType("ChunkDict", dict[ChunkKey, dict[str, Union[str, int]]])
+
+
 class ChunkEntry(BaseModel):
     """
     Information for a single chunk in the manifest.
@@ -41,13 +43,17 @@ def to_kerchunk(self) -> list[str | int]:
         """Write out in the format that kerchunk uses for chunk entries."""
         return [self.path, self.offset, self.length]
 
+    def dict(self) -> dict[str, Union[str, int]]:
+        return dict(path=self.path, offset=self.offset, length=self.length)
+
 
-class ChunkManifest(BaseModel):
+class ChunkManifest:
     """
     In-memory representation of a single Zarr chunk manifest.
 
-    Stores the manifest as a dictionary under the .chunks attribute, in this form:
+    Stores the manifest internally as numpy arrays, so the most efficient way to create this object is via the `.from_arrays` constructor classmethod.
 
+    The manifest can be converted to or from a dictionary which looks like this
 
         {
             "0.0.0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100},
@@ -56,25 +62,131 @@ class ChunkManifest(BaseModel):
             "0.1.1": {"path": "s3://bucket/foo.nc", "offset": 400, "length": 100},
         }
 
+    using the .__init__() and .dict() methods, so users of this class can think of the manifest as if it were a dict mapping zarr chunk keys to byte ranges.
 
-    See the chunk manifest SPEC proposal in https://github.com/zarr-developers/zarr-specs/issues/287 .
+    (See the chunk manifest SPEC proposal in https://github.com/zarr-developers/zarr-specs/issues/287.)
 
-    Validation is done when this object is instatiated, and this class is immutable,
-    so it's not possible to have a ChunkManifest object that does not represent a complete valid grid of chunks.
+    Validation is done when this object is instantiated, and this class is immutable,
+    so it's not possible to have a ChunkManifest object that does not represent a valid grid of chunks.
     """
 
-    model_config = ConfigDict(frozen=True)
+    _paths: np.ndarray[Any, np.dtypes.StringDType]  # type: ignore[name-defined]
+    _offsets: np.ndarray[Any, np.dtype[np.int32]]
+    _lengths: np.ndarray[Any, np.dtype[np.int32]]
+
+    def __init__(self, entries: dict) -> None:
+        """
+        Create a ChunkManifest from a dictionary mapping zarr chunk keys to byte ranges.
+
+        Parameters
+        ----------
+        entries: dict
+            Chunk keys and byte range information, as a dictionary of the form
+
+                {
+                    "0.0.0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100},
+                    "0.0.1": {"path": "s3://bucket/foo.nc", "offset": 200, "length": 100},
+                    "0.1.0": {"path": "s3://bucket/foo.nc", "offset": 300, "length": 100},
+                    "0.1.1": {"path": "s3://bucket/foo.nc", "offset": 400, "length": 100},
+                }
+        """
 
-    entries: Mapping[ChunkKey, ChunkEntry]
-    # shape_chunk_grid: Tuple[int, ...]  # TODO do we need this for anything?
+        # TODO do some input validation here first?
+        validate_chunk_keys(entries.keys())
+
+        # TODO should we actually optionally pass chunk grid shape in,
+        # in case there are not enough chunks to give correct idea of full shape?
+        shape = get_chunk_grid_shape(entries.keys())
+
+        # Initializing to empty implies that entries with path='' are treated as missing chunks
+        paths = np.empty(shape=shape, dtype=np.dtypes.StringDType())  # type: ignore[attr-defined]
+        offsets = np.empty(shape=shape, dtype=np.dtype("int32"))
+        lengths = np.empty(shape=shape, dtype=np.dtype("int32"))
+
+        # populate the arrays
+        for key, entry in entries.items():
+            try:
+                path, offset, length = entry.values()
+                entry = ChunkEntry(path=path, offset=offset, length=length)
+            except (ValueError, TypeError) as e:
+                msg = (
+                    "Each chunk entry must be of the form dict(path=<str>, offset=<int>, length=<int>), "
+                    f"but got {entry}"
+                )
+                raise ValueError(msg) from e
+
+            split_key = split(key)
+            paths[split_key] = entry.path
+            offsets[split_key] = entry.offset
+            lengths[split_key] = entry.length
+
+        self._paths = paths
+        self._offsets = offsets
+        self._lengths = lengths
 
-    @field_validator("entries")
     @classmethod
-    def validate_chunks(cls, entries: Any) -> Mapping[ChunkKey, ChunkEntry]:
-        validate_chunk_keys(list(entries.keys()))
+    def from_arrays(
+        cls,
+        paths: np.ndarray[Any, np.dtype[np.dtypes.StringDType]],  # type: ignore[name-defined]
+        offsets: np.ndarray[Any, np.dtype[np.int32]],
+        lengths: np.ndarray[Any, np.dtype[np.int32]],
+    ) -> "ChunkManifest":
+        """
+        Create manifest directly from numpy arrays containing the path and byte range information.
+
+        Useful if you want to avoid the memory overhead of creating an intermediate dictionary first,
+        as these 3 arrays are what will be used internally to store the references.
+
+        Parameters
+        ----------
+        paths: np.ndarray
+        offsets: np.ndarray
+        lengths: np.ndarray
+        """
+
+        # check types
+        if not isinstance(paths, np.ndarray):
+            raise TypeError(f"paths must be a numpy array, but got type {type(paths)}")
+        if not isinstance(offsets, np.ndarray):
+            raise TypeError(
+                f"offsets must be a numpy array, but got type {type(offsets)}"
+            )
+        if not isinstance(lengths, np.ndarray):
+            raise TypeError(
+                f"lengths must be a numpy array, but got type {type(lengths)}"
+            )
+
+        # check dtypes
+        if paths.dtype != np.dtypes.StringDType():  # type: ignore[attr-defined]
+            raise ValueError(
+                f"paths array must have a numpy variable-length string dtype, but got dtype {paths.dtype}"
+            )
+        if offsets.dtype != np.dtype("int32"):
+            raise ValueError(
+                f"offsets array must have 32-bit integer dtype, but got dtype {offsets.dtype}"
+            )
+        if lengths.dtype != np.dtype("int32"):
+            raise ValueError(
+                f"lengths array must have 32-bit integer dtype, but got dtype {lengths.dtype}"
+            )
+
+        # check shapes
+        shape = paths.shape
+        if offsets.shape != shape:
+            raise ValueError(
+                f"Shapes of the arrays must be consistent, but shapes of paths array and offsets array do not match: {paths.shape} vs {offsets.shape}"
+            )
+        if lengths.shape != shape:
+            raise ValueError(
+                f"Shapes of the arrays must be consistent, but shapes of paths array and lengths array do not match: {paths.shape} vs {lengths.shape}"
+            )
+
+        obj = object.__new__(cls)
+        obj._paths = paths
+        obj._offsets = offsets
+        obj._lengths = lengths
 
-        # TODO what if pydantic adjusts anything during validation?
-        return entries
+        return obj
 
     @property
     def ndim_chunk_grid(self) -> int:
@@ -83,7 +195,7 @@ def ndim_chunk_grid(self) -> int:
 
         Not the same as the dimension of an array backed by this chunk manifest.
         """
-        return get_ndim_from_key(list(self.entries.keys())[0])
+        return self._paths.ndim
 
     @property
     def shape_chunk_grid(self) -> tuple[int, ...]:
@@ -92,54 +204,99 @@ def shape_chunk_grid(self) -> tuple[int, ...]:
 
         Not the same as the shape of an array backed by this chunk manifest.
         """
-        return get_chunk_grid_shape(list(self.entries.keys()))
+        return self._paths.shape
 
     def __repr__(self) -> str:
         return f"ChunkManifest<shape={self.shape_chunk_grid}>"
 
     def __getitem__(self, key: ChunkKey) -> ChunkEntry:
-        return self.entries[key]
+        indices = split(key)
+        path = self._paths[indices]
+        offset = self._offsets[indices]
+        length = self._lengths[indices]
+        return ChunkEntry(path=path, offset=offset, length=length)
 
     def __iter__(self) -> Iterator[ChunkKey]:
-        return iter(self.entries.keys())
+        # TODO make this work for numpy arrays
+        raise NotImplementedError()
+        # return iter(self._paths.keys())
 
     def __len__(self) -> int:
-        return len(self.entries)
+        return self._paths.size
+
+    def dict(self) -> ChunkDict:
+        """
+        Convert the entire manifest to a nested dictionary.
+
+        The returned dict will be of the form
+
+        {
+            "0.0.0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100},
+            "0.0.1": {"path": "s3://bucket/foo.nc", "offset": 200, "length": 100},
+            "0.1.0": {"path": "s3://bucket/foo.nc", "offset": 300, "length": 100},
+            "0.1.1": {"path": "s3://bucket/foo.nc", "offset": 400, "length": 100},
+        }
+
+        Entries whose path is an empty string will be interpreted as missing chunks and omitted from the dictionary.
+        """
 
-    def dict(self) -> dict[str, dict[str, str | int]]:
-        """Converts the entire manifest to a nested dictionary."""
-        return {k: dict(entry) for k, entry in self.entries.items()}
+        coord_vectors = np.mgrid[
+            tuple(slice(None, length) for length in self.shape_chunk_grid)
+        ]
+
+        d = {
+            join(inds): dict(
+                path=path.item(), offset=offset.item(), length=length.item()
+            )
+            for *inds, path, offset, length in np.nditer(
+                [*coord_vectors, self._paths, self._offsets, self._lengths],
+                flags=("refs_ok",),
+            )
+            if path.item()[0] != ""  # don't include entry if path='' (i.e. empty chunk)
+        }
+
+        return cast(
+            ChunkDict,
+            d,
+        )
+
+    def __eq__(self, other: Any) -> bool:
+        """Two manifests are equal if all of their entries are identical."""
+        paths_equal = (self._paths == other._paths).all()
+        offsets_equal = (self._offsets == other._offsets).all()
+        lengths_equal = (self._lengths == other._lengths).all()
+        return paths_equal and offsets_equal and lengths_equal
 
     @classmethod
     def from_zarr_json(cls, filepath: str) -> "ChunkManifest":
         """Create a ChunkManifest from a Zarr manifest.json file."""
-        with open(filepath) as manifest_file:
-            entries_dict = json.load(manifest_file)
 
-        entries = {
-            cast(ChunkKey, k): ChunkEntry(**entry) for k, entry in entries_dict.items()
-        }
+        with open(filepath, "r") as manifest_file:
+            entries = json.load(manifest_file)
+
         return cls(entries=entries)
 
     def to_zarr_json(self, filepath: str) -> None:
-        """Write a ChunkManifest to a Zarr manifest.json file."""
+        """Write the manifest to a Zarr manifest.json file."""
+        entries = self.dict()
         with open(filepath, "w") as json_file:
-            json.dump(self.dict(), json_file, indent=4, separators=(", ", ": "))
+            json.dump(entries, json_file, indent=4, separators=(", ", ": "))
 
     @classmethod
     def _from_kerchunk_chunk_dict(cls, kerchunk_chunk_dict) -> "ChunkManifest":
         chunkentries = {
-            k: ChunkEntry.from_kerchunk(v) for k, v in kerchunk_chunk_dict.items()
+            cast(ChunkKey, k): ChunkEntry.from_kerchunk(v).dict()
+            for k, v in kerchunk_chunk_dict.items()
         }
-        return ChunkManifest(entries=chunkentries)
+        return ChunkManifest(entries=cast(ChunkDict, chunkentries))
 
 
-def split(key: ChunkKey) -> list[int]:
-    return list(int(i) for i in key.split("."))
+def split(key: ChunkKey) -> Tuple[int, ...]:
+    return tuple(int(i) for i in key.split("."))
 
 
-def join(inds: Iterable[int]) -> ChunkKey:
-    return cast(ChunkKey, ".".join(str(i) for i in inds))
+def join(inds: Iterable[Any]) -> ChunkKey:
+    return cast(ChunkKey, ".".join(str(i) for i in list(inds)))
 
 
 def get_ndim_from_key(key: str) -> int:
@@ -163,9 +320,6 @@ def validate_chunk_keys(chunk_keys: Iterable[ChunkKey]):
                 f"Inconsistent number of dimensions between chunk key {key} and {first_key}: {other_ndim} vs {ndim}"
             )
 
-    # Check that the keys collectively form a complete grid
-    check_keys_form_grid(chunk_keys)
-
 
 def get_chunk_grid_shape(chunk_keys: Iterable[ChunkKey]) -> tuple[int, ...]:
     # find max chunk index along each dimension
@@ -174,99 +328,3 @@ def get_chunk_grid_shape(chunk_keys: Iterable[ChunkKey]) -> tuple[int, ...]:
         max(indices_along_one_dim) + 1 for indices_along_one_dim in zipped_indices
     )
     return chunk_grid_shape
-
-
-def check_keys_form_grid(chunk_keys: Iterable[ChunkKey]):
-    """Check that the chunk keys collectively form a complete grid"""
-
-    chunk_grid_shape = get_chunk_grid_shape(chunk_keys)
-
-    # create every possible combination
-    all_possible_combos = itertools.product(
-        *[range(length) for length in chunk_grid_shape]
-    )
-    all_required_chunk_keys: set[ChunkKey] = {
-        join(inds) for inds in all_possible_combos
-    }
-
-    # check that every possible combination is represented once in the list of chunk keys
-    if set(chunk_keys) != all_required_chunk_keys:
-        raise ValueError("Chunk keys do not form a complete grid")
-
-
-def concat_manifests(manifests: list["ChunkManifest"], axis: int) -> "ChunkManifest":
-    """
-    Concatenate manifests along an existing dimension.
-
-    This only requires adjusting one index of chunk keys along a single dimension.
-
-    Note axis is not expected to be negative.
-    """
-    if len(manifests) == 1:
-        return manifests[0]
-
-    chunk_grid_shapes = [manifest.shape_chunk_grid for manifest in manifests]
-    lengths_along_concat_dim = [shape[axis] for shape in chunk_grid_shapes]
-
-    # Note we do not need to change the keys of the first manifest
-    chunk_index_offsets = np.cumsum(lengths_along_concat_dim)[:-1]
-    new_entries = [
-        adjust_chunk_keys(manifest.entries, axis, offset)
-        for manifest, offset in zip(manifests[1:], chunk_index_offsets)
-    ]
-    all_entries = [manifests[0].entries] + new_entries
-    merged_entries = {k: v for d in all_entries for k, v in d.items()}
-
-    # Arguably don't need to re-perform validation checks on a manifest we created out of already-validated manifests
-    # Could use pydantic's model_construct classmethod to skip these checks
-    # But we should actually performance test it because it might be pointless, and current implementation is safer
-    return ChunkManifest(entries=merged_entries)
-
-
-def adjust_chunk_keys(
-    entries: Mapping[ChunkKey, ChunkEntry], axis: int, offset: int
-) -> Mapping[ChunkKey, ChunkEntry]:
-    """Replace all chunk keys with keys which have been offset along one axis."""
-
-    def offset_key(key: ChunkKey, axis: int, offset: int) -> ChunkKey:
-        inds = split(key)
-        inds[axis] += offset
-        return join(inds)
-
-    return {offset_key(k, axis, offset): v for k, v in entries.items()}
-
-
-def stack_manifests(manifests: list[ChunkManifest], axis: int) -> "ChunkManifest":
-    """
-    Stack manifests along a new dimension.
-
-    This only requires inserting one index into all chunk keys to add a new dimension.
-
-    Note axis is not expected to be negative.
-    """
-
-    # even if there is only one manifest it still needs a new axis inserted
-    chunk_indexes_along_new_dim = range(len(manifests))
-    new_entries = [
-        insert_new_axis_into_chunk_keys(manifest.entries, axis, new_index_value)
-        for manifest, new_index_value in zip(manifests, chunk_indexes_along_new_dim)
-    ]
-    merged_entries = {k: v for d in new_entries for k, v in d.items()}
-
-    # Arguably don't need to re-perform validation checks on a manifest we created out of already-validated manifests
-    # Could use pydantic's model_construct classmethod to skip these checks
-    # But we should actually performance test it because it might be pointless, and current implementation is safer
-    return ChunkManifest(entries=merged_entries)
-
-
-def insert_new_axis_into_chunk_keys(
-    entries: Mapping[ChunkKey, ChunkEntry], axis: int, new_index_value: int
-) -> Mapping[ChunkKey, ChunkEntry]:
-    """Replace all chunk keys with keys which have a new axis inserted, with a given value."""
-
-    def insert_axis(key: ChunkKey, new_axis: int, index_value: int) -> ChunkKey:
-        inds = split(key)
-        inds.insert(new_axis, index_value)
-        return join(inds)
-
-    return {insert_axis(k, axis, new_index_value): v for k, v in entries.items()}
diff --git a/virtualizarr/tests/test_kerchunk.py b/virtualizarr/tests/test_kerchunk.py
index c255a8d8..a623bc02 100644
--- a/virtualizarr/tests/test_kerchunk.py
+++ b/virtualizarr/tests/test_kerchunk.py
@@ -6,7 +6,7 @@
 import xarray.testing as xrt
 
 from virtualizarr.kerchunk import FileType, _automatically_determine_filetype
-from virtualizarr.manifests import ChunkEntry, ChunkManifest, ManifestArray
+from virtualizarr.manifests import ChunkManifest, ManifestArray
 from virtualizarr.xarray import dataset_from_kerchunk_refs
 
 
@@ -68,8 +68,11 @@ def test_dataset_from_df_refs_with_filters():
 
 class TestAccessor:
     def test_accessor_to_kerchunk_dict(self):
+        manifest = ChunkManifest(
+            entries={"0.0": dict(path="test.nc", offset=6144, length=48)}
+        )
         arr = ManifestArray(
-            chunkmanifest={"0.0": ChunkEntry(path="test.nc", offset=6144, length=48)},
+            chunkmanifest=manifest,
             zarray=dict(
                 shape=(2, 3),
                 dtype=np.dtype("<i8"),
@@ -97,8 +100,11 @@ def test_accessor_to_kerchunk_dict(self):
         assert result_ds_refs == expected_ds_refs
 
     def test_accessor_to_kerchunk_json(self, tmp_path):
+        manifest = ChunkManifest(
+            entries={"0.0": dict(path="test.nc", offset=6144, length=48)}
+        )
         arr = ManifestArray(
-            chunkmanifest={"0.0": ChunkEntry(path="test.nc", offset=6144, length=48)},
+            chunkmanifest=manifest,
             zarray=dict(
                 shape=(2, 3),
                 dtype=np.dtype("<i8"),
diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py
index 34a4464a..459e60be 100644
--- a/virtualizarr/tests/test_manifests/test_array.py
+++ b/virtualizarr/tests/test_manifests/test_array.py
@@ -124,13 +124,43 @@ def test_partly_equals(self): ...
 
 
 class TestBroadcast:
+    def test_broadcast_existing_axis(self):
+        marr = create_manifestarray(shape=(1, 2), chunks=(1, 2))
+        expanded = np.broadcast_to(marr, shape=(3, 2))
+        assert expanded.shape == (3, 2)
+        assert expanded.chunks == (1, 2)
+        assert expanded.manifest.dict() == {
+            "0.0": {"path": "file.0.0.nc", "offset": 0, "length": 5},
+            "1.0": {"path": "file.0.0.nc", "offset": 0, "length": 5},
+            "2.0": {"path": "file.0.0.nc", "offset": 0, "length": 5},
+        }
+
+    def test_broadcast_new_axis(self):
+        marr = create_manifestarray(shape=(3,), chunks=(1,))
+        expanded = np.broadcast_to(marr, shape=(1, 3))
+        assert expanded.shape == (1, 3)
+        assert expanded.chunks == (1, 1)
+        assert expanded.manifest.dict() == {
+            "0.0": {"path": "file.0.nc", "offset": 0, "length": 5},
+            "0.1": {"path": "file.1.nc", "offset": 10, "length": 6},
+            "0.2": {"path": "file.2.nc", "offset": 20, "length": 7},
+        }
+
     def test_broadcast_scalar(self):
         # regression test
         marr = create_manifestarray(shape=(), chunks=())
+        assert marr.shape == ()
+        assert marr.chunks == ()
+        assert marr.manifest.dict() == {
+            "0": {"path": "file.0.nc", "offset": 0, "length": 5},
+        }
+
         expanded = np.broadcast_to(marr, shape=(1,))
         assert expanded.shape == (1,)
         assert expanded.chunks == (1,)
-        assert expanded.manifest == marr.manifest
+        assert expanded.manifest.dict() == {
+            "0": {"path": "file.0.nc", "offset": 0, "length": 5},
+        }
 
     @pytest.mark.parametrize(
         "shape, chunks, target_shape",
@@ -153,12 +183,7 @@ def test_raise_on_invalid_broadcast_shapes(self, shape, chunks, target_shape):
             ((1,), (1,), (3,)),
             ((2,), (1,), (2,)),
             ((3,), (2,), (5, 4, 3)),
-            pytest.param(
-                (3, 1),
-                (2, 1),
-                (2, 3, 4),
-                marks=pytest.mark.xfail(reason="known bug, see issue 144"),
-            ),
+            ((3, 1), (2, 1), (2, 3, 4)),
         ],
     )
     def test_broadcast_any_shape(self, shape, chunks, target_shape):
@@ -285,28 +310,30 @@ def test_refuse_combine():
     chunks_dict1 = {
         "0.0.0": {"path": "foo.nc", "offset": 100, "length": 100},
     }
+    chunkmanifest1 = ChunkManifest(entries=chunks_dict1)
     chunks_dict2 = {
         "0.0.0": {"path": "foo.nc", "offset": 300, "length": 100},
     }
-    marr1 = ManifestArray(zarray=zarray_common, chunkmanifest=chunks_dict1)
+    chunkmanifest2 = ChunkManifest(entries=chunks_dict2)
+    marr1 = ManifestArray(zarray=zarray_common, chunkmanifest=chunkmanifest1)
 
     zarray_wrong_compressor = zarray_common.copy()
     zarray_wrong_compressor["compressor"] = None
-    marr2 = ManifestArray(zarray=zarray_wrong_compressor, chunkmanifest=chunks_dict2)
+    marr2 = ManifestArray(zarray=zarray_wrong_compressor, chunkmanifest=chunkmanifest2)
     for func in [np.concatenate, np.stack]:
         with pytest.raises(NotImplementedError, match="different codecs"):
             func([marr1, marr2], axis=0)
 
     zarray_wrong_dtype = zarray_common.copy()
     zarray_wrong_dtype["dtype"] = np.dtype("int64")
-    marr2 = ManifestArray(zarray=zarray_wrong_dtype, chunkmanifest=chunks_dict2)
+    marr2 = ManifestArray(zarray=zarray_wrong_dtype, chunkmanifest=chunkmanifest2)
     for func in [np.concatenate, np.stack]:
         with pytest.raises(ValueError, match="inconsistent dtypes"):
             func([marr1, marr2], axis=0)
 
     zarray_wrong_dtype = zarray_common.copy()
     zarray_wrong_dtype["dtype"] = np.dtype("int64")
-    marr2 = ManifestArray(zarray=zarray_wrong_dtype, chunkmanifest=chunks_dict2)
+    marr2 = ManifestArray(zarray=zarray_wrong_dtype, chunkmanifest=chunkmanifest2)
     for func in [np.concatenate, np.stack]:
         with pytest.raises(ValueError, match="inconsistent dtypes"):
             func([marr1, marr2], axis=0)
diff --git a/virtualizarr/tests/test_manifests/test_manifest.py b/virtualizarr/tests/test_manifests/test_manifest.py
index 7e364d78..acf8ac42 100644
--- a/virtualizarr/tests/test_manifests/test_manifest.py
+++ b/virtualizarr/tests/test_manifests/test_manifest.py
@@ -1,7 +1,6 @@
 import pytest
-from pydantic import ValidationError
 
-from virtualizarr.manifests import ChunkManifest, concat_manifests, stack_manifests
+from virtualizarr.manifests import ChunkManifest
 
 
 class TestCreateManifest:
@@ -25,7 +24,7 @@ def test_invalid_chunk_entries(self):
         chunks = {
             "0.0.0": {"path": "s3://bucket/foo.nc"},
         }
-        with pytest.raises(ValidationError, match="missing"):
+        with pytest.raises(ValueError, match="must be of the form"):
             ChunkManifest(entries=chunks)
 
         chunks = {
@@ -35,7 +34,7 @@ def test_invalid_chunk_entries(self):
                 "length": 100,
             },
         }
-        with pytest.raises(ValidationError, match="should be a valid integer"):
+        with pytest.raises(ValueError, match="must be of the form"):
             ChunkManifest(entries=chunks)
 
     def test_invalid_chunk_keys(self):
@@ -52,20 +51,6 @@ def test_invalid_chunk_keys(self):
         with pytest.raises(ValueError, match="Inconsistent number of dimensions"):
             ChunkManifest(entries=chunks)
 
-        chunks = {
-            "0.0.0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100},
-            "0.0.1": {"path": "s3://bucket/foo.nc", "offset": 200, "length": 100},
-            "0.1.0": {"path": "s3://bucket/foo.nc", "offset": 300, "length": 100},
-        }
-        with pytest.raises(ValueError, match="do not form a complete grid"):
-            ChunkManifest(entries=chunks)
-
-        chunks = {
-            "1": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100},
-        }
-        with pytest.raises(ValueError, match="do not form a complete grid"):
-            ChunkManifest(entries=chunks)
-
 
 class TestProperties:
     def test_chunk_grid_info(self):
@@ -94,66 +79,9 @@ def test_equals(self):
                 "0.0.1": {"path": "foo.nc", "offset": 400, "length": 100},
             }
         )
-        assert not manifest1 == manifest2
         assert manifest1 != manifest2
 
 
-# TODO could we use hypothesis to test this?
-# Perhaps by testing the property that splitting along a dimension then concatenating the pieces along that dimension should recreate the original manifest?
-class TestCombineManifests:
-    def test_concat(self):
-        manifest1 = ChunkManifest(
-            entries={
-                "0.0.0": {"path": "foo.nc", "offset": 100, "length": 100},
-                "0.0.1": {"path": "foo.nc", "offset": 200, "length": 100},
-            }
-        )
-        manifest2 = ChunkManifest(
-            entries={
-                "0.0.0": {"path": "foo.nc", "offset": 300, "length": 100},
-                "0.0.1": {"path": "foo.nc", "offset": 400, "length": 100},
-            }
-        )
-        axis = 1
-        expected = ChunkManifest(
-            entries={
-                "0.0.0": {"path": "foo.nc", "offset": 100, "length": 100},
-                "0.0.1": {"path": "foo.nc", "offset": 200, "length": 100},
-                "0.1.0": {"path": "foo.nc", "offset": 300, "length": 100},
-                "0.1.1": {"path": "foo.nc", "offset": 400, "length": 100},
-            }
-        )
-
-        result = concat_manifests([manifest1, manifest2], axis=axis)
-        assert result.dict() == expected.dict()
-
-    def test_stack(self):
-        manifest1 = ChunkManifest(
-            entries={
-                "0.0": {"path": "foo.nc", "offset": 100, "length": 100},
-                "0.1": {"path": "foo.nc", "offset": 200, "length": 100},
-            }
-        )
-        manifest2 = ChunkManifest(
-            entries={
-                "0.0": {"path": "foo.nc", "offset": 300, "length": 100},
-                "0.1": {"path": "foo.nc", "offset": 400, "length": 100},
-            }
-        )
-        axis = 1
-        expected = ChunkManifest(
-            entries={
-                "0.0.0": {"path": "foo.nc", "offset": 100, "length": 100},
-                "0.0.1": {"path": "foo.nc", "offset": 200, "length": 100},
-                "0.1.0": {"path": "foo.nc", "offset": 300, "length": 100},
-                "0.1.1": {"path": "foo.nc", "offset": 400, "length": 100},
-            }
-        )
-
-        result = stack_manifests([manifest1, manifest2], axis=axis)
-        assert result.dict() == expected.dict()
-
-
 @pytest.mark.skip(reason="Not implemented")
 class TestSerializeManifest:
     def test_serialize_manifest_to_zarr(self): ...
diff --git a/virtualizarr/tests/test_zarr.py b/virtualizarr/tests/test_zarr.py
index c8753e0e..80d04b9c 100644
--- a/virtualizarr/tests/test_zarr.py
+++ b/virtualizarr/tests/test_zarr.py
@@ -3,12 +3,14 @@
 import xarray.testing as xrt
 
 from virtualizarr import ManifestArray, open_virtual_dataset
-from virtualizarr.manifests.manifest import ChunkEntry
+from virtualizarr.manifests.manifest import ChunkManifest
 
 
 def test_zarr_v3_roundtrip(tmpdir):
     arr = ManifestArray(
-        chunkmanifest={"0.0": ChunkEntry(path="test.nc", offset=6144, length=48)},
+        chunkmanifest=ChunkManifest(
+            entries={"0.0": dict(path="test.nc", offset=6144, length=48)}
+        ),
         zarray=dict(
             shape=(2, 3),
             dtype=np.dtype("<i8"),
diff --git a/virtualizarr/zarr.py b/virtualizarr/zarr.py
index 2a3825c5..22b46ffb 100644
--- a/virtualizarr/zarr.py
+++ b/virtualizarr/zarr.py
@@ -5,6 +5,7 @@
     Any,
     Literal,
     NewType,
+    Optional,
 )
 
 import numpy as np
@@ -75,7 +76,7 @@ def __repr__(self) -> str:
     def from_kerchunk_refs(cls, decoded_arr_refs_zarray) -> "ZArray":
         # coerce type of fill_value as kerchunk can be inconsistent with this
         fill_value = decoded_arr_refs_zarray["fill_value"]
-        if fill_value is None or fill_value == "NaN":
+        if fill_value is None or fill_value == "NaN" or fill_value == "nan":
             fill_value = np.nan
 
         return ZArray(
@@ -102,6 +103,31 @@ def dict(self) -> dict[str, Any]:
     def to_kerchunk_json(self) -> str:
         return ujson.dumps(self.dict())
 
+    def replace(
+        self,
+        chunks: Optional[tuple[int, ...]] = None,
+        compressor: Optional[str] = None,
+        dtype: Optional[np.dtype] = None,
+        fill_value: Optional[float] = None,  # float or int?
+        filters: Optional[list[dict]] = None,  # type: ignore[valid-type]
+        order: Optional[Literal["C"] | Literal["F"]] = None,
+        shape: Optional[tuple[int, ...]] = None,
+        zarr_format: Optional[Literal[2] | Literal[3]] = None,
+    ) -> "ZArray":
+        """
+        Convenience method to create a new ZArray from an existing one by altering only certain attributes.
+        """
+        return ZArray(
+            chunks=chunks if chunks is not None else self.chunks,
+            compressor=compressor if compressor is not None else self.compressor,
+            dtype=dtype if dtype is not None else self.dtype,
+            fill_value=fill_value if fill_value is not None else self.fill_value,
+            filters=filters if filters is not None else self.filters,
+            shape=shape if shape is not None else self.shape,
+            order=order if order is not None else self.order,
+            zarr_format=zarr_format if zarr_format is not None else self.zarr_format,
+        )
+
 
 def encode_dtype(dtype: np.dtype) -> str:
     # TODO not sure if there is a better way to get the '<i4' style representation of the dtype out