Skip to content

Commit

Permalink
progress on mypy
Browse files Browse the repository at this point in the history
  • Loading branch information
norlandrhagen committed Dec 17, 2024
1 parent 816e696 commit 31aacf9
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 11 deletions.
4 changes: 2 additions & 2 deletions virtualizarr/readers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ def open_loadable_vars_and_indexes(

# Updates the Xarray open_dataset kwargs if Zarr

if fpath.filepath.suffix == ".zarr":
if fpath.filepath.suffix == ".zarr": # type: ignore
engine = "zarr"
xr_input = fpath.filepath

else:
engine = None
xr_input = fpath.open_file()
xr_input = fpath.open_file() # type: ignore

ds = open_dataset(
xr_input, # type: ignore[arg-type]
Expand Down
28 changes: 21 additions & 7 deletions virtualizarr/readers/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,18 @@ def open_virtual_dataset(
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, Index] | None = None,
virtual_backend_kwargs: Optional[dict] = None,
reader_options: Optional[dict] = None,
) -> Dataset:
"""
Read a Zarr v3 store containing chunk manifests and return an xarray Dataset containing virtualized arrays.
This is experimental - chunk manifests are not part of the Zarr v3 Spec.
"""

if virtual_backend_kwargs:
raise NotImplementedError(
"Zarr V3 Chunk Manifest reader does not understand any virtual_backend_kwargs"
)
storepath = Path(filepath)

if group:
Expand Down Expand Up @@ -164,12 +168,22 @@ def virtual_dataset_from_zarr_group(
# use UPath for combining store path + chunk key when building chunk manifests
store_path = UPath(filepath)

if reader_options is None:
reader_options = {}

zg = zarr.open_group(
filepath, storage_options=reader_options.get("storage_options"), mode="r"
)

zarr_arrays = [val for val in zg.keys()]

# Replace None with an empty set so mypy sees a concrete iterable below
if loadable_variables is None:
loadable_variables = set()

if drop_variables is None:
drop_variables = set()

missing_vars = set(loadable_variables) - set(zarr_arrays)
if missing_vars:
raise ValueError(
Expand All @@ -182,7 +196,7 @@ def virtual_dataset_from_zarr_group(

virtual_variable_mapping = {
f"{var}": construct_virtual_array(
zarr_group=zg, var_name=var, filepath=store_path
zarr_group=zg, var_name=var, store_path=store_path
)
for var in virtual_vars
}
Expand Down Expand Up @@ -218,23 +232,23 @@ def virtual_dataset_from_zarr_group(
)


async def get_chunk_size(zarr_group: zarr.core.group, chunk_key: PosixPath) -> int:
async def get_chunk_size(zarr_group: zarr.Group, chunk_key: PosixPath) -> int:
# Use zarr-python's `getsize` method to get bytes per chunk
return await zarr_group.store.getsize(chunk_key)


async def chunk_exists(zarr_group: zarr.core.group, chunk_key: PosixPath) -> bool:
async def chunk_exists(zarr_group: zarr.Group, chunk_key: PosixPath) -> bool:
# Calls zarr-python's `exists` to check for a chunk
return await zarr_group.store.exists(chunk_key)


async def list_store_keys(zarr_group: zarr.core.group) -> list[str]:
async def list_store_keys(zarr_group: zarr.Group) -> list[str]:
# Lists all keys in a store
return [item async for item in zarr_group.store.list()]


async def get_chunk_paths(
zarr_group: zarr.core.group, array_name: str, store_path: upath.core.UPath
zarr_group: zarr.Group, array_name: str, store_path: upath.core.UPath
) -> dict:
chunk_paths = {}

Expand Down Expand Up @@ -279,7 +293,7 @@ async def get_chunk_paths(


def construct_virtual_array(
zarr_group: zarr.core.group.Group, var_name: str, store_path: upath.core.UPath
zarr_group: zarr.Group, var_name: str, store_path: upath.core.UPath
):
zarr_array = zarr_group[var_name]

Expand Down
7 changes: 5 additions & 2 deletions virtualizarr/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
if TYPE_CHECKING:
import fsspec.core
import fsspec.spec
import upath

# See pangeo_forge_recipes.storage
OpenFileType = Union[
Expand All @@ -32,7 +33,7 @@ class _FsspecFSFromFilepath:
"""

filepath: str
filepath: str | upath.core.UPath
reader_options: Optional[dict] = field(default_factory=dict)
fs: fsspec.AbstractFileSystem = field(init=False)

Expand All @@ -59,7 +60,9 @@ def __post_init__(self) -> None:
import fsspec
from upath import UPath

self.filepath = UPath(self.filepath)
if not isinstance(self.filepath, UPath):
self.filepath = UPath(self.filepath)

protocol = self.filepath.protocol

self.reader_options = self.reader_options or {}
Expand Down

0 comments on commit 31aacf9

Please sign in to comment.