diff --git a/docs/api.md b/docs/api.md index 0a4f41a8..b9b32bfc 100644 --- a/docs/api.md +++ b/docs/api.md @@ -16,6 +16,6 @@ ## Xarray ```{eval-rst} -.. autoclass:: virtualizarr.xarray.open_dataset_via_kerchunk +.. autoclass:: virtualizarr.xarray.open_virtual_dataset :members: ``` diff --git a/virtualizarr/__init__.py b/virtualizarr/__init__.py index 485fd87e..d2749941 100644 --- a/virtualizarr/__init__.py +++ b/virtualizarr/__init__.py @@ -1,3 +1,3 @@ from .manifests import ManifestArray # type: ignore # noqa from .xarray import VirtualiZarrDatasetAccessor # type: ignore # noqa -from .xarray import open_dataset_via_kerchunk # noqa: F401 +from .xarray import open_virtual_dataset # noqa: F401 diff --git a/virtualizarr/kerchunk.py b/virtualizarr/kerchunk.py index 02c0ff4d..3dbc575e 100644 --- a/virtualizarr/kerchunk.py +++ b/virtualizarr/kerchunk.py @@ -37,11 +37,11 @@ def read_kerchunk_references_from_file( if filetype is None: filetype = _automatically_determine_filetype(filepath) - if filetype == "netCDF3": + if filetype.lower() == "netcdf3": from kerchunk.netCDF3 import SingleHdf5ToZarr refs = SingleHdf5ToZarr.NetCDF3ToZarr(filepath).translate() - elif filetype == "netCDF4": + elif filetype.lower() == "netcdf4": from kerchunk.hdf import SingleHdf5ToZarr refs = SingleHdf5ToZarr(filepath).translate() @@ -49,11 +49,11 @@ def read_kerchunk_references_from_file( # TODO Grib files should be handled as a DataTree object # see https://github.com/TomNicholas/VirtualiZarr/issues/11 raise NotImplementedError(f"Unsupported file type: {filetype}") - elif filetype == "tiff": + elif filetype.lower() == "tiff": from kerchunk.tiff import tiff_to_zarr refs = tiff_to_zarr(filepath) - elif filetype == "fits": + elif filetype.lower() == "fits": from kerchunk.fits import process_file refs = process_file(filepath) diff --git a/virtualizarr/xarray.py b/virtualizarr/xarray.py index b452078b..c2f5219f 100644 --- a/virtualizarr/xarray.py +++ b/virtualizarr/xarray.py @@ -16,14 +16,14 @@ 
class ManifestBackendArray(ManifestArray, BackendArray): ... -def open_dataset_via_kerchunk( +def open_virtual_dataset( filepath: str, filetype: str, drop_variables: Optional[List[str]] = None, virtual_array_class=ManifestArray, ) -> xr.Dataset: """ - Use kerchunk to open a single legacy file as an xarray Dataset wrapping virtualized zarr arrays. + Open a file or store as an xarray Dataset wrapping virtualized zarr arrays. It's important that we avoid creating any IndexVariables, as our virtualized zarr array objects don't actually contain a collection that can be turned into a pandas.Index. @@ -33,6 +33,7 @@ def open_dataset_via_kerchunk( File path to open as a set of virtualized zarr arrays. filetype : str, default None Type of file to be opened. Used to determine which kerchunk file format backend to use. + Can be one of {'netCDF3', 'netCDF4'}. If not provided will attempt to automatically infer the correct filetype from the filepath's extension. drop_variables: list[str], default is None Variables in the file to drop before returning.