Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add compression and documentation options to netCDF exporter #352

Merged
merged 4 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/psf/black
rev: 22.6.0
rev: 24.3.0
hooks:
- id: black
language_version: python3
57 changes: 52 additions & 5 deletions pysteps/io/exporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,11 @@ def initialize_forecast_exporter_netcdf(
shape,
metadata,
n_ens_members=1,
datatype=np.float32,
incremental=None,
fill_value=None,
scale_factor=None,
offset=None,
**kwargs,
):
"""
Expand Down Expand Up @@ -401,12 +405,35 @@ def initialize_forecast_exporter_netcdf(
n_ens_members: int
Number of ensemble members in the forecast. This argument is ignored if
incremental is set to 'member'.
datatype: np.dtype, optional
The datatype of the output values. Defaults to np.float32.
incremental: {None,'timestep','member'}, optional
Allow incremental writing of datasets into the netCDF files.\n
The available options are: 'timestep' = write a forecast or a forecast
ensemble for a given time step; 'member' = write a forecast sequence
for a given ensemble member. If set to None, incremental writing is
disabled.
fill_value: int, optional
Value used to mark missing data. Defaults to None, in which case the
default netCDF4 fill value for the chosen datatype is used.
scale_factor: float, optional
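The scale factor used to pack the data as: store_value =
(precipitation_value - offset) / scale_factor, so that reading gives
precipitation_value = scale_factor * store_value + offset. Defaults to
None. Combined with an integer datatype, the scale_factor can be used to
reduce data storage.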
offset: float, optional
The offset used to pack the data as: store_value =
(precipitation_value - offset) / scale_factor. It is written to the
netCDF variable as the add_offset attribute. Defaults to None.

Other Parameters
----------------
institution: str
The institute, company or community that has created the nowcast.
Default: the pySTEPS community (https://pysteps.github.io)
references: str
Any references to be included in the netCDF file. Defaults to "".
comment: str
Any comments about the data or storage protocol that should be
included in the netCDF file. Defaults to "".

Returns
-------
Expand Down Expand Up @@ -448,18 +475,25 @@ def initialize_forecast_exporter_netcdf(
if n_ens_members > 1:
n_ens_gt_one = True

# Kwargs to be used as description strings in the netCDF
institution = kwargs.get(
"institution", "the pySTEPS community (https://pysteps.github.io)"
)
references = kwargs.get("references", "")
comment = kwargs.get("comment", "")

exporter = {}

outfn = os.path.join(outpath, outfnprefix + ".nc")
ncf = netCDF4.Dataset(outfn, "w", format="NETCDF4")

ncf.Conventions = "CF-1.7"
ncf.title = "pysteps-generated nowcast"
ncf.institution = "the pySTEPS community (https://pysteps.github.io)"
ncf.institution = institution
ncf.source = "pysteps" # TODO(exporters): Add pySTEPS version here
ncf.history = ""
ncf.references = ""
ncf.comment = ""
ncf.references = references
ncf.comment = comment

h, w = shape

Expand Down Expand Up @@ -559,14 +593,22 @@ def initialize_forecast_exporter_netcdf(
if incremental == "member" or n_ens_gt_one:
var_f = ncf.createVariable(
var_name,
np.float32,
datatype=datatype,
dimensions=("ens_number", "time", "y", "x"),
compression="zlib",
zlib=True,
complevel=9,
fill_value=fill_value,
)
else:
var_f = ncf.createVariable(
var_name, np.float32, dimensions=("time", "y", "x"), zlib=True, complevel=9
var_name,
datatype=datatype,
dimensions=("time", "y", "x"),
compression="zlib",
zlib=True,
complevel=9,
fill_value=fill_value,
)

if var_standard_name is not None:
Expand All @@ -576,6 +618,11 @@ def initialize_forecast_exporter_netcdf(
var_f.units = var_unit
if grid_mapping_var_name is not None:
var_f.grid_mapping = grid_mapping_var_name
# Add gain and offset
if scale_factor is not None:
var_f.scale_factor = scale_factor
if offset is not None:
var_f.add_offset = offset

exporter["method"] = "netcdf"
exporter["ncfile"] = ncf
Expand Down
27 changes: 20 additions & 7 deletions pysteps/tests/test_exporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,21 @@
from pysteps.tests.helpers import get_precipitation_fields, get_invalid_mask

# Test arguments
exporter_arg_names = ("n_ens_members", "incremental")
exporter_arg_names = (
"n_ens_members",
"incremental",
"datatype",
"fill_value",
"scale_factor",
"offset",
)

exporter_arg_values = [
(1, None),
(1, "timestep"),
(2, None),
(2, "timestep"),
(2, "member"),
(1, None, np.float32, None, None, None),
(1, "timestep", np.float32, 65535, None, None),
(2, None, np.float32, 65535, None, None),
(2, "timestep", np.float32, None, None, None),
(2, "member", np.float64, None, 0.01, 1.0),
]


Expand All @@ -46,7 +53,9 @@ def test_get_geotiff_filename():


@pytest.mark.parametrize(exporter_arg_names, exporter_arg_values)
def test_io_export_netcdf_one_member_one_time_step(n_ens_members, incremental):
def test_io_export_netcdf_one_member_one_time_step(
n_ens_members, incremental, datatype, fill_value, scale_factor, offset
):
"""
Test the export netcdf.
Also, test that the exported file can be read by the importer.
Expand Down Expand Up @@ -78,7 +87,11 @@ def test_io_export_netcdf_one_member_one_time_step(n_ens_members, incremental):
shape,
metadata,
n_ens_members=n_ens_members,
datatype=datatype,
incremental=incremental,
fill_value=fill_value,
scale_factor=scale_factor,
offset=offset,
)

if n_ens_members > 1:
Expand Down
Loading