Skip to content

Commit

Permalink
make all nowcast methods xarray compatible (#414)
Browse files Browse the repository at this point in the history
* make test steps skill run

* undo accidental change

* make steps nowcast xarray compatible

* wrap all nowcasts in xarray

* fix dimension.py tests

* update dimension.py to work with new dataarrays

* fix test_nowcast_utils tests

* update docs and make xarray usage more explicit in nowcasts

* update docs and make xarray usage in motion methods more explicit
  • Loading branch information
mats-knmi committed Sep 2, 2024
1 parent 5de0bcb commit 2e0aca2
Show file tree
Hide file tree
Showing 27 changed files with 865 additions and 441 deletions.
10 changes: 7 additions & 3 deletions pysteps/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

import numpy as np

from pysteps.converters import convert_to_xarray_dataset
from pysteps.xarray_helpers import convert_input_to_xarray_dataset


def _add_extra_kwrds_to_docstrings(target_func, extra_kwargs_doc_text):
Expand Down Expand Up @@ -90,7 +90,9 @@ def _import_with_postprocessing(*args, **kwargs):
mask = ~np.isfinite(precip)
precip[mask] = _fillna

return convert_to_xarray_dataset(precip.astype(_dtype), quality, metadata)
return convert_input_to_xarray_dataset(
precip.astype(_dtype), quality, metadata
)

extra_kwargs_doc = """
Other Parameters
Expand Down Expand Up @@ -126,7 +128,9 @@ def new_function(*args, **kwargs):
target motion_method_func function.
"""

input_images = args[0]
dataset = args[0]
precip_var = dataset.attrs["precip_var"]
input_images = dataset[precip_var].values
if input_images.ndim != 3:
raise ValueError(
"input_images dimension mismatch.\n"
Expand Down
43 changes: 27 additions & 16 deletions pysteps/io/importers.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,22 +74,27 @@
.. tabularcolumns:: |p{2cm}|L|
+---------------+-------------------------------------------------------------------------------------------+
| Coordinate | Description |
+===============+===========================================================================================+
| y | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
+---------------+-------------------------------------------------------------------------------------------+
| x | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
+---------------+-------------------------------------------------------------------------------------------+
| lat | latitude coordinate in degrees |
+---------------+-------------------------------------------------------------------------------------------+
| lon | longitude coordinate in degrees |
+---------------+-------------------------------------------------------------------------------------------+
| time | forecast time in seconds since forecast start time |
+---------------+-------------------------------------------------------------------------------------------+
| member | ensemble member number (integer) |
+---------------+-------------------------------------------------------------------------------------------+
+--------------------+-------------------------------------------------------------------------------------------+
| Coordinate | Description |
+====================+===========================================================================================+
| y | y-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
+--------------------+-------------------------------------------------------------------------------------------+
| x | x-coordinate in Cartesian system, with units determined by ``metadata["cartesian_unit"]`` |
+--------------------+-------------------------------------------------------------------------------------------+
| lat | latitude coordinate in degrees |
+--------------------+-------------------------------------------------------------------------------------------+
| lon | longitude coordinate in degrees |
+--------------------+-------------------------------------------------------------------------------------------+
| time | forecast time in seconds since forecast start time |
+--------------------+-------------------------------------------------------------------------------------------+
| ens_number | ensemble member number (integer) |
+--------------------+-------------------------------------------------------------------------------------------+
| direction | used by proesmans to return the forward and backward advection and consistency fields |
+--------------------+-------------------------------------------------------------------------------------------+
The time, x and y dimensions all MUST be regularly spaced, with the stepsize included
in a ``stepsize`` attribute. The stepsize is given in the unit of the dimension (this
is alwyas seconds for the time dimension).
The dataset can contain the following data variables:
Expand All @@ -102,8 +107,14 @@
| precip_accum | precip_intensity if unit is ``mm/h``, precip_accum if unit is ``mm`` and reflectivity if unit is ``dBZ``, |
| or reflectivity | the attributes of this variable contain metadata relevant to this attribute (see below) |
+-------------------+-----------------------------------------------------------------------------------------------------------+
| velocity_x | x-component of the advection field in cartesian_unit per timestep |
+-------------------+-----------------------------------------------------------------------------------------------------------+
| velocity_y | y-component of the advection field in cartesian_unit per timestep |
+-------------------+-----------------------------------------------------------------------------------------------------------+
| quality | value between 0 and 1 denoting the quality of the precipitation data, currently not used for anything |
+-------------------+-----------------------------------------------------------------------------------------------------------+
| velocity_quality | value between 0 and 1 denoting the quality of the velocity data, currently only returned by proesmans |
+-------------------+-----------------------------------------------------------------------------------------------------------+
Some of the metadata in the metadata dictionary is not explicitely stored in the dataset,
but is still implicitly present. For example ``x1`` can easily be found by taking the first
Expand Down
16 changes: 15 additions & 1 deletion pysteps/io/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import xarray as xr


def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
def read_timeseries(inputfns, importer, timestep=None, **kwargs) -> xr.Dataset | None:
"""
Read a time series of input files using the methods implemented in the
:py:mod:`pysteps.io.importers` module and stack them into a 3d xarray
Expand All @@ -28,6 +28,9 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
:py:mod:`pysteps.io.archive` module.
importer: function
A function implemented in the :py:mod:`pysteps.io.importers` module.
timestep: int, optional
The timestep in seconds, this value is optional if more than 1 inputfns
are given.
kwargs: dict
Optional keyword arguments for the importer.
Expand Down Expand Up @@ -58,6 +61,16 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
return None

startdate = min(inputfns[1])
sorted_dates = sorted(inputfns[1])
timestep_dates = int((sorted_dates[1] - sorted_dates[0]).total_seconds())

if timestep is None:
timestep = timestep_dates
if timestep != timestep_dates:
raise ValueError("given timestep does not match inputfns")
for i in range(len(sorted_dates) - 1):
if int((sorted_dates[i + 1] - sorted_dates[i]).total_seconds()) != timestep:
raise ValueError("supplied dates are not evenly spaced")

datasets = []
for i, ifn in enumerate(inputfns[0]):
Expand All @@ -73,6 +86,7 @@ def read_timeseries(inputfns, importer, **kwargs) -> xr.Dataset | None:
{
"long_name": "forecast time",
"units": f"seconds since {startdate:%Y-%m-%d %H:%M:%S}",
"stepsize": timestep,
},
)
)
Expand Down
26 changes: 17 additions & 9 deletions pysteps/motion/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,32 @@

import numpy as np
import scipy.optimize as op
import xarray as xr
from scipy.ndimage import map_coordinates


def constant(R, **kwargs):
def constant(dataset: xr.Dataset, **kwargs):
"""
Compute a constant advection field by finding a translation vector that
maximizes the correlation between two successive images.
Parameters
----------
R: array_like
Array of shape (T,m,n) containing a sequence of T two-dimensional input
images of shape (m,n). If T > 2, two last elements along axis 0 are used.
dataset: xarray.Dataset
Input dataset as described in the documentation of
:py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
The dataset has to have a time dimension. If the size of this dimension
is larger than 2, the last 2 entries of this dimension are used.
Returns
-------
out: array_like
The constant advection field having shape (2, m, n), where out[0, :, :]
contains the x-components of the motion vectors and out[1, :, :]
contains the y-components.
out: xarray.Dataset
The input dataset with the constant advection field added in the ``velocity_x``
and ``velocity_y`` data variables.
"""
dataset = dataset.copy(deep=True)
precip_var = dataset.attrs["precip_var"]
R = dataset[precip_var].values
m, n = R.shape[1:]
X, Y = np.meshgrid(np.arange(n), np.arange(m))

Expand All @@ -51,4 +56,7 @@ def f(v):
options = {"initial_simplex": (np.array([(0, 1), (1, 0), (1, 1)]))}
result = op.minimize(f, (1, 1), method="Nelder-Mead", options=options)

return np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))])
output = np.stack([-result.x[0] * np.ones((m, n)), -result.x[1] * np.ones((m, n))])
dataset["velocity_x"] = (["y", "x"], output[0])
dataset["velocity_y"] = (["y", "x"], output[1])
return dataset
29 changes: 19 additions & 10 deletions pysteps/motion/darts.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,28 @@
DARTS
"""

import numpy as np
import time

import numpy as np
import xarray as xr
from numpy.linalg import lstsq, svd

from pysteps import utils
from pysteps.decorators import check_input_frames


@check_input_frames(just_ndim=True)
def DARTS(input_images, **kwargs):
def DARTS(dataset: xr.Dataset, **kwargs):
"""
Compute the advection field from a sequence of input images by using the
DARTS method. :cite:`RCW2011`
Parameters
----------
input_images: array-like
Array of shape (T,m,n) containing a sequence of T two-dimensional input
images of shape (m,n).
dataset: xarray.Dataset
Input dataset as described in the documentation of
:py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
The dataset has to have a time dimension.
Other Parameters
----------------
Expand Down Expand Up @@ -67,13 +70,15 @@ def DARTS(input_images, **kwargs):
Returns
-------
out: ndarray
Three-dimensional array (2,m,n) containing the dense x- and y-components
of the motion field in units of pixels / timestep as given by the input
array R.
out: xarray.Dataset
The input dataset with the advection field added in the ``velocity_x``
and ``velocity_y`` data variables.
"""

dataset = dataset.copy(deep=True)
precip_var = dataset.attrs["precip_var"]
input_images = dataset[precip_var].values
N_x = kwargs.get("N_x", 50)
N_y = kwargs.get("N_y", 50)
N_t = kwargs.get("N_t", 4)
Expand Down Expand Up @@ -214,10 +219,14 @@ def DARTS(input_images, **kwargs):
fft.ifft2(_fill(V, input_images.shape[0], input_images.shape[1], k_x, k_y))
)

output = np.stack([U, V])
dataset["velocity_x"] = (["y", "x"], output[0])
dataset["velocity_y"] = (["y", "x"], output[1])

if verbose:
print("--- %s seconds ---" % (time.time() - t0))

return np.stack([U, V])
return dataset


def _leastsq(A, B, y):
Expand Down
61 changes: 32 additions & 29 deletions pysteps/motion/lucaskanade.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,22 @@
dense_lucaskanade
"""

import time

import numpy as np
import xarray as xr
from numpy.ma.core import MaskedArray

from pysteps import feature, utils
from pysteps.decorators import check_input_frames

from pysteps import utils, feature
from pysteps.tracking.lucaskanade import track_features
from pysteps.utils.cleansing import decluster, detect_outliers
from pysteps.utils.images import morph_opening

import time


@check_input_frames(2)
def dense_lucaskanade(
input_images,
dataset: xr.Dataset,
lk_kwargs=None,
fd_method="shitomasi",
fd_kwargs=None,
Expand Down Expand Up @@ -73,18 +73,14 @@ def dense_lucaskanade(
Parameters
----------
input_images: ndarray_ or MaskedArray_
Array of shape (T, m, n) containing a sequence of *T* two-dimensional
input images of shape (m, n). The indexing order in **input_images** is
assumed to be (time, latitude, longitude).
*T* = 2 is the minimum required number of images.
With *T* > 2, all the resulting sparse vectors are pooled together for
the final interpolation on a regular grid.
In case of ndarray_, invalid values (Nans or infs) are masked,
otherwise the mask of the MaskedArray_ is used. Such mask defines a
region where features are not detected for the tracking algorithm.
dataset: xarray.Dataset
Input dataset as described in the documentation of
:py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
The dataset has to have a time dimension. The size of the time dimension needs to
be at least 2. If it is larger than 2, all the resulting sparse vectors are pooled
together for the final interpolation on a regular grid. Invalid values (Nans or infs)
are masked. This mask defines a region where features are not detected for the tracking
algorithm.
lk_kwargs: dict, optional
Optional dictionary containing keyword arguments for the `Lucas-Kanade`_
Expand Down Expand Up @@ -151,14 +147,10 @@ def dense_lucaskanade(
Returns
-------
out: ndarray_ or tuple
If **dense=True** (the default), return the advection field having shape
(2, m, n), where out[0, :, :] contains the x-components of the motion
vectors and out[1, :, :] contains the y-components.
The velocities are in units of pixels / timestep, where timestep is the
time difference between the two input images.
Return a zero motion field of shape (2, m, n) when no motion is
detected.
out: xarray.Dataset or tuple
If **dense=True** (the default), return the input dataset with the advection
field added in the ``velocity_x`` and ``velocity_y`` data variables.
Return a zero motion field when no motion is detected.
If **dense=False**, it returns a tuple containing the 2-dimensional
arrays **xy** and **uv**, where x, y define the vector locations,
Expand All @@ -179,7 +171,9 @@ def dense_lucaskanade(
Understanding Workshop, pp. 121–130, 1981.
"""

input_images = input_images.copy()
dataset = dataset.copy(deep=True)
precip_var = dataset.attrs["precip_var"]
input_images = dataset[precip_var].values

if verbose:
print("Computing the motion field with the Lucas-Kanade method.")
Expand Down Expand Up @@ -244,7 +238,10 @@ def dense_lucaskanade(
# return zero motion field is no sparse vectors are found
if xy.shape[0] == 0:
if dense:
return np.zeros((2, domain_size[0], domain_size[1]))
uvgrid = np.zeros((2, domain_size[0], domain_size[1]))
dataset["velocity_x"] = (["y", "x"], uvgrid[0])
dataset["velocity_y"] = (["y", "x"], uvgrid[1])
return dataset
else:
return xy, uv

Expand All @@ -266,14 +263,20 @@ def dense_lucaskanade(

# return zero motion field if no sparse vectors are left for interpolation
if xy.shape[0] == 0:
return np.zeros((2, domain_size[0], domain_size[1]))
uvgrid = np.zeros((2, domain_size[0], domain_size[1]))
dataset["velocity_x"] = (["y", "x"], uvgrid[0])
dataset["velocity_y"] = (["y", "x"], uvgrid[1])
return dataset

# interpolation
xgrid = np.arange(domain_size[1])
ygrid = np.arange(domain_size[0])
uvgrid = interpolation_method(xy, uv, xgrid, ygrid, **interp_kwargs)

dataset["velocity_x"] = (["y", "x"], uvgrid[0])
dataset["velocity_y"] = (["y", "x"], uvgrid[1])

if verbose:
print("--- total time: %.2f seconds ---" % (time.time() - t0))

return uvgrid
return dataset
Loading

0 comments on commit 2e0aca2

Please sign in to comment.