Add RMSE (#33)

* Add RMSE
nci · Aug 21, 2023 · fd61b09 · fd61b09
1 parent 68875f1
commit fd61b09
Show file tree

Hide file tree

Showing 4 changed files with 516 additions and 1 deletion.
diff --git a/docs/userguide.md b/docs/userguide.md
@@ -123,10 +123,10 @@ Each score is documented in the API documentation [ api.md ](api.md). A simple l
 
  - Mean Absolute Error
  - Mean Squared Error
+ - Root Mean Squared Error
  - Continuous Ranked Probability Score
 
 The following scores are expected to be added shortly
  - Flip Flop Index
  - FIRM
- - Root Mean Square Error
 
diff --git a/src/scores/continuous.py b/src/scores/continuous.py
@@ -59,6 +59,50 @@ def mse(fcst, obs, reduce_dims=None, preserve_dims=None, weights=None):
     return _mse
 
 
+def rmse(fcst, obs, reduce_dims = None, preserve_dims = None, weights=None):    
+    """Calculate the Root Mean Squared Error from xarray or pandas objects.
+
+      A detailed explanation is on [Wikipedia](https://en.wikipedia.org/wiki/Root-mean-square_deviation)
+
+      
+      Dimensional reduction is not supported for pandas and the user should
+      convert their data to xarray to formulate the call to the metric.
+      At most one of `reduce_dims` and `preserve_dims` may be specified.
+      Specifying both will result in an exception.
+
+
+      Args:
+          fcst Union[xr.Dataset, xr.DataArray, pd.Dataframe, pd.Series]: Forecast
+              or predicted variables in xarray or pandas.
+          obs (Union[xr.Dataset, xr.DataArray, pd.Dataframe, pd.Series]): Observed
+              variables in xarray or pandas.
+          reduce_dims (Union[str, Iterable[str]): Optionally specify which dimensions to reduce when
+              calculating RMSE. All other dimensions will be preserved.
+          preserve_dims (Union[str, Iterable[str]): Optionally specify which dimensions to preserve
+              when calculating RMSE. All other dimensions will be reduced.
+              As a special case, 'all' will allow all dimensions to be
+              preserved. In this case, the result will be in the same
+              shape/dimensionality as the forecast, and the errors will be
+              the absolute error at each point (i.e. single-value comparison
+              against observed), and the forecast and observed dimensions
+              must match precisely.
+        weights: Not yet implemented. Allow weighted averaging (e.g. by
+            area, by latitude, by population, custom)
+            
+      Returns:
+          Union[xr.Dataset, xr.DataArray, pd.Dataframe, pd.Series]:  An object containing
+              a single floating point number representing the root mean squared
+              error for the supplied data. All dimensions will be reduced.
+              Otherwise: Returns an object representing the root mean squared error,
+              reduced along the relevant dimensions and weighted appropriately.
+
+    """
+    _mse = mse(fcst=fcst, obs=obs, reduce_dims=reduce_dims, preserve_dims=preserve_dims, weights=weights)
+
+    _rmse = pow(_mse, (1 / 2))
+
+    return _rmse
+
 def mae(fcst, obs, reduce_dims=None, preserve_dims=None, weights=None):
     """Calculates the mean absolute error from forecast and observed data.
 

diff --git a/tests/scores/test_continuous.py b/tests/scores/test_continuous.py
@@ -99,6 +99,143 @@ def test_2d_xarray_mse_with_dimensions():
     assert all(result.round(4) == expected_values)
     assert result.dims == expected_dimensions
 
+# Root Mean Squared Error
+@pytest.fixture
+def rmse_fcst_xarray():
+    return xr.DataArray([-1, 3, 1, 3, 0, 2, 2, 1, 1, 2, 3])
+
+@pytest.fixture
+def rmse_fcst_nan_xarray():
+    return xr.DataArray([-1, 3, 1, 3, np.nan, 2, 2, 1, 1, 2, 3])
+
+@pytest.fixture
+def rmse_obs_xarray():
+    return xr.DataArray([1, 1, 1, 2, 1, 2, 1, -1, 1, 3, 1])
+
+@pytest.fixture
+def rmse_fcst_pandas():
+    return pd.Series([-1, 3, 1, 3, 0, 2, 2, 1, 1, 2, 3])
+
+@pytest.fixture
+def rmse_fcst_nan_pandas():
+    return pd.Series([-1, 3, 1, 3, np.nan, 2, 2, 1, 1, 2, 3])
+
+@pytest.fixture
+def rmse_obs_pandas():
+    return pd.Series([1, 1, 1, 2, 1, 2, 1, 1, -1, 3, 1])
+
+
+@pytest.mark.parametrize(
+    "forecast, observations, expected, request_kwargs", 
+    [
+        ('rmse_fcst_xarray', 'rmse_obs_xarray', xr.DataArray(1.3484), {}),
+        ('rmse_fcst_xarray', 'rmse_obs_xarray', xr.DataArray([2, 2, 0, 1, 1, 0, 1, 2, 0, 1, 2]), dict(preserve_dims = 'all')),
+        ('rmse_fcst_nan_xarray', 'rmse_obs_xarray', xr.DataArray(1.3784), {}),
+        ('rmse_fcst_xarray', 1, xr.DataArray(1.3484), {}),
+        ('rmse_fcst_nan_xarray', 1, xr.DataArray(1.3784), {}),
+        ('rmse_fcst_pandas', 'rmse_obs_pandas', 1.3484, {}),
+        ('rmse_fcst_pandas', 1, 1.3484, {}),
+        ('rmse_fcst_nan_pandas', 'rmse_obs_pandas', 1.3784, {}),
+    ],
+    ids=["simple-1d", "preserve-1d", "simple-1d-w-nan", "to-point","to-point-w-nan", "pandas-series-1d", "pandas-to-point", "pandas-series-nan-1d"]
+)
+def test_rmse_xarray_1d(forecast, observations, expected, request_kwargs, request):
+    """
+    Test RMSE for the following cases:
+       * Calculates the correct value for a simple xarray 1d sequence
+       * Calculates the correct value for an xarray 1d sequence preserving all
+       * Calculates the correct value for a simple pandas 1d series 
+       * Calculates the correct value for an xarray 1d sequence comparing to a point
+    """
+    if isinstance(forecast, str): forecast = request.getfixturevalue(forecast)
+    if isinstance(observations, str): observations = request.getfixturevalue(observations)
+    result = scores.continuous.rmse(forecast, observations, **request_kwargs)
+    print(result)
+    assert (result.round(PRECISION) == expected).all()
+
+
+
+@pytest.fixture
+def rmse_2d_fcst_xarray():
+    numpy.random.seed(0)
+    lats = [50, 51, 52, 53]
+    lons = [30, 31, 32, 33]
+    fcst_temperatures_2d = 15 + 8 * np.random.randn(1, 4, 4)
+    fcst_temperatures_2d[0,2,1] = np.nan
+    return xr.DataArray(fcst_temperatures_2d[0], dims=["latitude", "longitude"], coords=[lats, lons])
+
+@pytest.fixture
+def rmse_2d_obs_xarray():
+    numpy.random.seed(0)
+    lats = [50, 51, 52, 53]
+    lons = [30, 31, 32, 33]
+    obs_temperatures_2d = 15 + 6 * np.random.randn(1, 4, 4)
+    obs_temperatures_2d[0,1,2] = np.nan
+    return xr.DataArray(obs_temperatures_2d[0], dims=["latitude", "longitude"], coords=[lats, lons])
+
+@pytest.fixture
+def rmse_2d_expected_xarray():
+    lons = [30, 31, 32, 33]
+    exp_temperatures_2d = [2.6813, 1.2275, 1.252 , 2.6964]
+    return xr.DataArray(exp_temperatures_2d, dims=["longitude"], coords=[lons])
+
+@pytest.mark.parametrize(
+    "forecast, observations, expected, request_kwargs, expected_dimensions", 
+    [
+        ('rmse_2d_fcst_xarray', 'rmse_2d_obs_xarray', xr.DataArray(2.1887), {}, ()),
+        ('rmse_2d_fcst_xarray', 'rmse_2d_obs_xarray', 'rmse_2d_expected_xarray', dict(reduce_dims="latitude"), ("longitude",)),
+        ('rmse_2d_fcst_xarray', 'rmse_2d_obs_xarray', 'rmse_2d_expected_xarray', dict(preserve_dims="longitude"), ("longitude",)),
+    ],
+    ids=["simple-2d", "reduce-2d", "preserve-2d"]
+)
+def test_rmse_xarray_2d_rand(forecast, observations, expected, request_kwargs, expected_dimensions, request):
+    """
+    Test RMSE for the following cases on 2d Data:
+       * Calculates the correct value for a simple xarray 2d sequence
+       * Calculates the correct value for an xarray 2d sequence reducing over set dim
+       * Calculates the correct value for an xarray 2d sequence preserving set dim
+    """
+    if isinstance(forecast, str): forecast = request.getfixturevalue(forecast)
+    if isinstance(observations, str): observations = request.getfixturevalue(observations)
+    if isinstance(expected, str): expected = request.getfixturevalue(expected)
+
+    result = scores.continuous.rmse(forecast, observations, **request_kwargs)
+    xr.testing.assert_allclose(result.round(PRECISION), expected)
+    assert result.dims == expected_dimensions
+
+def create_xarray(data : list[list[float]]):
+    data = np.array(data)
+    lats = np.arange(data.shape[0]) + 50
+    lons = np.arange(data.shape[1]) + 30
+    return xr.DataArray(data, dims=["latitude", "longitude"], coords=[lats, lons])
+
+@pytest.mark.parametrize(
+    "forecast, observations, expected, request_kwargs,", 
+    [
+        (create_xarray([[0,0],[1,1]]), create_xarray([[0,0],[1,1]]), xr.DataArray(0), {}),
+        (create_xarray([[-1,0],[1,1]]), create_xarray([[-1,0],[1,1]]), xr.DataArray(0), {}),
+        (create_xarray([[1,0],[1,1]]), create_xarray([[-1,0],[1,1]]), xr.DataArray(1), {}),
+        (create_xarray([[-1,0],[1,1]]), create_xarray([[1,0],[1,1]]), xr.DataArray(1), {}),
+        (create_xarray([[np.nan,0],[1,1]]), create_xarray([[1,0],[1,1]]), xr.DataArray(0), {}),
+        (create_xarray([[np.nan,1],[1,1]]), create_xarray([[1,0],[1,1]]), xr.DataArray(np.sqrt(1/3).round(PRECISION)), {}),
+        (create_xarray([[1,0],[1,1]]), create_xarray([[np.nan,0],[1,1]]), xr.DataArray(0), {}),
+        (create_xarray([[1,0],[1,1]]), create_xarray([[np.nan,1],[1,1]]), xr.DataArray(np.sqrt(1/3).round(PRECISION)), {}),
+    ],
+    ids=["perfect", "perfect-neg","single","single_neg", "perfect-nan","single-nan", "perfect-nan-r","single-nan-r"]
+
+)
+def test_rmse_xarray_2d_defined(forecast, observations, expected, request_kwargs, request):
+    """
+    Test RMSE Values for defined edge cases
+
+    """
+    if isinstance(forecast, str): forecast = request.getfixturevalue(forecast)
+    if isinstance(observations, str): observations = request.getfixturevalue(observations)
+    if isinstance(expected, str): expected = request.getfixturevalue(expected)
+
+    result = scores.continuous.rmse(forecast, observations, **request_kwargs)
+    xr.testing.assert_allclose(result.round(PRECISION), expected)
+
 
 # Mean Absolute Error
 

diff --git a/tutorials/Root Mean Squared Error.ipynb b/tutorials/Root Mean Squared Error.ipynb