From 6f251937e11efd394943057ca1ef093ada3ef41e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=BCtschow?= Date: Thu, 28 Nov 2024 21:24:15 +0100 Subject: [PATCH] finalized local trends filling strategy (ready for review) --- primap2/csg/_strategies/gaps.py | 41 +++++++++++++------------ primap2/csg/_strategies/local_trends.py | 4 +-- primap2/tests/csg/test_gaps.py | 11 +++++++ primap2/tests/csg/test_strategies.py | 35 +++++++++++++++------ 4 files changed, 59 insertions(+), 32 deletions(-) diff --git a/primap2/csg/_strategies/gaps.py b/primap2/csg/_strategies/gaps.py index e02b731..58b753c 100644 --- a/primap2/csg/_strategies/gaps.py +++ b/primap2/csg/_strategies/gaps.py @@ -207,13 +207,18 @@ def calculate_boundary_trend_with_fallback( gap=gap, fit_params=fit_params.get_fallback(), ) - if any(np.isnan(trend_ts)): + if all(np.isnan(trend_ts)): logger.info( f"Not enough values to calculate fit for ts and gap:" f"{gap.type}, [{gap.left}:{gap.right}].\n" f"{fit_params.log_string(fallback=True)}" f"Timeseries info: {timeseries_coord_repr(ts)}" ) + # fill nan from other boundary + if np.isnan(trend_ts[0]): + trend_ts[0] = trend_ts[1] + elif np.isnan(trend_ts[1]): + trend_ts[1] = trend_ts[0] return trend_ts @@ -224,7 +229,7 @@ def calculate_boundary_trend( fit_params: FitParameters, ) -> np.array: """ - Calculate trend values for boundary points + Calculate trend values for boundary points. Parameters ---------- @@ -306,15 +311,13 @@ def calculate_right_boundary_trend( """ point_to_modify = get_shifted_time_value(ts, original_value=boundary, shift=1) - ts_fit = ts.pr.loc[ - { - "time": pd.date_range( - start=point_to_modify, - periods=fit_params.trend_length, - freq=fit_params.trend_length_unit, - ) - } - ] + trend_index = pd.date_range( + start=point_to_modify, + periods=fit_params.trend_length, + freq=fit_params.trend_length_unit, + ) + trend_index = trend_index.intersection(ts.coords["time"]) + ts_fit = ts.pr.loc[{"time": trend_index}] if len(ts_fit.where(ts_fit.notnull(), drop=True)) >= fit_params.min_trend_points: fit = ts_fit.polyfit(dim="time", deg=fit_params.fit_degree, skipna=True) @@ -361,15 +364,13 @@ def calculate_left_boundary_trend( """ point_to_modify = get_shifted_time_value(ts, original_value=boundary, shift=-1) - ts_fit = ts.pr.loc[ - { - "time": pd.date_range( - end=point_to_modify, - periods=fit_params.trend_length, - freq=fit_params.trend_length_unit, - ) - } - ] + trend_index = pd.date_range( + end=point_to_modify, + periods=fit_params.trend_length, + freq=fit_params.trend_length_unit, + ) + trend_index = trend_index.intersection(ts.coords["time"]) + ts_fit = ts.pr.loc[{"time": trend_index}] if len(ts_fit.where(ts_fit.notnull(), drop=True)) >= fit_params.min_trend_points: fit = ts_fit.polyfit(dim="time", deg=fit_params.fit_degree, skipna=True) diff --git a/primap2/csg/_strategies/local_trends.py b/primap2/csg/_strategies/local_trends.py index 6caef90..0cd29c0 100644 --- a/primap2/csg/_strategies/local_trends.py +++ b/primap2/csg/_strategies/local_trends.py @@ -218,13 +218,13 @@ def fill( gap_description = ( gap_description + f" filled for times " f"{np.datetime_as_string(time_filled_gap, unit='h')} " - f"using factor {factor[0]};" + f"using factor {factor[0]:.2f};" ) else: gap_description = ( gap_description + f" filled for times " f"{np.datetime_as_string(time_filled_gap, unit='h')} " - f"using factors {factor[0]} and {factor[1]};" + f"using factors {factor[0]:.2f} and {factor[1]:.2f};" ) # update description diff --git a/primap2/tests/csg/test_gaps.py b/primap2/tests/csg/test_gaps.py index 20eadcd..8047999 100644 --- a/primap2/tests/csg/test_gaps.py +++ b/primap2/tests/csg/test_gaps.py @@ -396,6 +396,17 @@ def test_calculate_boundary_trend_with_fallback(test_ts, fit_params_linear): ) assert np.allclose(trend_values, expected_constant, rtol=1e-04) + # test filling of missing boundary trends for a gap + # this can only occur when we use gap information for one time-series on another + # time-series, so we fake a gap here + fake_gap = Gap(left=np.datetime64("1953-01-01"), right=np.datetime64("1955-01-01"), type="gap") + trend_values = calculate_boundary_trend_with_fallback( + test_ts, + gap=fake_gap, + fit_params=fit_params_linear, + ) + assert np.allclose(trend_values, expected_constant, rtol=1e-04) + def test_calculate_scaling_factor(test_ts, fill_ts, fit_params_linear, caplog): gaps = get_gaps(test_ts) diff --git a/primap2/tests/csg/test_strategies.py b/primap2/tests/csg/test_strategies.py index d1ff270..8dd9919 100644 --- a/primap2/tests/csg/test_strategies.py +++ b/primap2/tests/csg/test_strategies.py @@ -149,7 +149,7 @@ def test_localTrends_strategy(): result_descriptions[0].description == "filled with local trend matched data from B. " "The following gaps have been filled: " "gap 1850-01-01T00 - 1850-01-01T00: filled for " - "times ['1850-01-01T00'] using factor 0.5;" + "times ['1850-01-01T00'] using factor 0.50;" ) ts[20:22] = np.nan @@ -165,12 +165,12 @@ def test_localTrends_strategy(): result_descriptions[0].description == "filled with local trend matched data from B. " "The following gaps have been filled: " "gap 1850-01-01T00 - 1850-01-01T00: filled for " - "times ['1850-01-01T00'] using factor 0.5; " + "times ['1850-01-01T00'] using factor 0.50; " "gap 1870-01-01T00 - 1871-01-01T00: filled for " - "times ['1870-01-01T00' '1871-01-01T00'] using factor 0.5;" + "times ['1870-01-01T00' '1871-01-01T00'] using factor 0.50;" ) - fill_ts[23:] = fill_ts[23:] * -1 + fill_ts[22:] = fill_ts[22:] * -1 result_ts, result_descriptions = primap2.csg.LocalTrendsStrategy(fit_params=fit_params).fill( ts=ts, fill_ts=fill_ts, fill_ts_repr="B" ) @@ -182,22 +182,37 @@ def test_localTrends_strategy(): result_descriptions[0].description == "filled with local trend matched data from B. " "The following gaps have been filled: " "gap 1850-01-01T00 - 1850-01-01T00: filled for " - "times ['1850-01-01T00'] using factor 0.5; " + "times ['1850-01-01T00'] using factor 0.50; " "gap 1870-01-01T00 - 1871-01-01T00: negative scaling factor - " "use fallback degree 0 negative scaling after fallback - " "failed to fill gap;" ) - ts[1:5] = np.nan + # gap description for differing scaling factors + fill_ts[22:] = fill_ts[22:] * -1 + ts[0] = 1 + ts[22:] = ts[22:] * 3 + result_ts, result_descriptions = primap2.csg.LocalTrendsStrategy(fit_params=fit_params).fill( + ts=ts, fill_ts=fill_ts, fill_ts_repr="B" + ) + expected_ts = ts.copy() + expected_ts[20] = 1 + expected_ts[21] = 3 + xr.testing.assert_allclose(expected_ts, result_ts) + assert all(result_descriptions[0].time == np.array(["1870", "1871"], dtype=np.datetime64)) + assert ( + result_descriptions[0].description == "filled with local trend matched data from B. " + "The following gaps have been filled: " + "gap 1870-01-01T00 - 1871-01-01T00: filled for " + "times ['1870-01-01T00' '1871-01-01T00'] using factors 0.50 and 1.50;" + ) + + ts[0:5] = np.nan fill_ts[5:] = np.nan with pytest.raises(StrategyUnableToProcess): primap2.csg.LocalTrendsStrategy(fit_params=fit_params).fill( ts=ts, fill_ts=fill_ts, fill_ts_repr="B" ) - # TODO: to test - # all fails and fallbacks - # fallback if negative - # general assert "source" not in result_ts.coords.keys()