Merge pull request #43 from DamienIrving/master

Add moments test
AusClimateService · Aug 7, 2023 · f210bde · f210bde
2 parents 621c91f + 503a9f4
commit f210bde
Show file tree

Hide file tree

Showing 3 changed files with 240 additions and 16 deletions.
diff --git a/setup.py b/setup.py
@@ -28,6 +28,7 @@
             "similarity = unseen.similarity:_main",
             "bias_correction = unseen.bias_correction:_main",
             "stability = unseen.stability:_main",
+            "moments = unseen.moments:_main",
         ]
     },
 )
diff --git a/unseen/moments.py b/unseen/moments.py
@@ -0,0 +1,217 @@
+"""Functions and command line program for moments testing."""
+
+import argparse
+import logging
+
+import numpy as np
+import matplotlib.pyplot as plt
+import scipy
+
+from . import fileio
+
+
+logging.basicConfig(level=logging.INFO)
+
+
+def calc_ci(data):
+    """Calculate the 95% confidence interval"""
+
+    lower_ci = np.percentile(np.array(data), 2.5, axis=0)
+    upper_ci = np.percentile(np.array(data), 97.5, axis=0)
+
+    return lower_ci, upper_ci
+
+
+def create_plot(
+    fcst_file,
+    obs_file,
+    var,
+    outfile=None,
+    min_lead=None,
+    ensemble_dim="ensemble",
+    init_dim="init_date",
+    lead_dim="lead_time",
+):
+    """Create a stability assessment plot.
+
+    Parameters
+    ----------
+    fcst_file : str
+        Forecast file containing metric of interest
+    obs_file : str
+        Observations file containing metric of interest
+    var : str
+        Variable name (in fcst_file)
+    outfile : str, default None
+        Path for output image file
+    min_lead : int, optional
+        Minimum lead time
+    ensemble_dim : str, default ensemble
+        Name of ensemble member dimension
+    init_dim : str, default init_date
+        Name of initial date dimension
+    lead_dim : str, default lead_time
+        Name of lead time dimension
+    """
+
+    ds_obs = fileio.open_dataset(obs_file)
+    da_obs = ds_obs[var].dropna("time")
+    sample_size = len(da_obs)
+    mean_obs = float(np.mean(da_obs))
+    std_obs = float(np.std(da_obs))
+    skew_obs = float(scipy.stats.skew(da_obs))
+    kurtosis_obs = float(scipy.stats.kurtosis(da_obs))
+
+    ds_fcst = fileio.open_dataset(fcst_file)
+    da_fcst = ds_fcst[var]
+    if min_lead is not None:
+        da_fcst = da_fcst.where(ds_fcst[lead_dim] >= min_lead)
+    dims = [ensemble_dim, init_dim, lead_dim]
+    da_fcst_stacked = da_fcst.dropna(lead_dim).stack({"sample": dims})
+
+    mean_values = []
+    std_values = []
+    skew_values = []
+    kurtosis_values = []
+    for i in range(1000):
+        random_sample = np.random.choice(da_fcst_stacked, sample_size)
+        mean = float(np.mean(random_sample))
+        std = float(np.std(random_sample))
+        skew = float(scipy.stats.skew(random_sample))
+        kurtosis = float(scipy.stats.kurtosis(random_sample))
+        mean_values.append(mean)
+        std_values.append(std)
+        skew_values.append(skew)
+        kurtosis_values.append(kurtosis)
+
+    mean_lower_ci, mean_upper_ci = calc_ci(mean_values)
+    std_lower_ci, std_upper_ci = calc_ci(std_values)
+    skew_lower_ci, skew_upper_ci = calc_ci(skew_values)
+    kurtosis_lower_ci, kurtosis_upper_ci = calc_ci(kurtosis_values)
+
+    fig = plt.figure(figsize=[15, 12])
+    ax1 = fig.add_subplot(221)
+    ax2 = fig.add_subplot(222)
+    ax3 = fig.add_subplot(223)
+    ax4 = fig.add_subplot(224)
+
+    ax1.hist(mean_values, rwidth=0.8, color="0.5")
+    ax1.set_title("(a) mean")
+    ax1.axvline(mean_lower_ci, color="0.2", linestyle="--")
+    ax1.axvline(mean_upper_ci, color="0.2", linestyle="--")
+    ax1.axvline(mean_obs)
+    ax1.set_ylabel("count")
+
+    ax2.hist(std_values, rwidth=0.8, color="0.5")
+    ax2.set_title("(b) standard deviation")
+    ax2.axvline(std_lower_ci, color="0.2", linestyle="--")
+    ax2.axvline(std_upper_ci, color="0.2", linestyle="--")
+    ax2.axvline(std_obs)
+    ax2.set_ylabel("count")
+
+    ax3.hist(skew_values, rwidth=0.8, color="0.5")
+    ax3.set_title("(c) skewness")
+    ax3.set_ylabel("count")
+    ax3.axvline(skew_lower_ci, color="0.2", linestyle="--")
+    ax3.axvline(skew_upper_ci, color="0.2", linestyle="--")
+    ax3.axvline(skew_obs)
+
+    ax4.hist(kurtosis_values, rwidth=0.8, color="0.5")
+    ax4.set_title("(d) kurtosis")
+    ax4.set_ylabel("count")
+    ax4.axvline(kurtosis_lower_ci, color="0.2", linestyle="--")
+    ax4.axvline(kurtosis_upper_ci, color="0.2", linestyle="--")
+    ax4.axvline(kurtosis_obs)
+
+    mean_text = f"Obs = {mean_obs}, Model 95% CI ={mean_lower_ci} to {mean_upper_ci}"
+    std_text = f"Obs = {std_obs}, Model 95% CI ={std_lower_ci} to {std_upper_ci}"
+    skew_text = f"Obs = {skew_obs}, Model 95% CI ={skew_lower_ci} to {skew_upper_ci}"
+    kurtosis_text = f"Obs = {kurtosis_obs}, Model 95% CI ={kurtosis_lower_ci} to {kurtosis_upper_ci}"
+    logging.info(f"Mean: {mean_text}")
+    logging.info(f"Standard deviation: {std_text}")
+    logging.info(f"Skewness: {skew_text}")
+    logging.info(f"Kurtosis: {kurtosis_text}")
+
+    if outfile:
+        infile_logs = {
+            fcst_file: ds_fcst.attrs["history"],
+            obs_file: ds_obs.attrs["history"],
+        }
+        command_history = fileio.get_new_log(infile_logs=infile_logs)
+        metadata = {
+            "mean": mean_text,
+            "standard deviation": std_text,
+            "skewness": skew_text,
+            "kurtosis": kurtosis_text,
+            "history": command_history,
+        }
+        plt.savefig(
+            outfile,
+            bbox_inches="tight",
+            facecolor="white",
+            dpi=200,
+            metadata=metadata,
+        )
+    else:
+        plt.show()
+
+
+def _parse_command_line():
+    """Parse the command line for input agruments"""
+
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument("fcst_file", type=str, help="Forecast file")
+    parser.add_argument("obs_file", type=str, help="Observations file")
+    parser.add_argument("var", type=str, help="Variable name")
+
+    parser.add_argument("--outfile", type=str, default=None, help="Output file name")
+    parser.add_argument(
+        "--ensemble_dim",
+        type=str,
+        default="ensemble",
+        help="Name of ensemble member dimension",
+    )
+    parser.add_argument(
+        "--init_dim",
+        type=str,
+        default="init_date",
+        help="Name of initial date dimension",
+    )
+    parser.add_argument(
+        "--lead_dim",
+        type=str,
+        default="lead_time",
+        help="Name of lead time dimension",
+    )
+    parser.add_argument(
+        "--min_lead",
+        type=int,
+        default=None,
+        help="Minimum lead time",
+    )
+    args = parser.parse_args()
+
+    return args
+
+
+def _main():
+    """Run the command line program."""
+
+    args = _parse_command_line()
+
+    create_plot(
+        args.fcst_file,
+        args.obs_file,
+        args.var,
+        outfile=args.outfile,
+        min_lead=args.min_lead,
+        ensemble_dim=args.ensemble_dim,
+        init_dim=args.init_dim,
+        lead_dim=args.lead_dim,
+    )
+
+
+if __name__ == "__main__":
+    _main()
diff --git a/unseen/stability.py b/unseen/stability.py
@@ -139,7 +139,7 @@ def plot_return(data, method, outfile=None):
 
 
 def plot_return_by_lead(
-    ax, sample_da, metric, method, uncertainty=False, lead_dim="lead_time"
+    ax, sample_da, metric, method, uncertainty=False, ymax=None, lead_dim="lead_time"
 ):
     """Plot return period curves for each lead time.
 
@@ -155,6 +155,8 @@ def plot_return_by_lead(
         Method for producing return period curve
     uncertainty: bool, default False
         Plot 95% confidence interval
+    ymax : float, optional
+        ymax for return curve plot
     lead_dim: str, default 'lead_time'
         Name of the lead time dimension in sample_da
     """
@@ -194,10 +196,12 @@ def plot_return_by_lead(
     ax.set_xlabel("return period (years)")
     ax.set_ylabel(sample_da.attrs["units"])
     ax.legend()
-    ax.set_ylim((50, None))
+    ax.set_ylim((50, ymax))
 
 
-def plot_return_by_time(ax, sample_da, metric, start_years, method, uncertainty=False):
+def plot_return_by_time(
+    ax, sample_da, metric, start_years, method, uncertainty=False, ymax=None
+):
     """Plot return period curves for each time slice (e.g. decade).
 
     Parameters
@@ -214,6 +218,8 @@ def plot_return_by_time(ax, sample_da, metric, start_years, method, uncertainty=
         Method for producing return period curve
     uncertainty: bool, default False
         Plot 95% confidence interval
+    ymax : float, optional
+        ymax for return curve plot
     """
 
     step = start_years[1] - start_years[0] - 1
@@ -253,7 +259,7 @@ def plot_return_by_time(ax, sample_da, metric, start_years, method, uncertainty=
     ax.set_xscale("log")
     ax.set_xlabel("return period (years)")
     ax.set_ylabel(sample_da.attrs["units"])
-    ax.set_ylim((50, None))
+    ax.set_ylim((50, ymax))
     ax.legend()
 
 
@@ -263,8 +269,8 @@ def create_plot(
     metric,
     start_years,
     outfile=None,
-    min_lead=None,
     uncertainty=False,
+    ymax=None,
     return_method="empirical",
     ensemble_dim="ensemble",
     init_dim="init_date",
@@ -282,10 +288,10 @@ def create_plot(
         Metric name (for plot title)
     outfile : str, default None
         Path for output image file
-    min_lead : int, optional
-        Minimum lead time
     uncertainty: bool, default False
         Plot the 95% confidence interval
+    ymax : float, optional
+        ymax for return curve plots
     return_method : {'empirical', 'gev'}, default empirial
         Method for fitting the return period curve
     ensemble_dim : str, default ensemble
@@ -298,8 +304,6 @@ def create_plot(
 
     ds_fcst = fileio.open_dataset(fcst_file)
     da_fcst = ds_fcst[var]
-    if min_lead is not None:
-        da_fcst = da_fcst.where(ds_fcst[lead_dim] >= min_lead)
     dims = [ensemble_dim, init_dim, lead_dim]
     da_fcst_stacked = da_fcst.dropna(lead_dim).stack({"sample": dims})
 
@@ -316,6 +320,7 @@ def create_plot(
         metric,
         return_method,
         uncertainty=uncertainty,
+        ymax=ymax,
         lead_dim=lead_dim,
     )
     plot_dist_by_time(ax3, da_fcst_stacked, metric, start_years)
@@ -326,6 +331,7 @@ def create_plot(
         start_years,
         return_method,
         uncertainty=uncertainty,
+        ymax=ymax,
     )
 
     if outfile:
@@ -360,6 +366,12 @@ def _parse_command_line():
         action="store_true",
         help="Plot the 95 percent confidence interval [default: False]",
     )
+    parser.add_argument(
+        "--ymax",
+        type=float,
+        default=None,
+        help="ymax for return curve plots",
+    )
     parser.add_argument(
         "--return_method",
         type=str,
@@ -385,12 +397,6 @@ def _parse_command_line():
         default="lead_time",
         help="Name of lead time dimension",
     )
-    parser.add_argument(
-        "--min_lead",
-        type=int,
-        default=None,
-        help="Minimum lead time",
-    )
     args = parser.parse_args()
 
     return args
@@ -407,9 +413,9 @@ def _main():
         args.metric,
         args.start_years,
         outfile=args.outfile,
-        min_lead=args.min_lead,
         return_method=args.return_method,
         uncertainty=args.uncertainty,
+        ymax=args.ymax,
         ensemble_dim=args.ensemble_dim,
         init_dim=args.init_dim,
         lead_dim=args.lead_dim,