Skip to content

Commit

Permalink
Merge pull request #43 from DamienIrving/master
Browse files Browse the repository at this point in the history
Add moments test
  • Loading branch information
DamienIrving authored Aug 7, 2023
2 parents 621c91f + 503a9f4 commit f210bde
Show file tree
Hide file tree
Showing 3 changed files with 240 additions and 16 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"similarity = unseen.similarity:_main",
"bias_correction = unseen.bias_correction:_main",
"stability = unseen.stability:_main",
"moments = unseen.moments:_main",
]
},
)
217 changes: 217 additions & 0 deletions unseen/moments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
"""Functions and command line program for moments testing."""

import argparse
import logging

import numpy as np
import matplotlib.pyplot as plt
import scipy

from . import fileio


logging.basicConfig(level=logging.INFO)


def calc_ci(data):
"""Calculate the 95% confidence interval"""

lower_ci = np.percentile(np.array(data), 2.5, axis=0)
upper_ci = np.percentile(np.array(data), 97.5, axis=0)

return lower_ci, upper_ci


def create_plot(
fcst_file,
obs_file,
var,
outfile=None,
min_lead=None,
ensemble_dim="ensemble",
init_dim="init_date",
lead_dim="lead_time",
):
"""Create a stability assessment plot.
Parameters
----------
fcst_file : str
Forecast file containing metric of interest
obs_file : str
Observations file containing metric of interest
var : str
Variable name (in fcst_file)
outfile : str, default None
Path for output image file
min_lead : int, optional
Minimum lead time
ensemble_dim : str, default ensemble
Name of ensemble member dimension
init_dim : str, default init_date
Name of initial date dimension
lead_dim : str, default lead_time
Name of lead time dimension
"""

ds_obs = fileio.open_dataset(obs_file)
da_obs = ds_obs[var].dropna("time")
sample_size = len(da_obs)
mean_obs = float(np.mean(da_obs))
std_obs = float(np.std(da_obs))
skew_obs = float(scipy.stats.skew(da_obs))
kurtosis_obs = float(scipy.stats.kurtosis(da_obs))

ds_fcst = fileio.open_dataset(fcst_file)
da_fcst = ds_fcst[var]
if min_lead is not None:
da_fcst = da_fcst.where(ds_fcst[lead_dim] >= min_lead)
dims = [ensemble_dim, init_dim, lead_dim]
da_fcst_stacked = da_fcst.dropna(lead_dim).stack({"sample": dims})

mean_values = []
std_values = []
skew_values = []
kurtosis_values = []
for i in range(1000):
random_sample = np.random.choice(da_fcst_stacked, sample_size)
mean = float(np.mean(random_sample))
std = float(np.std(random_sample))
skew = float(scipy.stats.skew(random_sample))
kurtosis = float(scipy.stats.kurtosis(random_sample))
mean_values.append(mean)
std_values.append(std)
skew_values.append(skew)
kurtosis_values.append(kurtosis)

mean_lower_ci, mean_upper_ci = calc_ci(mean_values)
std_lower_ci, std_upper_ci = calc_ci(std_values)
skew_lower_ci, skew_upper_ci = calc_ci(skew_values)
kurtosis_lower_ci, kurtosis_upper_ci = calc_ci(kurtosis_values)

fig = plt.figure(figsize=[15, 12])
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222)
ax3 = fig.add_subplot(223)
ax4 = fig.add_subplot(224)

ax1.hist(mean_values, rwidth=0.8, color="0.5")
ax1.set_title("(a) mean")
ax1.axvline(mean_lower_ci, color="0.2", linestyle="--")
ax1.axvline(mean_upper_ci, color="0.2", linestyle="--")
ax1.axvline(mean_obs)
ax1.set_ylabel("count")

ax2.hist(std_values, rwidth=0.8, color="0.5")
ax2.set_title("(b) standard deviation")
ax2.axvline(std_lower_ci, color="0.2", linestyle="--")
ax2.axvline(std_upper_ci, color="0.2", linestyle="--")
ax2.axvline(std_obs)
ax2.set_ylabel("count")

ax3.hist(skew_values, rwidth=0.8, color="0.5")
ax3.set_title("(c) skewness")
ax3.set_ylabel("count")
ax3.axvline(skew_lower_ci, color="0.2", linestyle="--")
ax3.axvline(skew_upper_ci, color="0.2", linestyle="--")
ax3.axvline(skew_obs)

ax4.hist(kurtosis_values, rwidth=0.8, color="0.5")
ax4.set_title("(d) kurtosis")
ax4.set_ylabel("count")
ax4.axvline(kurtosis_lower_ci, color="0.2", linestyle="--")
ax4.axvline(kurtosis_upper_ci, color="0.2", linestyle="--")
ax4.axvline(kurtosis_obs)

mean_text = f"Obs = {mean_obs}, Model 95% CI ={mean_lower_ci} to {mean_upper_ci}"
std_text = f"Obs = {std_obs}, Model 95% CI ={std_lower_ci} to {std_upper_ci}"
skew_text = f"Obs = {skew_obs}, Model 95% CI ={skew_lower_ci} to {skew_upper_ci}"
kurtosis_text = f"Obs = {kurtosis_obs}, Model 95% CI ={kurtosis_lower_ci} to {kurtosis_upper_ci}"
logging.info(f"Mean: {mean_text}")
logging.info(f"Standard deviation: {std_text}")
logging.info(f"Skewness: {skew_text}")
logging.info(f"Kurtosis: {kurtosis_text}")

if outfile:
infile_logs = {
fcst_file: ds_fcst.attrs["history"],
obs_file: ds_obs.attrs["history"],
}
command_history = fileio.get_new_log(infile_logs=infile_logs)
metadata = {
"mean": mean_text,
"standard deviation": std_text,
"skewness": skew_text,
"kurtosis": kurtosis_text,
"history": command_history,
}
plt.savefig(
outfile,
bbox_inches="tight",
facecolor="white",
dpi=200,
metadata=metadata,
)
else:
plt.show()


def _parse_command_line():
"""Parse the command line for input agruments"""

parser = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument("fcst_file", type=str, help="Forecast file")
parser.add_argument("obs_file", type=str, help="Observations file")
parser.add_argument("var", type=str, help="Variable name")

parser.add_argument("--outfile", type=str, default=None, help="Output file name")
parser.add_argument(
"--ensemble_dim",
type=str,
default="ensemble",
help="Name of ensemble member dimension",
)
parser.add_argument(
"--init_dim",
type=str,
default="init_date",
help="Name of initial date dimension",
)
parser.add_argument(
"--lead_dim",
type=str,
default="lead_time",
help="Name of lead time dimension",
)
parser.add_argument(
"--min_lead",
type=int,
default=None,
help="Minimum lead time",
)
args = parser.parse_args()

return args


def _main():
"""Run the command line program."""

args = _parse_command_line()

create_plot(
args.fcst_file,
args.obs_file,
args.var,
outfile=args.outfile,
min_lead=args.min_lead,
ensemble_dim=args.ensemble_dim,
init_dim=args.init_dim,
lead_dim=args.lead_dim,
)


if __name__ == "__main__":
_main()
38 changes: 22 additions & 16 deletions unseen/stability.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def plot_return(data, method, outfile=None):


def plot_return_by_lead(
ax, sample_da, metric, method, uncertainty=False, lead_dim="lead_time"
ax, sample_da, metric, method, uncertainty=False, ymax=None, lead_dim="lead_time"
):
"""Plot return period curves for each lead time.
Expand All @@ -155,6 +155,8 @@ def plot_return_by_lead(
Method for producing return period curve
uncertainty: bool, default False
Plot 95% confidence interval
ymax : float, optional
ymax for return curve plot
lead_dim: str, default 'lead_time'
Name of the lead time dimension in sample_da
"""
Expand Down Expand Up @@ -194,10 +196,12 @@ def plot_return_by_lead(
ax.set_xlabel("return period (years)")
ax.set_ylabel(sample_da.attrs["units"])
ax.legend()
ax.set_ylim((50, None))
ax.set_ylim((50, ymax))


def plot_return_by_time(ax, sample_da, metric, start_years, method, uncertainty=False):
def plot_return_by_time(
ax, sample_da, metric, start_years, method, uncertainty=False, ymax=None
):
"""Plot return period curves for each time slice (e.g. decade).
Parameters
Expand All @@ -214,6 +218,8 @@ def plot_return_by_time(ax, sample_da, metric, start_years, method, uncertainty=
Method for producing return period curve
uncertainty: bool, default False
Plot 95% confidence interval
ymax : float, optional
ymax for return curve plot
"""

step = start_years[1] - start_years[0] - 1
Expand Down Expand Up @@ -253,7 +259,7 @@ def plot_return_by_time(ax, sample_da, metric, start_years, method, uncertainty=
ax.set_xscale("log")
ax.set_xlabel("return period (years)")
ax.set_ylabel(sample_da.attrs["units"])
ax.set_ylim((50, None))
ax.set_ylim((50, ymax))
ax.legend()


Expand All @@ -263,8 +269,8 @@ def create_plot(
metric,
start_years,
outfile=None,
min_lead=None,
uncertainty=False,
ymax=None,
return_method="empirical",
ensemble_dim="ensemble",
init_dim="init_date",
Expand All @@ -282,10 +288,10 @@ def create_plot(
Metric name (for plot title)
outfile : str, default None
Path for output image file
min_lead : int, optional
Minimum lead time
uncertainty: bool, default False
Plot the 95% confidence interval
ymax : float, optional
ymax for return curve plots
return_method : {'empirical', 'gev'}, default empirial
Method for fitting the return period curve
ensemble_dim : str, default ensemble
Expand All @@ -298,8 +304,6 @@ def create_plot(

ds_fcst = fileio.open_dataset(fcst_file)
da_fcst = ds_fcst[var]
if min_lead is not None:
da_fcst = da_fcst.where(ds_fcst[lead_dim] >= min_lead)
dims = [ensemble_dim, init_dim, lead_dim]
da_fcst_stacked = da_fcst.dropna(lead_dim).stack({"sample": dims})

Expand All @@ -316,6 +320,7 @@ def create_plot(
metric,
return_method,
uncertainty=uncertainty,
ymax=ymax,
lead_dim=lead_dim,
)
plot_dist_by_time(ax3, da_fcst_stacked, metric, start_years)
Expand All @@ -326,6 +331,7 @@ def create_plot(
start_years,
return_method,
uncertainty=uncertainty,
ymax=ymax,
)

if outfile:
Expand Down Expand Up @@ -360,6 +366,12 @@ def _parse_command_line():
action="store_true",
help="Plot the 95 percent confidence interval [default: False]",
)
parser.add_argument(
"--ymax",
type=float,
default=None,
help="ymax for return curve plots",
)
parser.add_argument(
"--return_method",
type=str,
Expand All @@ -385,12 +397,6 @@ def _parse_command_line():
default="lead_time",
help="Name of lead time dimension",
)
parser.add_argument(
"--min_lead",
type=int,
default=None,
help="Minimum lead time",
)
args = parser.parse_args()

return args
Expand All @@ -407,9 +413,9 @@ def _main():
args.metric,
args.start_years,
outfile=args.outfile,
min_lead=args.min_lead,
return_method=args.return_method,
uncertainty=args.uncertainty,
ymax=args.ymax,
ensemble_dim=args.ensemble_dim,
init_dim=args.init_dim,
lead_dim=args.lead_dim,
Expand Down

0 comments on commit f210bde

Please sign in to comment.