diff --git a/.gitignore b/.gitignore
index 5e5e572f0..4fb9315da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -94,4 +94,5 @@
 pip-log.txt
 pip-delete-this-directory.txt
 
 # Jupyter Notebook
-.ipynb_checkpoints
\ No newline at end of file
+.ipynb_checkpoints
+.DS_Store
diff --git a/data/fieldlist_NCAR.jsonc b/data/fieldlist_NCAR.jsonc
index b69097b3c..a46f73a8b 100644
--- a/data/fieldlist_NCAR.jsonc
+++ b/data/fieldlist_NCAR.jsonc
@@ -90,7 +90,6 @@
     "ndim": 3
   },
   "TREFHT" : {
-    // correct name? CMIP6 equivalent should be tas, temp at 2m ref height
     "standard_name": "air_temperature",
     "units": "K",
     "ndim": 3
@@ -157,6 +156,22 @@
     "units": "W m-2",
     "ndim": 3
   },
+  // Variables for flow_dep_diag module:
+  "Z250": {
+    "standard_name": "geopotential_height_250_mbar",
+    "units": "m",
+    "ndim": 3
+  },
+  "T250": {
+    "standard_name": "temperature_250_mbar",
+    "units": "K",
+    "ndim": 3
+  },
+  "T500": {
+    "standard_name": "temperature_500_mbar",
+    "units": "K",
+    "ndim": 3
+  },
   // Variables for AMOC_3D_Structure module:
   // "uo": {
   //   "standard_name": "sea_water_x_velocity",
diff --git a/diagnostics/convective_transition_diag/convecTransCriticalCollapse_usp.py b/diagnostics/convective_transition_diag/convecTransCriticalCollapse_usp.py
index ba0f85a15..c1ba02e52 100644
--- a/diagnostics/convective_transition_diag/convecTransCriticalCollapse_usp.py
+++ b/diagnostics/convective_transition_diag/convecTransCriticalCollapse_usp.py
@@ -1,12 +1,11 @@
-# This file is part of the convective_transition_diag module of the MDTF code package (see LICENSE.txt)
-
+# This file is part of the convective_transition_diag module of the MDTF code package (see mdtf/MDTF_v2.0/LICENSE.txt)
 # ======================================================================
 # convecTransCriticalCollapse_usp.py
 #
 # Called by convecTransCriticalCollapse.py
 # Provides User-Specified Parameters for Fitting and Plotting
 #
-# This file is part of the Convective Transition Diagnostic Package
+# This file is part of the Convective Transition Diagnostic Package
 # and the MDTF code package. See LICENSE.txt for the license.
 #
 import json
@@ -32,7 +31,7 @@
 TAVE_VAR=os.environ["tave_var"]
 QSAT_INT_VAR=os.environ["qsat_int_var"]
-# Use 1:tave, or 2:qsat_int as Bulk Tropospheric Temperature Measure
+# Use 1:tave, or 2:qsat_int as Bulk Tropospheric Temperature Measure
 BULK_TROPOSPHERIC_TEMPERATURE_MEASURE=int(os.environ["BULK_TROPOSPHERIC_TEMPERATURE_MEASURE"])
 
 # Directory & Filename for saving binned results (netCDF4)
@@ -53,7 +52,7 @@
 # List binned data file (with filename corresponding to casename)
 bin_output_list=sorted(glob.glob(BIN_OUTPUT_DIR+"/"+BIN_OUTPUT_FILENAME+".nc"))
 
-# Directory & Filename for saving figures
+# Directory & Filename for saving figures
 # convecTransCriticalCollapse.py generates 2 sets of figures for MODEL
 FIG_OUTPUT_DIR=os.environ["WK_DIR"]+"/model/PS"
 # Figure filename for Convective Transition Statistics (CTS)
@@ -87,7 +86,7 @@
 ##### Start: FITTING-REQUIRED PARAMETERS #####
 # Use PRECIP_REF (units: mm/hr) to find a 0-th order approximation of Critical CWV w_c
 # and PRECIP_FIT_MIN...
 
 # The framework copies model data to a regular directory structure of the form
 # <DATA_DIR>/<frequency>/<CASENAME>.<variable_name>.<frequency>.nc
 # Here <variable_name> and <frequency> are requested in the "varlist" part of
 # settings.jsonc.
 # The following command sets input_path to the value of the shell environment
-# variable called TAS_FILE. This variable is set by the framework to let the
+# variable called TAS_FILE. This variable is set by the framework to let the
 # script know where the locally downloaded copy of the data for this variable
 # (which we called "tas") is.
 input_path = os.environ["TAS_FILE"]
 
 # command to load the netcdf file
 model_dataset = xr.open_dataset(input_path)
-
-
 
 ### 2) Doing computations: #####################################################
 #
 # Diagnostics in the framework are intended to work with native output from a
 # variety of models. For this reason, variable names should not be hard-coded
-# but instead set from environment variables.
+# but instead set from environment variables.
 #
 tas_var_name = os.environ["tas_var"]
 
 # For safety, don't even assume that the time dimension of the input file is
@@ -117,10 +115,10 @@
 ### 3) Saving output data: #####################################################
 #
-# Diagnostics should write output data to disk to a) make relevant results
+# Diagnostics should write output data to disk to a) make relevant results
 # available to the user for further use or b) to pass large amounts of data
 # between stages of a calculation run as different sub-scripts. Data can be in
-# any format (as long as it's documented) and should be written to the
+# any format (as long as it's documented) and should be written to the
 # directory <WK_DIR>/model/netCDF (created by the framework).
 #
 out_path = "{WK_DIR}/model/netCDF/temp_means.nc".format(**os.environ)
@@ -131,9 +129,9 @@
 ### 4) Saving output plots: ####################################################
 #
-# Plots should be saved in EPS or PS format at <WK_DIR>/<model or obs>/PS
-# (created by the framework). Plots can be given any filename, but should have
-# the extension ".eps" or ".ps". To make the webpage output, the framework will
+# Plots should be saved in EPS or PS format at <WK_DIR>/<model or obs>/PS
+# (created by the framework). Plots can be given any filename, but should have
+# the extension ".eps" or ".ps". To make the webpage output, the framework will
 # convert these to bitmaps with the same name but extension ".png".
 
 # Define a python function to make the plot, since we'll be doing it twice and
@@ -160,14 +158,14 @@ def plot_and_save_figure(model_or_obs, title_string, dataset):
 
 ### 5) Loading obs data files & plotting obs figures: ##########################
 #
-# If your diagnostic uses any model-independent supporting data (eg. reference
+# If your diagnostic uses any model-independent supporting data (eg. reference
 # or observational data) larger than a few kB of text, it should be provided via
 # the observational data distribution instead of being included with the source
-# code. This data can be in any format: the framework doesn't process it. The
+# code. This data can be in any format: the framework doesn't process it. The
 # environment variable OBS_DATA will be set to a path where the framework has
 # copied a directory containing your supplied data.
 #
-# The following command replaces the substring "{OBS_DATA}" with the value of
+# The following command replaces the substring "{OBS_DATA}" with the value of
 # the OBS_DATA environment variable.
input_path = "{OBS_DATA}/example_tas_means.nc".format(**os.environ)
diff --git a/diagnostics/flow_dep_diag/ClimAnom_func.py b/diagnostics/flow_dep_diag/ClimAnom_func.py
new file mode 100644
index 000000000..82ffbb24e
--- /dev/null
+++ b/diagnostics/flow_dep_diag/ClimAnom_func.py
@@ -0,0 +1,23 @@
+import os
+import xarray as xr
+import numpy as np
+from datetime import datetime
+np.seterr(divide='ignore', invalid='ignore')
+
+# Function to calculate the climatological anomaly of each variable used in the diagnostic.
+
+time_var = os.environ["time_coord"]  # name of the time coordinate, set by the framework
+
+def climAnom(var_path, var_name):
+
+    ds = xr.open_dataset(var_path, use_cftime=True)
+    ds[time_var] = ds.indexes[time_var].to_datetimeindex()  # convert time to datetime so we can use groupby functionality
+    # Drop 1-dimensional coordinates
+    ds = ds.squeeze()
+    da = ds[var_name]
+    da_ensmean = da.copy()
+    # Daily climatology: average over all years for each day of the year
+    da_day_clim = da_ensmean.groupby('{time_coord}.dayofyear'.format(**os.environ)).mean(time_var)
+    # Anomaly: subtract the day-of-year climatology from each day
+    da_day_anom = da.groupby('{time_coord}.dayofyear'.format(**os.environ)) - da_day_clim
+    da_day_anom = da_day_anom.drop('dayofyear')
+    da_day_anom.to_netcdf("{WK_DIR}/model/netCDF/".format(**os.environ) + var_name + "_climAnom.nc")
+    return da_day_anom
diff --git a/diagnostics/flow_dep_diag/PyWR.py b/diagnostics/flow_dep_diag/PyWR.py
new file mode 100644
index 000000000..ab196de26
--- /dev/null
+++ b/diagnostics/flow_dep_diag/PyWR.py
@@ -0,0 +1,118 @@
+# PyWR.py (version 1.1) -- 27 Oct 2020
+# Python functions for weather typing (using K-means) and flow-dependent model diagnostics
+# Authors: ÁG Muñoz (agmunoz@iri.columbia.edu), James Doss-Gollin, AW Robertson (awr@iri.columbia.edu)
+# The International Research Institute for Climate and Society (IRI)
+# The Earth Institute at Columbia University.
+
+# Notes:
+# Log: see version.log in GitHub
+
+import numpy as np
+from numba import jit
+import matplotlib.pyplot as plt
+from scipy.stats import pearsonr as correl  # needed by calc_classifiability below
+from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+from typing import Tuple
+
+def get_number_eof(X: np.ndarray, var_to_explain: float, plot=False) -> int:
+    """Get the number of EOFs of X that explain a given proportion of variance
+    """
+    assert var_to_explain > 0, 'var_to_explain must be between 0 and 1'
+    assert var_to_explain < 1, 'var_to_explain must be between 0 and 1'
+    pca = PCA().fit(X)
+    var_explained = pca.explained_variance_ratio_
+    cum_var = var_explained.cumsum()
+    n_eof = np.where(cum_var > var_to_explain)[0].min() + 1
+
+    if plot:
+        n_padding = 4
+        plt.figure(figsize=(12, 7))
+        plt.plot(np.arange(1, n_eof + 1 + n_padding), cum_var[0:(n_padding + n_eof)])
+        plt.scatter(np.arange(1, n_eof + 1 + n_padding), cum_var[0:(n_padding + n_eof)])
+        plt.xlabel('Number of EOFs')
+        plt.ylabel('Cumulative Proportion of Variance Explained')
+        plt.grid()
+        plt.title('{} EOFs Retained'.format(n_eof))
+        plt.show()
+    return n_eof
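+
+# Example of the intended use of get_number_eof (illustrative only: the data here are
+# synthetic, whereas the POD passes the stacked Z250 anomaly field -- see WeatherTypes.py):
+#
+#     >>> rng = np.random.default_rng(0)
+#     >>> X = rng.standard_normal((500, 40))            # [time, grid point] anomaly matrix
+#     >>> n_eof = get_number_eof(X, var_to_explain=0.9) # EOFs needed for 90% of variance
+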
+@jit
+def _vector_ci(P: np.ndarray, Q: np.ndarray) -> float:
+    """Implement the Michelangeli et al. (1995) Classifiability Index
+
+    The variable naming here is not pythonic but follows the notation in the 1995 paper,
+    which makes it easier to follow what is going on. You shouldn't need to call
+    this function directly; it is called by get_classifiability_index.
+
+    PARAMETERS
+    ----------
+    P: a cluster centroid
+    Q: another cluster centroid
+    """
+    k = P.shape[0]
+    Aij = np.ones([k, k])
+    for i in range(k):
+        for j in range(k):
+            Aij[i, j] = np.corrcoef(P[i, :], Q[j, :])[0, 1]
+    Aprime = Aij.max(axis=0)
+    ci = Aprime.min()
+    return ci
+
+def calc_classifiability(P, Q):
+    """Implement the Michelangeli et al. (1995) Classifiability Index
+
+    Equivalent to _vector_ci, but computes the correlations with scipy's pearsonr
+    (imported above as correl). Not called elsewhere in this package; kept for reference.
+
+    Args:
+        P: a cluster centroid
+        Q: another cluster centroid
+    """
+    k = P.shape[0]
+    Aij = np.ones([k, k])
+    for i in range(k):
+        for j in range(k):
+            Aij[i, j], _ = correl(P[i, :], Q[j, :])
+    Aprime = Aij.max(axis=0)
+    ci = Aprime.min()
+    return ci
+
+@jit
+def get_classifiability_index(centroids: np.ndarray) -> Tuple[float, int]:
+    """Get the classifiability of a set of centroids
+
+    This function computes the classifiability index for a set of centroids and
+    indicates which simulation yielded the best partition.
+
+    PARAMETERS
+    ----------
+    centroids: input array of centroids, indexed [simulation, dimension]
+    """
+    nsim = centroids.shape[0]
+    c_pq = np.ones([nsim, nsim])
+    for i in range(0, nsim):
+        for j in range(0, nsim):
+            if i == j:
+                c_pq[i, j] = np.nan
+            else:
+                c_pq[i, j] = _vector_ci(P=centroids[i, :, :], Q=centroids[j, :, :])
+    classifiability = np.nanmean(c_pq)
+    best_part = np.where(c_pq == np.nanmax(c_pq))[0][0]
+    return classifiability, best_part
+
+@jit
+def loop_kmeans(X: np.ndarray, n_cluster: int, n_sim: int) -> Tuple[np.ndarray, np.ndarray]:
+    """Compute weather types
+
+    PARAMETERS
+    ----------
+    X: an array (should be in reduced dimension space already) indexed [time, dimension]
+    n_cluster: how many clusters to compute
+    n_sim: how many times to initialize the clusters (note: the classifiability
+        computation scales as n_sim**2)
+    """
+    centroids = np.zeros(shape=(n_sim, n_cluster, X.shape[1]))
+    w_types = np.zeros(shape=(n_sim, X.shape[0]))
+    for i in np.arange(n_sim):
+        km = KMeans(n_clusters=n_cluster).fit(X)
+        centroids[i, :, :] = km.cluster_centers_
+        w_types[i, :] = km.labels_
+    return centroids, w_types
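+
+# The intended workflow, in miniature (illustrative only: the PC series is synthetic
+# and the n_cluster/n_sim values are hypothetical -- the POD reads NCLUSTER and NSIM
+# from its environment):
+#
+#     >>> pcs = np.random.default_rng(1).standard_normal((365, 10))   # stand-in PC series
+#     >>> centroids, w_types = loop_kmeans(pcs, n_cluster=4, n_sim=5)
+#     >>> class_idx, best_part = get_classifiability_index(centroids)
+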
diff --git a/diagnostics/flow_dep_diag/WeatherTypes.py b/diagnostics/flow_dep_diag/WeatherTypes.py
new file mode 100644
index 000000000..a07ea1603
--- /dev/null
+++ b/diagnostics/flow_dep_diag/WeatherTypes.py
@@ -0,0 +1,145 @@
+# Python packages/modules imported for the diagnostic
+import os
+import xarray as xr
+import numpy as np
+from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+import cartopy.crs as ccrs
+from cartopy import feature
+import matplotlib.pyplot as plt
+# The following are function scripts which the diagnostic calls to run specific calculations;
+# refer to PyWR.py and ClimAnom_func.py for details on the imported functions
+from PyWR import *
+from ClimAnom_func import *
+
+# Local names for the environment variables set by the framework
+time_coord = os.environ["time_coord"]
+lat_coord = os.environ["lat_coord"]
+lon_coord = os.environ["lon_coord"]
+Z250_var = os.environ["Z250_var"]
+PRECT_var = os.environ["PRECT_var"]
+t2m_var = os.environ["T250_var"]  # temperature field (T250 in this configuration)
+
+def weatherTypes(reanalysis_path, reanalysis_var, rainfall_path, rainfall_var, t2m_path, t2m_var):
+    # Daily anomalies for each field, stacked to [time] x [grid point]
+    reanalysis = climAnom(var_path=reanalysis_path, var_name=reanalysis_var).stack(T=[time_coord], grid=[lat_coord, lon_coord])
+    rainfall = climAnom(var_path=rainfall_path, var_name=rainfall_var).stack(T=[time_coord], grid=[lat_coord, lon_coord])
+    t2m = climAnom(var_path=t2m_path, var_name=t2m_var).stack(T=[time_coord], grid=[lat_coord, lon_coord])
+    reanalysis = reanalysis.to_dataset()
+    rainfall = rainfall.to_dataset()
+    t2m = t2m.to_dataset()
+    print("Climatology anomaly calculations completed.")
+    # Dimension reduction: project the data onto the leading EOFs to obtain principal
+    # component time series; the projection is kept as reanalysis_pc for later use
+    n_eof = get_number_eof(X=reanalysis[Z250_var].values, var_to_explain=0.9, plot=True)
+    pca_model = PCA(n_components=n_eof).fit(reanalysis[Z250_var].values)
+    reanalysis_pc = pca_model.transform(reanalysis[Z250_var].values)
+    # Perform clustering using a manually specified number of clusters
+    n_cluster = int(os.environ["NCLUSTER"])
+    n_sim = int(os.environ["NSIM"])
+    centroids, wtypes = loop_kmeans(X=reanalysis_pc, n_cluster=n_cluster, n_sim=n_sim)
+    class_idx, best_part = get_classifiability_index(centroids)
+    print('The classifiability index is {}'.format(class_idx))
+    # Define a KMeans object seeded with the centroids of the best partition
+    best_fit = KMeans(n_clusters=n_cluster, init=centroids[best_part, :, :], n_init=1, max_iter=1).fit(reanalysis_pc)
+    # Composite the reanalysis field by weather type
+    reanalysis_composite = reanalysis.copy()
+    model_clust = best_fit.fit_predict(reanalysis_pc)  # assign each day to a weather type
+    weather_types = xr.DataArray(
+        model_clust,
+        coords={'T': reanalysis_composite['T']},
+        dims='T'
+    )
+    reanalysis_composite['WT'] = weather_types
+    reanalysis_composite = reanalysis_composite.groupby('WT').mean(dim='T').unstack('grid')[Z250_var]
+    reanalysis_composite['M'] = 0
+    print("Reanalysis completed.")
+    wt_anomalies = []  # initialize empty list
+    wt_anomalies.append(reanalysis_composite)
+    wt_anomalies = xr.concat(wt_anomalies, dim='M')  # join together
+    wt_anomalies['WT'] = wt_anomalies['WT'] + 1  # number weather types from 1
+    # Prepare a figure with rainfall and temperature composites
+    map_proj = ccrs.PlateCarree()  # ccrs.Orthographic(-110, 10)
+    data_proj = ccrs.PlateCarree()
+    wt_unique = np.unique(wt_anomalies['WT'])
+    figsize = (14, 8)
+    # WT proportions: fraction of days assigned to each weather type
+    wt = weather_types.to_dataframe(name='WT')
+    wt = wt + 1
+    wt_counts = wt.groupby('WT').size().div(wt['WT'].size)
+    print("Beginning plotting...")
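+    # The figure is laid out as 3 rows x n_cluster columns: for each weather type
+    # (one column per WT), the top row maps the Z250 anomaly composite, the middle
+    # row the rainfall anomaly composite, and the bottom row the temperature anomaly
+    # composite.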
+    # Plotting
+    plot_path = "{WK_DIR}/model/PS/model_plot.png".format(**os.environ)
+    # Set up the figure
+    plt.rcParams.update({'font.size': 12})
+    fig, axes = plt.subplots(
+        nrows=3, ncols=len(wt_unique), subplot_kw={'projection': map_proj},
+        figsize=figsize, sharex=True, sharey=True
+    )
+    for i, w in enumerate(wt_unique):
+        def selector(ds):
+            # Composite: select the days assigned to weather type w and average them
+            times = wt.loc[wt['WT'] == w].index.get_level_values(time_coord)
+            ds = ds.sel(T=np.in1d(ds.unstack('T')[time_coord], times))
+            ds = ds.mean(dim='T')
+            return ds
+
+        # Top row: geopotential height anomalies
+        ax = axes[0, i]
+        ax.set_title('WT {}: {:.1%} of days'.format(w, wt_counts.values[i]))
+        C0 = selector(reanalysis[Z250_var]).unstack('grid').plot.contourf(
+            transform=data_proj,
+            ax=ax,
+            cmap='PuOr',
+            extend="both",
+            levels=np.linspace(-2e2, 2e2, 21),
+            add_colorbar=False,
+            add_labels=False
+        )
+        ax.coastlines()
+        ax.add_feature(feature.BORDERS)
+
+        # Middle row: rainfall anomalies
+        ax = axes[1, i]
+        C1 = selector(rainfall[PRECT_var]).unstack('grid').plot.contourf(
+            transform=data_proj,
+            ax=ax,
+            cmap='BrBG',
+            extend="both",
+            levels=np.linspace(-2, 2, 13),
+            add_colorbar=False,
+            add_labels=False
+        )
+        ax.coastlines()
+
+        # Bottom row: temperature anomalies
+        ax = axes[2, i]
+        C2 = selector(t2m[t2m_var]).unstack('grid').plot.contourf(
+            transform=data_proj,
+            ax=ax,
+            cmap='RdBu_r',
+            extend="both",
+            levels=np.linspace(-2, 2, 13),
+            add_colorbar=False,
+            add_labels=False
+        )
+        ax.coastlines()
+        ax.add_feature(feature.BORDERS)
+        # ax.set_extent([-95, -70, -9, 5])
+        ax.tick_params(colors='b')
+
+    # Add colorbars
+    plt.tight_layout()
+    fig.subplots_adjust(right=0.94)
+    cax0 = fig.add_axes([0.97, 0.65, 0.0075, 0.3])
+    cax1 = fig.add_axes([0.97, 0.33, 0.0075, 0.3])
+    cax2 = fig.add_axes([0.97, 0.01, 0.0075, 0.3])
+    cbar0 = fig.colorbar(C0, cax=cax0)
+    cbar0.formatter.set_powerlimits((4, 4))
+    cbar0.update_ticks()
+    cbar0.set_label('{Z250_var} anomaly [m]'.format(**os.environ), rotation=270)
+    cbar0.ax.get_yaxis().labelpad = 20
+    cbar1 = fig.colorbar(C1, cax=cax1)
+    cbar1.set_label('{PRECT_var} anomaly [mm/d]'.format(**os.environ), rotation=270)
+    cbar1.ax.get_yaxis().labelpad = 20
+    cbar2 = fig.colorbar(C2, cax=cax2)
+    cbar2.set_label('{T250_var} anomaly [$^o$C]'.format(**os.environ), rotation=270)
+    cbar2.ax.get_yaxis().labelpad = 20
+    fig.savefig(plot_path, bbox_inches='tight')  # savefig returns None; don't rebind fig
+    return fig
+
+weatherTypes(os.environ["Z250_FILE"], os.environ["Z250_var"], os.environ["PRECT_FILE"], os.environ["PRECT_var"], os.environ["T250_FILE"], os.environ["T250_var"])
diff --git a/diagnostics/flow_dep_diag/cropping.py b/diagnostics/flow_dep_diag/cropping.py
new file mode 100644
index 000000000..363f581d5
--- /dev/null
+++ b/diagnostics/flow_dep_diag/cropping.py
@@ -0,0 +1,24 @@
+import xarray as xr
+
+file_path = None
+output_path = None
+
+# United States bounding box provided; replace with your region of interest
+# (lon in the range -180,180)
+nla = 71.36
+sla = 18.91
+wlo = -171.79
+elo = -66.96
+ds = xr.open_dataset(file_path, decode_times=False)
+
+# The diagnostic assumes your grid has a longitudinal range -180,180.
+# Shift your grid from 0,360 using the following lines; this leaves the
+# coordinates unchanged if the values are already between -180,180.
+ds.coords['lon'] = (ds.coords['lon'] + 180) % 360 - 180  # shift the values
+ds = ds.sortby(ds.lon)  # sort them
+# Crop the dataset to the bounding box specified above
+mask_lon = (ds.lon >= wlo) & (ds.lon <= elo)
+mask_lat = (ds.lat >= sla) & (ds.lat <= nla)
+cropped_ds = ds.where(mask_lon & mask_lat, drop=True)
+
+cropped_ds.to_netcdf(output_path)
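+
+# Usage: set file_path and output_path above to your input and output NetCDF files
+# (they are deliberately left as None placeholders), then run:
+#     python cropping.py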
diff --git a/diagnostics/flow_dep_diag/doc/figure1.png b/diagnostics/flow_dep_diag/doc/figure1.png
new file mode 100644
index 000000000..ab83145a2
Binary files /dev/null and b/diagnostics/flow_dep_diag/doc/figure1.png differ
diff --git a/diagnostics/flow_dep_diag/doc/figure2.png b/diagnostics/flow_dep_diag/doc/figure2.png
new file mode 100644
index 000000000..57d62fc07
Binary files /dev/null and b/diagnostics/flow_dep_diag/doc/figure2.png differ
diff --git a/diagnostics/flow_dep_diag/doc/figure3.png b/diagnostics/flow_dep_diag/doc/figure3.png
new file mode 100644
index 000000000..eb549f188
Binary files /dev/null and b/diagnostics/flow_dep_diag/doc/figure3.png differ
diff --git a/diagnostics/flow_dep_diag/doc/flow_dep_diag.rst b/diagnostics/flow_dep_diag/doc/flow_dep_diag.rst
new file mode 100644
index 000000000..ecc172d57
--- /dev/null
+++ b/diagnostics/flow_dep_diag/doc/flow_dep_diag.rst
@@ -0,0 +1,132 @@
+Flow-Dependent Model Diagnostic Documentation
+=============================================
+
+Last update: 5/14/2021
+
+The flow-dependent model diagnostics compare the characteristics of daily atmospheric
+circulation patterns, or weather types, in reanalyses and models to analyze
+misrepresented physical processes related to spatiotemporal systematic errors in those
+models. Relationships between these biases and climate teleconnections (e.g., SST
+patterns, ENSO, MJO, etc.) can be explored in different models.
+
+Version & Contact info
+----------------------
+
+.. '-' starts items in a bulleted list:
+   https://docutils.sourceforge.io/docs/user/rst/quickref.html#bullet-lists
+
+- Version/revision information: version 1 (5/14/2021)
+- PI: Ángel G. Muñoz (IRI, Columbia University, agmunoz@iri.columbia.edu)
+- Developers/points of contact: Ángel G. Muñoz (agmunoz@iri.columbia.edu) and
+  Andrew W. Robertson (awr@iri.columbia.edu), IRI, Columbia University
+- Other contributors: Drew Resnick (IRI, Columbia University, drewr@iri.columbia.edu),
+  James Doss-Gollin
+
+.. Underline with '^'s to make a third-level heading.
+
+Open source copyright agreement
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The MDTF framework is distributed under the LGPLv3 license (see LICENSE.txt).
+
+Functionality
+-------------
+
+The current package consists of the following functionalities:
+
+(1) Calculation of climatologies and anomalies for the input fields
+    (ClimAnom_func.py; a usage sketch follows below)
+
+(2) Calculation of weather types' spatial patterns (WeatherTypes.py)
+
+(3) Calculation of weather types' temporal characteristics (to be added soon)
+
+(4) Procrustes analysis (to be added soon)
+
+(**) cropping.py can be referenced if code is needed to either shift the grid of your
+data or to crop your data to a specified region.
+
+As a module of the MDTF code package, all scripts of this package can be found under
+``mdtf/MDTF_$ver/diagnostics/flow_dep_diag``.
+
+.. and pre-digested observational data under mdtf/inputdata/obs_data/flow_dep_diag
+
+Required programming language and libraries
+-------------------------------------------
+
+Python3 packages: "netCDF4", "xarray", "numpy", "sklearn", "cartopy", "matplotlib",
+"numba", "datetime", "typing"
+
+Required model output variables
+-------------------------------
+
+(1) Geopotential height at 250 hPa (units: m, daily resolution)
+
+(2) Rainfall (units: mm/day, daily resolution)
+
+(3) Temperature (units: Celsius, daily resolution)
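+
+A minimal sketch of how the anomaly step is driven (the environment variables are
+normally set by the MDTF framework; the values and file path below are placeholders):
+
+.. code-block:: python
+
+   import os
+
+   # Hypothetical stand-ins for the framework-provided environment
+   os.environ.setdefault("time_coord", "time")
+   os.environ.setdefault("WK_DIR", "wkdir")  # $WK_DIR/model/netCDF must exist
+
+   from ClimAnom_func import climAnom
+
+   # Writes $WK_DIR/model/netCDF/Z250_climAnom.nc and returns the anomaly DataArray
+   z250_anom = climAnom(var_path="Z250_daily.nc", var_name="Z250")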
+
+References
+----------
+
+.. _ref-Muñoz1:
+
+Muñoz, Á. G., Yang, X., Vecchi, G. A., Robertson, A. W., & Cooke, W. F. (2017): A Weather-Type-Based
+Cross-Time-Scale Diagnostic Framework for Coupled Circulation Models. *Journal of Climate*, **30** (22),
+8951–8972,
+`doi:10.1175/JCLI-D-17-0115.1 <https://doi.org/10.1175/JCLI-D-17-0115.1>`__.
+
+.. _ref-Muñoz2:
+
+Muñoz, Á. G., Robertson, A. W., Vecchi, G. A., Zhang, M., Chourio, X., & Yang, J. (2020): Going with the Flow?
+Flow-dependent Multi-timescale Model Diagnostics. AGU Fall Meeting, Dec. 2020.
+
+More about this diagnostic
+--------------------------
+
+Common approaches to diagnosing systematic errors involve the computation of metrics aimed at
+providing an overall summary of the performance of the model in reproducing the particular
+variables of interest in the study, normally tied to specific spatial and temporal scales.
+
+However, the evaluation of model performance is not always tied to an understanding of the
+physical processes that are correctly represented, distorted or even absent in the model world.
+As the physical mechanisms are more often than not related to interactions taking place at
+multiple time and spatial scales, cross-scale model diagnostic tools are not only desirable but
+required. Here, a recently proposed circulation-based diagnostic framework is extended to
+consider systematic errors in both spatial and temporal patterns at multiple timescales.
+
+The framework, which uses a weather-typing dynamical approach, quantifies biases in the shape,
+location and tilt of modeled circulation patterns, as well as biases associated with their
+temporal characteristics, such as frequency of occurrence, duration, persistence and
+transitions. Relationships between these biases and climate teleconnections (e.g., ENSO and
+MJO) are explored using different models.
+
+.. _figure1:
+
+.. figure:: figure1.png
+   :align: left
+   :width: 75 %
+
+   Figure 1. Weather types (WT, or “flows”) in the MERRA reanalysis and in a suite of GFDL model
+   experiments (for details, see Muñoz et al. 2017). Some biases in magnitude and spatial rotation
+   in WT3 and WT5 are indicated.
+
+For example, :ref:`Figure 1 <figure1>` exhibits atmospheric circulation patterns for northeastern
+North America, as analyzed by :ref:`Muñoz et al. (2017) <ref-Muñoz1>`, in a reanalysis and in
+different model experiments produced using the GFDL models LOAR and FLOR. The POD permits the
+calculation of the atmospheric circulation patterns (:ref:`Figure 1 <figure1>`) as well as of the
+rainfall and temperature anomaly fields related to each “flow”, computed via a composite
+analysis. It is also possible to identify the typical sea-surface temperature patterns related
+to the occurrence of each pattern (:ref:`Figure 2 <figure2>`).
+
+Beyond the analysis of spatial biases in the modeled atmospheric circulation patterns, the POD
+can help assess biases in temporal characteristics. A variety of metrics have been suggested by
+:ref:`Muñoz et al. (2017) <ref-Muñoz1>`, and are summarized in :ref:`Figure 3 <figure3>`.
+
+.. _figure2:
+
+.. figure:: figure2.png
+   :align: left
+   :width: 75 %
+
+   Figure 2. Atmospheric circulation, rainfall and sea-surface temperature (SST) patterns
+   associated with weather type 5 (WT5).
+
+.. _figure3:
+
+.. figure:: figure3.png
+   :align: left
+   :width: 75 %
+
+   Figure 3. A brief list of suggested metrics to evaluate flow-dependent temporal
+   characteristics in models.
diff --git a/diagnostics/flow_dep_diag/flow_dep_diag.html b/diagnostics/flow_dep_diag/flow_dep_diag.html
new file mode 100644
index 000000000..5f7c85a7e
--- /dev/null
+++ b/diagnostics/flow_dep_diag/flow_dep_diag.html
@@ -0,0 +1,24 @@
+<title>MDTF example diagnostic</title>
+<h3>Flow-Dependent, Cross-Timescale Model Diagnostics</h3>
+<p>
+The flow-dependent model diagnostics compare the characteristics of daily
+atmospheric circulation patterns, or weather types, in reanalyses and models
+to analyze misrepresented physical processes related to spatiotemporal
+systematic errors in those models. Relationships between these biases and
+climate teleconnections (e.g., SST patterns, ENSO, MJO, etc.) can be explored
+in different models.
+</p>
+<p>
+The number of simulations run on the model is "{{NSIM}}".
+The number of clusters is "{{NCLUSTER}}".
+</p>
+<TABLE>
+<TR>
+<TH align=left>Time averages, {{FIRSTYR}}-{{LASTYR}}
+<TH align=left>{{CASENAME}}
+<TH align=left>Model
+</TR>
+<TR>
+<TH align=left>Full comparison plots
+<TH align=center><a href="model/PS/model_plot.png">plot</a>
+</TR>
+</TABLE>
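+<!-- The {{...}} tokens above ({{CASENAME}}, {{FIRSTYR}}, {{LASTYR}}, {{NSIM}},
+     {{NCLUSTER}}) are placeholders that the MDTF framework fills in from the
+     corresponding environment variables when assembling the output web page. -->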
diff --git a/diagnostics/flow_dep_diag/flow_dep_diag.py b/diagnostics/flow_dep_diag/flow_dep_diag.py
new file mode 100644
index 000000000..2f800a0c5
--- /dev/null
+++ b/diagnostics/flow_dep_diag/flow_dep_diag.py
@@ -0,0 +1,92 @@
+# Flow-Dependent, Cross-Timescale Model Diagnostics POD
+#
+# ================================================================================
+#
+# Last update: 4/27/2021
+#
+# The flow-dependent model diagnostics compare the characteristics of daily
+# atmospheric circulation patterns, or weather types, in reanalyses and models
+# to analyze misrepresented physical processes related to spatiotemporal
+# systematic errors in those models. Relationships between these biases and
+# climate teleconnections (e.g., SST patterns, ENSO, MJO, etc.) can be explored
+# in different models.
+#
+# Version and contact info
+#
+# - Version/revision information: version 1 (4/27/2021)
+# - Developers/points of contact: Ángel G. Muñoz (agmunoz@iri.columbia.edu) and
+#   Andrew W. Robertson (awr@iri.columbia.edu)
+# - Other contributors: Drew Resnick (drewr@iri.columbia.edu), James Doss-Gollin
+#
+# Open source copyright agreement
+#
+# The MDTF framework is distributed under the LGPLv3 license (see LICENSE.txt).
+#
+# Functionality
+#
+# The current package consists of the following functionalities:
+# (1) Calculation of climatologies and anomalies for the input fields (ClimAnom_func.py)
+# (2) Calculation of weather types' spatial patterns (WeatherTypes.py)
+# (3) Calculation of weather types' temporal characteristics (to be added soon)
+# (4) Procrustes analysis (to be added soon)
+# As a module of the MDTF code package, all scripts in this package can be
+# found under mdtf/MDTF_$ver/diagnostics/flow_dep_diag
+#
+# This diagnostic assumes:
+# (1) the longitude is in the range -180,180 for plotting purposes
+# (2) the data has been cropped to a specific region
+# Refer to cropping.py for code to crop your data / shift the grid.
+#
+# Required programming language and libraries
+#
+# This diagnostic runs on the most recent version of python3.
+# The required packages are as follows, and all should be the most updated version.
+# Python libraries used: "netCDF4", "xarray", "numpy", "sklearn",
+# "cartopy", "matplotlib", "numba", "datetime", "typing"
+#
+# Required model output variables
+#
+# Geopotential height at 250 hPa (units: m, daily resolution)
+# Rainfall (units: mm/day, daily resolution)
+# Temperature (units: Celsius, daily resolution)
+#
+# This diagnostic assumes the data is on a time grid with no leap years.
+# It also assumes each variable is for a single ensemble member.
+#
+# References
+#
+# Muñoz, Á. G., Yang, X., Vecchi, G. A., Robertson, A. W., & Cooke, W. F. (2017):
+# A Weather-Type-Based Cross-Time-Scale Diagnostic Framework for Coupled Circulation
+# Models. Journal of Climate, 30 (22), 8951–8972, doi:10.1175/JCLI-D-17-0115.1.
+#
+# ================================================================================
+
+# Driver file
+import os
+import glob
+
+missing_file = 0
+if len(glob.glob(os.environ["PRECT_FILE"])) == 0:
+    print("Required precipitation data missing!")
+    missing_file = 1
+if len(glob.glob(os.environ["Z250_FILE"])) == 0:
+    print("Required geopotential height data missing!")
+    missing_file = 1
+if len(glob.glob(os.environ["T250_FILE"])) == 0:
+    print("Required temperature data missing!")
+    missing_file = 1
+
+if missing_file == 1:
+    print("Flow-Dependent, Cross-Timescale Model Diagnostics Package will NOT be executed!")
+else:
+
+    try:
+        os.system("python3 " + os.environ["POD_HOME"] + "/" + "WeatherTypes.py")
+    except OSError as e:
+        print('WARNING', e.errno, e.strerror)
+        print("**************************************************")
+        print("Flow-Dependent, Cross-Timescale Model Diagnostics (WeatherTypes.py) is NOT Executed as Expected!")
+        print("**************************************************")
+
+    print("**************************************************")
+    print("Flow-Dependent, Cross-Timescale Model Diagnostics (WeatherTypes.py) Executed!")
+    print("**************************************************")
diff --git a/diagnostics/flow_dep_diag/flow_dep_diag.yml b/diagnostics/flow_dep_diag/flow_dep_diag.yml
new file mode 100644
index 000000000..83352c9fd
--- /dev/null
+++ b/diagnostics/flow_dep_diag/flow_dep_diag.yml
@@ -0,0 +1,103 @@
+name: MDTF_flow_dep_diag
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - blas=1.0=mkl
+  - bokeh=2.3.1=py37hecd8cb5_0
+  - ca-certificates=2020.12.5=h033912b_0
+  - cartopy=0.18.0=py37hf1ba7ce_1
+  - certifi=2020.12.5=py37hf985489_1
+  - cftime=1.4.1=py37he3068b8_0
+  - click=7.1.2=pyhd3eb1b0_0
+  - cloudpickle=1.6.0=py_0
+  - curl=7.71.1=hb0a8c7a_1
+  - cycler=0.10.0=py37_0
+  - cytoolz=0.11.0=py37haf1e3a3_0
+  - dask=2021.4.0=pyhd3eb1b0_0
+  - dask-core=2021.4.0=pyhd3eb1b0_0
+  - distributed=2021.4.0=py37hecd8cb5_0
+  - freetype=2.10.4=ha233b18_0
+  - fsspec=0.9.0=pyhd3eb1b0_0
+  - geos=3.8.0=hb1e8313_0
+  - hdf4=4.2.13=h39711bb_2
+  - hdf5=1.10.6=hdbbcd12_0
+  - heapdict=1.0.1=py_0
+  - importlib-metadata=3.10.0=py37hecd8cb5_0
+  - importlib_metadata=3.10.0=hd3eb1b0_0
+  - importlib_resources=5.1.2=py37hecd8cb5_1
+  - intel-openmp=2021.2.0=hecd8cb5_564
+  - jinja2=2.11.3=pyhd3eb1b0_0
+  - joblib=1.0.1=pyhd8ed1ab_0
+  - jpeg=9b=he5867d9_2
+  - kiwisolver=1.3.1=py37h23ab428_0
+  - krb5=1.18.2=h75d18d8_0
+  - lcms2=2.12=hf1fd2bf_0
+  - libblas=3.8.0=7_hd44dcd8_netlib
+  - libcblas=3.8.0=7_hd44dcd8_netlib
+  - libcurl=7.71.1=h8a08a2b_1
+  - libcxx=10.0.0=1
+  - libedit=3.1.20210216=h9ed2024_1
+  - libffi=3.3=hb1e8313_2
+  - libgfortran=3.0.1=h93005f0_2
+  - libllvm10=10.0.1=h009f743_3
+  - libnetcdf=4.6.1=hfd9a460_3
+  - libpng=1.6.37=ha441bb4_0
+  - libssh2=1.9.0=ha12b0ac_1
+  - libtiff=4.1.0=hcb84e12_1
+  - llvm-openmp=11.1.0=hda6cdc1_1
+  - llvmlite=0.36.0=py37he4411ff_4
+  - locket=0.2.1=py37hecd8cb5_1
+  - lz4-c=1.9.3=h23ab428_0
+  - markupsafe=1.1.1=py37h1de35cc_0
+  - matplotlib=3.3.4=py37hecd8cb5_0
+  - matplotlib-base=3.3.4=py37h8b3ea08_0
+  - mkl=2021.2.0=hecd8cb5_269
+  - mkl-service=2.3.0=py37h9ed2024_1
+  - mkl_fft=1.3.0=py37h4a7008c_2
+  - mkl_random=1.2.1=py37hb2f4e1b_2
+  - msgpack-python=1.0.2=py37hf7b0b51_1
+  - ncurses=6.2=h0a44026_1
+  - netcdf4=1.5.6=py37h1695cb1_0
+  - numba=0.53.1=py37hb2f4e1b_0
+  - numpy=1.20.1=py37hd6e1bb9_0
+  - numpy-base=1.20.1=py37h585ceec_0
+  - olefile=0.46=py37_0
+  - openssl=1.1.1k=h0d85af4_0
+  - packaging=20.9=pyhd3eb1b0_0
+  - pandas=1.2.4=py37h23ab428_0
+  - partd=1.2.0=pyhd3eb1b0_0
+  - pillow=8.2.0=py37h5270095_0
+  - pint=0.16.1=py_0
+  - pip=21.0.1=py37hecd8cb5_0
+  - proj=6.2.1=hfd5b9e3_0
+  - psutil=5.8.0=py37h9ed2024_1
+  - pyparsing=2.4.7=pyhd3eb1b0_0
+  - pyshp=2.1.3=pyhd3eb1b0_0
+  - python=3.7.10=h88f2d9e_0
+  - python-dateutil=2.8.1=pyhd3eb1b0_0
+  - python_abi=3.7=1_cp37m
+  - pytz=2021.1=pyhd3eb1b0_0
+  - pyyaml=5.4.1=py37h9ed2024_1
+  - readline=8.1=h9ed2024_0
+  - scikit-learn=0.22.1=py37h3dc85bc_1
+  - scipy=1.6.2=py37hd5f7400_1
+  - setuptools=52.0.0=py37hecd8cb5_0
+  - shapely=1.7.1=py37hb435bde_0
+  - six=1.15.0=py37hecd8cb5_0
+  - sortedcontainers=2.3.0=pyhd3eb1b0_0
+  - sqlite=3.35.4=hce871da_0
+  - tbb=2020.3=h879752b_0
+  - tblib=1.7.0=py_0
+  - tk=8.6.10=hb0a8c7a_0
+  - toolz=0.11.1=pyhd3eb1b0_0
+  - tornado=6.1=py37h9ed2024_0
+  - typing_extensions=3.7.4.3=pyha847dfd_0
+  - wheel=0.36.2=pyhd3eb1b0_0
+  - xarray=0.17.0=pyhd3eb1b0_0
+  - xz=5.2.5=h1de35cc_0
+  - yaml=0.2.5=haf1e3a3_0
+  - zict=2.0.0=pyhd3eb1b0_0
+  - zipp=3.4.1=pyhd3eb1b0_0
+  - zlib=1.2.11=h1de35cc_3
+  - zstd=1.4.9=h322a384_0
diff --git a/diagnostics/flow_dep_diag/settings.jsonc b/diagnostics/flow_dep_diag/settings.jsonc
new file mode 100644
index 000000000..2748fa7f3
--- /dev/null
+++ b/diagnostics/flow_dep_diag/settings.jsonc
@@ -0,0 +1,77 @@
+{
+  "settings" : {
+    "long_name" : "Flow-Dependent, Cross-Timescale Model Diagnostics",
+    "driver" : "flow_dep_diag.py",
+    "realm" : "atmos",
+    "description" : "Comparing daily atmospheric circulation pattern characteristics in reanalyses and models",
+    "runtime_requirements" : {
+      "python3" : ["netCDF4", "xarray", "numpy", "sklearn", "cartopy", "matplotlib", "numba", "datetime", "typing"]
+    },
+    "pod_env_vars" : {
+      // Number of simulations run in reanalysis weather typing.
+      // Typically 25-50; try 25 for a quick preliminary computation only
+      "NSIM" : "25",
+      // number of weather types in clustering
+      "NCLUSTER" : "6"
+    }
+  },
+  "data" : {"frequency" : "day"},
+  "dimensions" : {
+    "lat" : {
+      "standard_name" : "latitude",
+      "units" : "degrees_N"
+    },
+    "lon" : {
+      "standard_name" : "longitude",
+      "units" : "degrees_E"
+    },
+    "plev" : {
+      "standard_name" : "air_pressure",
+      "units" : "hPa",
+      "positive" : "down"
+    },
+    "time" : {
+      "standard_name" : "time",
+      "units" : "days",
+      "calendar" : "noleap"
+    }
+  },
+  "varlist" : {
+    "Z250": {
+      // "var_name" : "Z250_var",
+      "standard_name" : "geopotential_height_250_mbar",
+      "path_variable" : "Z250_FILE",
+      "freq" : "day",
+      "requirement" : "required",
+      "units" : "m",
+      "dimensions" : ["time", "lat", "lon"]
+    },
+    "T500": {
+      "standard_name" : "temperature_500_mbar",
+      "path_variable" : "T500_FILE",
+      "freq" : "day",
+      "units" : "K",
+      "requirement" : "required",
+      "dimensions" : ["time", "lat", "lon"],
+      "alternates": ["T250"]
+    },
+    "T250": {
+      // "var_name" : "T250_var",
+      "standard_name" : "temperature_250_mbar",
+      "path_variable" : "T250_FILE",
+      "freq" : "day",
+      "units" : "K",
+      "requirement" : "required",
+      "dimensions" : ["time", "lat", "lon"]
+    },
+    "PRECT": {
+      // "var_name" : "PRECT_var",
+      "standard_name" : "precipitation_rate",
+      "path_variable" : "PRECT_FILE",
+      "freq" : "day",
+      "units": "m s-1",
+      "requirement" : "required",
+      "dimensions" : ["time", "lat", "lon"]
+    }
+  }
+}