Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

processing TROPOMI L2 NO2 data and plot #225

Merged
merged 8 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 0 additions & 199 deletions examples/jupyter_notebooks/MM_trp_no2_l2_func.ipynb

This file was deleted.

1,637 changes: 1,637 additions & 0 deletions examples/jupyter_notebooks/MM_trp_no2_l2_plot.ipynb

Large diffs are not rendered by default.

1,782 changes: 1,782 additions & 0 deletions examples/jupyter_notebooks/MM_trp_no2_l2_plot_wavk.ipynb

Large diffs are not rendered by default.

171 changes: 161 additions & 10 deletions examples/yaml/control_tropomi_l2_no2.yaml
Original file line number Diff line number Diff line change
@@ -1,23 +1,174 @@
analysis:
start_time: '2022-04-30'
end_time: '2022-05-01'
start_time: '2019-07-15'
end_time: '2019-07-16'
debug: True
output_dir: /Users/mengli/Work/melodies-monet/outdata
output_dir_save: /Users/mengli/Work/melodies-monet/outdata/save_intermediate
output_dir_read: /Users/mengli/Work/melodies-monet/outdata/read_intermediate
save:
paired:
method: 'netcdf'
prefix: '201907'
data: all
read:
paired:
method: 'netcdf'
filenames:
{tropomi_l2_no2_wrfchem_v4.2: ['201907_tropomi_l2_no2_wrfchem_v4.2.nc4']}

obs:
tropomi_l2_no2:
debug: True
filename: /Users/mengli/Work/melodies-monet/obsdata/tropomi_no2/*
filename: /Users/mengli/Work/melodies-monet/obsdata/tropomi_no2/20190715/*
#filename: /Volumes/Meng/TROPOMI/20190715/*
obs_type: sat_swath_clm
sat_type: tropomi_l2_no2
variables:
qa_value:
quality_flag_min: 0.7
var_applied: ['nitrogendioxide_tropospheric_column']
fillvalue: 9.96921e+36
nitrogendioxide_tropospheric_column:
scale: 6.022141e+19 # unit convert form mol_perm2to molec_percm2,6.022141e+19
fillvalue: 9.96921e+36
#averaging_kernel: None
#air_mass_factor_troposphere: None
#tm5_tropopause_layer_index: None
#tm5_constant_a: None
#tm5_constant_b: None


ylabel_plot: 'NO2 trop. columns'
#ty_scale: 3.0 #opt
vmin_plot: 0.0 # optMin for y-axis during plotting. To apply to a plot, change restrict_yaxis = True.
vmax_plot: 1.0e+16
#vdiff_plot: 15.0 # Opt +/- range to use in bias plots. To apply to a plot, change restrict_yaxis = True.
nlevels_plot: 23 # Opt number of levels used in colorbar for contourf plot.
regulatory: False
averaging_kernel:
fillvalue: 9.96921e+36
air_mass_factor_total:
fillvalue: 9.96921e+36
air_mass_factor_troposphere:
fillvalue: 9.96921e+36
latitude: None
longitude: None

preslev: # pressure of the vertical layer center
tm5_constant_a:
group: ['PRODUCT']
maximum: 9.0e+36
tm5_constant_b:
group: ['PRODUCT']
maximum: 9.0e+36
surface_pressure:
group: ['PRODUCT/SUPPORT_DATA/INPUT_DATA/']
maximum: 9.0e+36
tm5_tropopause_layer_index:
group: ['PRODUCT']
#latitude_bounds:
# group: ['PRODUCT/SUPPORT_DATA/GEOLOCATIONS/']
#longitude_bounds:
# group: ['PRODUCT/SUPPORT_DATA/GEOLOCATIONS/']

model:
wrfchem_v4.2:
files: /Users/mengli/Work/melodies-monet/modeldata/wrfchem/0715/*
#files: /Volumes/Meng/WRF_Chem/0715/*
mod_type: 'wrfchem'
apply_ak: True
mod_kwargs:
mech: 'racm_esrl_vcp'
mapping: #model species name : obs species name
tropomi_l2_no2:
no2: nitrogendioxide_tropospheric_column
projection: ~
plot_kwargs: #Opt
color: 'dodgerblue'
marker: '^'
linestyle: ':'

plots:
plot_grp1:
type: 'timeseries' # plot type
fig_kwargs: #Opt to define figure options
figsize: [12,6] # figure size if multiple plots
default_plot_kwargs: # Opt to define defaults for all plots. Model kwargs overwrite these.
linewidth: 2.0
markersize: 10.
text_kwargs: #Opt
fontsize: 18.
domain_type: ['all'] #List of domain types: 'all' or any domain in obs file. (e.g., airnow: epa_region, state_name, siteid, etc.)
domain_name: ['CONUS'] #List of domain names. If domain_type = all domain_name is used in plot title.
data: ['tropomi_l2_no2_wrfchem_v4.2'] # make this a list of pairs in obs_model where the obs is the obs label and model is the model_label
data_proc:
#See 'altitde_yax2' list below for secondary y-axis options
#altitude_variable: altitude
#altitude_ticks: 1000 # Altitude tick interval in meters (for secondary y-axis for altitude (m))
rem_obs_nan: True # True: Remove all points where model or obs variable is NaN. False: Remove only points where model variable is NaN.
ts_select_time: 'time' #Time used for avg and plotting: Options: 'time' for UTC or 'time_local'
ts_avg_window: 'H'# pandas resample rule (e.g., 'H', 'D'). No averaging is done if ts_avg_window is null or not specified.
set_axis: False #If select True, add vmin_plot and vmax_plot for each variable in obs.
#vmin2, vmax2 filter not needed as filter_dict option added in 'altitude_yax2' to subset the paireddf as per altitude secondary-axis limits
#vmin2: #0 #Optional
#vmax2: #5000 #12000 #Optional #Subset limits for secondary y-axis (altitude_variable)
plot_grp2:
type: 'gridded_spatial_bias' # plot type
fig_kwargs: #For all spatial plots, specify map_kwargs here too.
states: True
figsize: [10, 5] # figure size
text_kwargs: #Opt
fontsize: 16.
domain_type: ['all'] #List of domain types: 'all' or any domain in obs file. (e.g., airnow: epa_region, state_name, siteid, etc.)
domain_name: ['CONUS'] #List of domain names. If domain_type = all domain_name is used in plot title.
data: ['tropomi_l2_no2_wrfchem_v4.2'] # make this a list of pairs in obs_model where the obs is the obs label and model is the model_label
data_proc:
#filter_dict: {'state_name':{'value':['CA','NY'],'oper':'isin'},'WS':{'value':1,'oper':'<'}}
#filter_string: state_name in ['CA','NY'] and WS < 1 # Uses pandas query method.
#rem_obs_by_nan_pct: {'group_var': 'siteid','pct_cutoff': 25,'times':'hourly'} # Groups by group_var, then removes all instances of groupvar where obs variable is > pct_cutoff % nan values
rem_obs_nan: True # True: Remove all points where model or obs variable is NaN. False: Remove only points where model variable is NaN.
set_axis: True #If select True, add vdiff_plot for each variable in obs.
plot_grp3:
type: 'taylor' # plot type
fig_kwargs: #Opt to define figure options
figsize: [8,8] # figure size if multiple plots
default_plot_kwargs: # Opt to define defaults for all plots. Model kwargs overwrite these.
markersize: 10.
text_kwargs: #Opt
fontsize: 16.
domain_type: ['all'] #List of domain types: 'all' or any domain in obs file. (e.g., airnow: epa_region, state_name, siteid, etc.)
domain_name: ['CONUS'] #List of domain names. If domain_type = all domain_name is used in plot title.
data: ['tropomi_l2_no2_wrfchem_v4.2'] # make this a list of pairs in obs_model where the obs is the obs label and model is the model_label
data_proc:
#filter_dict: {'state_name':{'value':['CA','NY'],'oper':'isin'},'WS':{'value':1,'oper':'<'}}
#filter_string: state_name in ['CA','NY'] and WS < 1 # Uses pandas query method.
#rem_obs_by_nan_pct: {'group_var': 'siteid','pct_cutoff': 25,'times':'hourly'} # Groups by group_var, then removes all instances of groupvar where obs variable is > pct_cutoff % nan values
rem_obs_nan: True # True: Remove all points where model or obs variable is NaN. False: Remove only points where model variable is NaN.
set_axis: False #If select True, add ty_scale for each variable in obs.
plot_grp4:
type: 'boxplot' # plot type
fig_kwargs:
figsize: [8, 6] # figure size
default_plot_kwargs: # Opt to define defaults for all plots. Model kwargs overwrite these.
markersize: 10.
text_kwargs: #Opt
fontsize: 20.
domain_type: ['all'] #List of domain types: 'all' or any domain in obs file. (e.g., airnow: epa_region, state_name, siteid, etc.)
domain_name: ['CONUS'] #List of domain names. If domain_type = all domain_name is used in plot title.
data: ['tropomi_l2_no2_wrfchem_v4.2'] # make this a list of pairs in obs_model where the obs is the obs label and model is the model_label
data_proc:
#filter_dict: {'state_name':{'value':['CA','NY'],'oper':'isin'},'WS':{'value':1,'oper':'<'}}
#filter_string: state_name in ['CA','NY'] and WS < 1 # Uses pandas query method.
#rem_obs_by_nan_pct: {'group_var': 'siteid','pct_cutoff': 25,'times':'hourly'} # Groups by group_var, then removes all instances of groupvar where obs variable is > pct_cutoff % nan values
rem_obs_nan: True # True: Remove all points where model or obs variable is NaN. False: Remove only points where model variable is NaN.
set_axis: True #If select True, add vmin_plot and vmax_plot for each variable in obs.

stats:
#Stats require positive numbers, so if you want to calculate temperature use Kelvin!
stat_list: ['MB','NMB', 'R2', 'RMSE'] #List stats to calculate. Dictionary of definitions included in plots/proc_stats.py Only stats listed below are currently working.
#Full calc list ['STDO', 'STDP', 'MdnNB','MdnNE','NMdnGE', 'NO','NOP','NP','MO','MP', 'MdnO', 'MdnP', 'RM', 'RMdn', 'MB', 'MdnB', 'NMB', 'NMdnB', 'FB', 'ME','MdnE','NME', 'NMdnE', 'FE', 'R2', 'RMSE','d1','E1', 'IOA', 'AC']
round_output: 2 #Opt, defaults to rounding to 3rd decimal place.
output_table: True #Always outputs a .txt file. Optional to also output as a table.
output_table_kwargs: #Opt
figsize: [12, 6] # figure size
fontsize: 12.
xscale: 1.4
yscale: 1.4
edges: 'horizontal'
domain_type: ['all'] #List of domain types: 'all' or any domain in obs file. (e.g., airnow: epa_region, state_name, siteid, etc.)
domain_name: ['CONUS'] #List of domain names. If domain_type = all domain_name is used in plot title.
data: ['tropomi_l2_no2_wrfchem_v4.2'] # make this a list of pairs in obs_model where the obs is the obs labeland model is the model_label

67 changes: 61 additions & 6 deletions melodies_monet/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,9 @@ def open_sat_obs(self,time_interval=None):
self.obj = modis_l2.read_mfdataset(
self.file, self.variable_dict, debug=self.debug)
elif self.sat_type == 'tropomi_l2_no2':
from monetio import tropomi_l2_no2
#from monetio import tropomi_l2_no2
print('Reading TROPOMI L2 NO2')
self.obj = tropomi_l2_no2.read_trpdataset(
self.obj = mio.sat._tropomi_l2_no2_mm.read_trpdataset(
self.file, self.variable_dict, debug=self.debug)
else:
print('file reader not implemented for {} observation'.format(self.sat_type))
Expand Down Expand Up @@ -1144,6 +1144,37 @@ def pair_data(self, time_interval=None):
p.obj = paired_data
label = '{}_{}'.format(p.obs,p.model)
self.paired[label] = p

if obs.label == 'tropomi_l2_no2':
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be called on the obs.sat_type instead? I think we decided to keep label as a fairly subjective thing (say there are several version of tropomi retrievals, then you could use the label to differentiate them).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that should now be obs.sat_type and not obs.label. I think that change may have been made during the process of the big develop_satellite merge.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, great - I changed that in the driver file for tropomi.

from .util import sat_l2_swath_utility as sutil
from .util import cal_mod_no2col as mutil

# calculate model no2 trop. columns. M.Li
model_obj = mutil.cal_model_no2columns(mod.obj)
#obs_dat = obs.obj.sel(time=slice(self.start_time.date(),self.end_time.date())).copy()

if mod.apply_ak == True:
paired_data = sutil.trp_interp_swatogrd_ak(obs.obj, model_obj)
else:
paired_data = sutil.trp_interp_swatogrd(obs.obj, model_obj)

self.models[model_label].obj = model_obj

p = pair()

paired_data = paired_data.reset_index("y") # for saving
paired_data_cp = paired_data.sel(time=slice(self.start_time.date(),self.end_time.date())).copy()

p.type = obs.obs_type
p.obs = obs.label
p.model = mod.label
p.model_vars = keys
p.obs_vars = obs_vars
p.obj = paired_data_cp
label = '{}_{}'.format(p.obs,p.model)

self.paired[label] = p

# if sat_grid_clm (satellite l3 column products)
elif obs.obs_type.lower() == 'sat_grid_clm':
if len(keys) > 1:
Expand All @@ -1165,6 +1196,7 @@ def pair_data(self, time_interval=None):
p.obj = paired_obsgrid
label = '{}_{}'.format(p.obs,p.model)
self.paired[label] = p

elif obs.sat_type == 'mopitt_l3':
from .util import satellite_utilities as sutil
if mod.apply_ak:
Expand All @@ -1186,6 +1218,7 @@ def pair_data(self, time_interval=None):
self.paired[label] = p
else:
print("Pairing without averaging kernel has not been enabled for this dataset")

def concat_pairs(self):
"""Read and concatenate all observation and model time interval pair data,
populating the :attr:`paired` dict.
Expand Down Expand Up @@ -1279,6 +1312,10 @@ def plotting(self):
# Adjust the modvar as done in pairing script, if the species name in obs and model are the same.
if obsvar == modvar:
modvar = modvar + '_new'

# Adjust the modvar for satelitte no2 trop. column paring. M.Li
if obsvar == 'nitrogendioxide_tropospheric_column':
modvar = modvar + 'trpcol'

# for pt_sfc data, convert to pandas dataframe, format, and trim
if obs_type in ["sat_swath_sfc", "sat_swath_clm",
Expand Down Expand Up @@ -1489,10 +1526,12 @@ def plotting(self):
vmin = None
vmax = None
# Select time to use as index.

# 2024-03-01 MEB needs to only apply if pandas. fails for xarray
if isinstance(pairdf,pd.core.frame.DataFrame):
pairdf = pairdf.set_index(grp_dict['data_proc']['ts_select_time'])
# Specify ts_avg_window if noted in yaml file. #qzr++

if 'ts_avg_window' in grp_dict['data_proc'].keys():
a_w = grp_dict['data_proc']['ts_avg_window']
else:
Expand Down Expand Up @@ -1582,6 +1621,7 @@ def plotting(self):
altitude_yax2 = grp_dict['data_proc']['altitude_yax2']
ax = airplots.add_yax2_altitude(ax, pairdf, altitude_yax2, text_kwargs, vmin_y2, vmax_y2)
savefig(outname + '.png', logo_height=150)

del (ax, fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict) # Clear axis for next plot.

# At the end save the plot.
Expand Down Expand Up @@ -1811,6 +1851,12 @@ def plotting(self):
plt.close() # Close the current figure

elif plot_type.lower() == 'boxplot':
# squeeze the xarray for boxplot, M.Li
if obs_type in ["sat_swath_sfc", "sat_swath_clm", "sat_grid_sfc", "sat_grid_clm", "sat_swath_prof"]:
pairdf_sel = pairdf.squeeze()
else:
pairdf_sel = pairdf

if set_yaxis == True:
if all(k in obs_plot_dict for k in ('vmin_plot', 'vmax_plot')):
vmin = obs_plot_dict['vmin_plot']
Expand All @@ -1824,10 +1870,10 @@ def plotting(self):
vmax = None
# First for p_index = 0 create the obs box plot data array.
if p_index == 0:
comb_bx, label_bx = splots.calculate_boxplot(pairdf, pairdf_reg, column=obsvar,
comb_bx, label_bx = splots.calculate_boxplot(pairdf_sel, pairdf_reg, column=obsvar,
label=p.obs, plot_dict=obs_dict)
# Then add the models to this dataarray.
comb_bx, label_bx = splots.calculate_boxplot(pairdf, pairdf_reg, column=modvar, label=p.model,
comb_bx, label_bx = splots.calculate_boxplot(pairdf_sel, pairdf_reg, column=modvar, label=p.model,
plot_dict=plot_dict, comb_bx=comb_bx,
label_bx=label_bx)
# For the last p_index make the plot.
Expand Down Expand Up @@ -1866,7 +1912,7 @@ def plotting(self):
comb_bx, label_bx,region_bx = splots.calculate_multi_boxplot(pairdf, pairdf_reg,region_name=region_name, column=obsvar,
label=p.obs, plot_dict=obs_dict)
# Then add the models to this dataarray.
comb_bx, label_bx,region_bx = splots.calculate_multi_boxplot(pairdf, pairdf_reg, region_name= region_name,column=modvar, label=p.model,
comb_bx, label_bx,region_bx = splots.calculate_multi_boxplot(pairdf, pairdf_reg, region_name= region_name,column=modvar, label=p.model,
plot_dict=plot_dict, comb_bx=comb_bx,
label_bx=label_bx)

Expand Down Expand Up @@ -2195,9 +2241,18 @@ def stats(self):
# Adjust the modvar as done in pairing script, if the species name in obs and model are the same.
if obsvar == modvar:
modvar = modvar + '_new'
# for satellite no2 trop. columns paired data, M.Li
if obsvar == 'nitrogendioxide_tropospheric_column':
modvar = modvar + 'trpcol'

# convert to dataframe
pairdf_all = p.obj.to_dataframe(dim_order=["time", "x"])
# handle different dimensios, M.Li
if ('y' in p.obj.dims) and ('x' in p.obj.dims):
pairdf_all = p.obj.to_dataframe(dim_order=["x", "y"])
elif ('y' in p.obj.dims) and ('time' in p.obj.dims):
pairdf_all = p.obj.to_dataframe(dim_order=["time", "y"])
else:
pairdf_all = p.obj.to_dataframe(dim_order=["time", "x"])

# Select only the analysis time window.
pairdf_all = pairdf_all.loc[self.start_time : self.end_time]
Expand Down
Loading
Loading