NOAA-CSL · rrbuchholz · Jun 26, 2024 · Jan 8, 2024 · Jan 8, 2024 · Mar 28, 2024
diff --git a/examples/jupyter_notebooks/MM_trp_no2_l2_func.ipynb b/examples/jupyter_notebooks/MM_trp_no2_l2_func.ipynb
diff --git a/examples/jupyter_notebooks/MM_trp_no2_l2_plot.ipynb b/examples/jupyter_notebooks/MM_trp_no2_l2_plot.ipynb
diff --git a/examples/jupyter_notebooks/MM_trp_no2_l2_plot_wavk.ipynb b/examples/jupyter_notebooks/MM_trp_no2_l2_plot_wavk.ipynb
diff --git a/examples/yaml/control_tropomi_l2_no2.yaml b/examples/yaml/control_tropomi_l2_no2.yaml
@@ -1,23 +1,174 @@
 analysis:
-  start_time: '2022-04-30'
-  end_time:   '2022-05-01'
+  start_time: '2019-07-15'
+  end_time:   '2019-07-16'
   debug: True
+  output_dir: /Users/mengli/Work/melodies-monet/outdata
+  output_dir_save: /Users/mengli/Work/melodies-monet/outdata/save_intermediate
+  output_dir_read: /Users/mengli/Work/melodies-monet/outdata/read_intermediate
+  save:
+    paired:
+      method: 'netcdf'
+      prefix: '201907'
+      data: all
+  read:
+    paired: 
+      method: 'netcdf'
+      filenames: 
+        {tropomi_l2_no2_wrfchem_v4.2: ['201907_tropomi_l2_no2_wrfchem_v4.2.nc4']}
 
 obs:
   tropomi_l2_no2:
     debug: True
-    filename: /Users/mengli/Work/melodies-monet/obsdata/tropomi_no2/*
+    filename: /Users/mengli/Work/melodies-monet/obsdata/tropomi_no2/20190715/*
+    #filename: /Volumes/Meng/TROPOMI/20190715/*
     obs_type: sat_swath_clm
+    sat_type: tropomi_l2_no2
     variables:
         qa_value:
             quality_flag_min: 0.7
+            var_applied: ['nitrogendioxide_tropospheric_column']
+            fillvalue: 9.96921e+36
         nitrogendioxide_tropospheric_column:       
             scale: 6.022141e+19 # unit convert form mol_perm2to molec_percm2,6.022141e+19
             fillvalue: 9.96921e+36
-        #averaging_kernel: None
-        #air_mass_factor_troposphere: None
-        #tm5_tropopause_layer_index: None
-        #tm5_constant_a: None
-        #tm5_constant_b: None
-
-
+            ylabel_plot: 'NO2 trop. columns'
+            #ty_scale: 3.0 #opt
+            vmin_plot: 0.0 # optMin for y-axis during plotting. To apply to a plot, change restrict_yaxis = True.
+            vmax_plot: 1.0e+16
+            #vdiff_plot: 15.0 # Opt +/- range to use in bias plots. To apply to a plot, change restrict_yaxis = True.
+            nlevels_plot: 23  # Opt number of levels used in colorbar for contourf plot.
+            regulatory: False
+        averaging_kernel:
+            fillvalue: 9.96921e+36
+        air_mass_factor_total:
+            fillvalue: 9.96921e+36
+        air_mass_factor_troposphere:
+            fillvalue: 9.96921e+36
+        latitude: None 
+        longitude: None 
+
+        preslev: # pressure of the vertical layer center
+            tm5_constant_a:    
+                group: ['PRODUCT']
+                maximum: 9.0e+36
+            tm5_constant_b:
+                group: ['PRODUCT']
+                maximum: 9.0e+36
+            surface_pressure:
+                group: ['PRODUCT/SUPPORT_DATA/INPUT_DATA/']
+                maximum: 9.0e+36
+            tm5_tropopause_layer_index:
+                group: ['PRODUCT']
+        #latitude_bounds:
+        #    group: ['PRODUCT/SUPPORT_DATA/GEOLOCATIONS/'] 
+        #longitude_bounds:
+        #   group: ['PRODUCT/SUPPORT_DATA/GEOLOCATIONS/']
+
+model:
+  wrfchem_v4.2: 
+    files: /Users/mengli/Work/melodies-monet/modeldata/wrfchem/0715/*
+    #files: /Volumes/Meng/WRF_Chem/0715/*
+    mod_type: 'wrfchem'
+    apply_ak: True
+    mod_kwargs: 
+      mech: 'racm_esrl_vcp'
+    mapping: #model species name : obs species name
+      tropomi_l2_no2:
+        no2: nitrogendioxide_tropospheric_column
+    projection: ~
+    plot_kwargs: #Opt
+      color: 'dodgerblue'
+      marker: '^'
+      linestyle: ':' 
+
+plots:
+  plot_grp1:
+    type: 'timeseries' # plot type
+    fig_kwargs: #Opt to define figure options
+      figsize: [12,6] # figure size if multiple plots
+    default_plot_kwargs: # Opt to define defaults for all plots. Model kwargs overwrite these.
+      linewidth: 2.0
+      markersize: 10.
+    text_kwargs: #Opt
+      fontsize: 18.
+    domain_type: ['all'] #List of domain types: 'all' or any domain in obs file. (e.g., airnow: epa_region, state_name, siteid, etc.) 
+    domain_name: ['CONUS'] #List of domain names. If domain_type = all domain_name is used in plot title.
+    data: ['tropomi_l2_no2_wrfchem_v4.2'] # make this a list of pairs in obs_model where the obs is the obs label and model is the model_label
+    data_proc:
+      #See 'altitde_yax2' list below for secondary y-axis options
+      #altitude_variable: altitude  
+      #altitude_ticks: 1000  # Altitude tick interval in meters (for secondary y-axis for altitude (m))
+      rem_obs_nan: True # True: Remove all points where model or obs variable is NaN. False: Remove only points where model variable is NaN.
+      ts_select_time: 'time' #Time used for avg and plotting: Options: 'time' for UTC or 'time_local'
+      ts_avg_window:  'H'# pandas resample rule (e.g., 'H', 'D'). No averaging is done if ts_avg_window is null or not specified.
+      set_axis: False #If select True, add vmin_plot and vmax_plot for each variable in obs.
+      #vmin2, vmax2 filter not needed as filter_dict option added in 'altitude_yax2' to subset the paireddf as per altitude secondary-axis limits
+      #vmin2: #0  #Optional
+      #vmax2: #5000 #12000 #Optional #Subset limits for secondary y-axis (altitude_variable) 
+  plot_grp2:
+    type: 'gridded_spatial_bias' # plot type
+    fig_kwargs: #For all spatial plots, specify map_kwargs here too.
+      states: True
+      figsize: [10, 5] # figure size 
+    text_kwargs: #Opt
+      fontsize: 16.
+    domain_type: ['all'] #List of domain types: 'all' or any domain in obs file. (e.g., airnow: epa_region, state_name, siteid, etc.) 
+    domain_name: ['CONUS'] #List of domain names. If domain_type = all domain_name is used in plot title.
+    data: ['tropomi_l2_no2_wrfchem_v4.2'] # make this a list of pairs in obs_model where the obs is the obs label and model is the model_label
+    data_proc:
+      #filter_dict: {'state_name':{'value':['CA','NY'],'oper':'isin'},'WS':{'value':1,'oper':'<'}}
+      #filter_string: state_name in ['CA','NY'] and WS < 1 #  Uses pandas query method.
+      #rem_obs_by_nan_pct: {'group_var': 'siteid','pct_cutoff': 25,'times':'hourly'} # Groups by group_var, then removes all instances of groupvar where obs variable is > pct_cutoff % nan values
+      rem_obs_nan: True # True: Remove all points where model or obs variable is NaN. False: Remove only points where model variable is NaN.
+      set_axis: True #If select True, add vdiff_plot for each variable in obs.
+  plot_grp3:
+    type: 'taylor' # plot type
+    fig_kwargs: #Opt to define figure options
+      figsize: [8,8] # figure size if multiple plots
+    default_plot_kwargs: # Opt to define defaults for all plots. Model kwargs overwrite these.
+      markersize: 10.
+    text_kwargs: #Opt
+      fontsize: 16.
+    domain_type: ['all'] #List of domain types: 'all' or any domain in obs file. (e.g., airnow: epa_region, state_name, siteid, etc.) 
+    domain_name: ['CONUS'] #List of domain names. If domain_type = all domain_name is used in plot title.
+    data: ['tropomi_l2_no2_wrfchem_v4.2'] # make this a list of pairs in obs_model where the obs is the obs label and model is the model_label
+    data_proc:
+      #filter_dict: {'state_name':{'value':['CA','NY'],'oper':'isin'},'WS':{'value':1,'oper':'<'}}
+      #filter_string: state_name in ['CA','NY'] and WS < 1 #  Uses pandas query method.
+      #rem_obs_by_nan_pct: {'group_var': 'siteid','pct_cutoff': 25,'times':'hourly'} # Groups by group_var, then removes all instances of groupvar where obs variable is > pct_cutoff % nan values
+      rem_obs_nan: True # True: Remove all points where model or obs variable is NaN. False: Remove only points where model variable is NaN.
+      set_axis: False #If select True, add ty_scale for each variable in obs.
+  plot_grp4:
+    type: 'boxplot' # plot type
+    fig_kwargs: 
+      figsize: [8, 6] # figure size 
+    default_plot_kwargs: # Opt to define defaults for all plots. Model kwargs overwrite these.
+      markersize: 10.
+    text_kwargs: #Opt
+      fontsize: 20.
+    domain_type: ['all'] #List of domain types: 'all' or any domain in obs file. (e.g., airnow: epa_region, state_name, siteid, etc.) 
+    domain_name: ['CONUS'] #List of domain names. If domain_type = all domain_name is used in plot title.
+    data: ['tropomi_l2_no2_wrfchem_v4.2'] # make this a list of pairs in obs_model where the obs is the obs label and model is the model_label
+    data_proc:
+      #filter_dict: {'state_name':{'value':['CA','NY'],'oper':'isin'},'WS':{'value':1,'oper':'<'}}
+      #filter_string: state_name in ['CA','NY'] and WS < 1 #  Uses pandas query method.
+      #rem_obs_by_nan_pct: {'group_var': 'siteid','pct_cutoff': 25,'times':'hourly'} # Groups by group_var, then removes all instances of groupvar where obs variable is > pct_cutoff % nan values
+      rem_obs_nan: True # True: Remove all points where model or obs variable is NaN. False: Remove only points where model variable is NaN.
+      set_axis: True #If select True, add vmin_plot and vmax_plot for each variable in obs.
+
+stats:
+  #Stats require positive numbers, so if you want to calculate temperature use Kelvin!
+  stat_list: ['MB','NMB', 'R2', 'RMSE'] #List stats to calculate. Dictionary of definitions included in plots/proc_stats.py Only stats listed below are currently working.
+  #Full calc list ['STDO', 'STDP', 'MdnNB','MdnNE','NMdnGE', 'NO','NOP','NP','MO','MP', 'MdnO', 'MdnP', 'RM', 'RMdn', 'MB', 'MdnB', 'NMB', 'NMdnB', 'FB', 'ME','MdnE','NME', 'NMdnE', 'FE', 'R2', 'RMSE','d1','E1', 'IOA', 'AC']
+  round_output: 2 #Opt, defaults to rounding to 3rd decimal place.
+  output_table: True #Always outputs a .txt file. Optional to also output as a table.
+  output_table_kwargs: #Opt
+    figsize: [12, 6] # figure size
+    fontsize: 12.
+    xscale: 1.4
+    yscale: 1.4
+    edges: 'horizontal'
+  domain_type: ['all'] #List of domain types: 'all' or any domain in obs file. (e.g., airnow: epa_region, state_name, siteid, etc.)
+  domain_name: ['CONUS'] #List of domain names. If domain_type = all domain_name is used in plot title.
+  data: ['tropomi_l2_no2_wrfchem_v4.2'] # make this a list of pairs in obs_model where the obs is the obs labeland model is the model_label
+
diff --git a/melodies_monet/driver.py b/melodies_monet/driver.py
@@ -287,9 +287,9 @@ def open_sat_obs(self,time_interval=None):
                 self.obj = modis_l2.read_mfdataset(
                     self.file, self.variable_dict, debug=self.debug)
             elif self.sat_type == 'tropomi_l2_no2':
-                from monetio import tropomi_l2_no2
+                #from monetio import tropomi_l2_no2
                 print('Reading TROPOMI L2 NO2')
-                self.obj = tropomi_l2_no2.read_trpdataset(
+                self.obj = mio.sat._tropomi_l2_no2_mm.read_trpdataset(
                     self.file, self.variable_dict, debug=self.debug)
             else:
                 print('file reader not implemented for {} observation'.format(self.sat_type))
@@ -1144,6 +1144,37 @@ def pair_data(self, time_interval=None):
                         p.obj = paired_data 
                         label = '{}_{}'.format(p.obs,p.model)
                         self.paired[label] = p
+
+                    if obs.label == 'tropomi_l2_no2':
+                        from .util import sat_l2_swath_utility as sutil
+                        from .util import cal_mod_no2col as mutil
+
+                        # calculate model no2 trop. columns. M.Li
+                        model_obj = mutil.cal_model_no2columns(mod.obj)
+                        #obs_dat = obs.obj.sel(time=slice(self.start_time.date(),self.end_time.date())).copy()
+
+                        if mod.apply_ak == True:
+                            paired_data = sutil.trp_interp_swatogrd_ak(obs.obj, model_obj)
+                        else:
+                            paired_data = sutil.trp_interp_swatogrd(obs.obj, model_obj)
+
+                        self.models[model_label].obj = model_obj
+
+                        p = pair()
+
+                        paired_data = paired_data.reset_index("y") # for saving
+                        paired_data_cp = paired_data.sel(time=slice(self.start_time.date(),self.end_time.date())).copy()
+
+                        p.type = obs.obs_type
+                        p.obs = obs.label
+                        p.model = mod.label
+                        p.model_vars = keys
+                        p.obs_vars = obs_vars
+                        p.obj = paired_data_cp 
+                        label = '{}_{}'.format(p.obs,p.model)
+
+                        self.paired[label] = p
+
                 # if sat_grid_clm (satellite l3 column products)
                 elif obs.obs_type.lower() == 'sat_grid_clm':
                     if len(keys) > 1: 
@@ -1165,6 +1196,7 @@ def pair_data(self, time_interval=None):
                         p.obj = paired_obsgrid
                         label = '{}_{}'.format(p.obs,p.model)
                         self.paired[label] = p
+
                     elif obs.sat_type == 'mopitt_l3':
                         from .util import satellite_utilities as sutil
                         if mod.apply_ak: 
@@ -1186,6 +1218,7 @@ def pair_data(self, time_interval=None):
                             self.paired[label] = p
                         else:
                             print("Pairing without averaging kernel has not been enabled for this dataset")
+
     def concat_pairs(self):
         """Read and concatenate all observation and model time interval pair data,
         populating the :attr:`paired` dict.
@@ -1279,6 +1312,10 @@ def plotting(self):
                         # Adjust the modvar as done in pairing script, if the species name in obs and model are the same.
                         if obsvar == modvar:
                             modvar = modvar + '_new'
+
+                        # Adjust the modvar for satelitte no2 trop. column paring. M.Li
+                        if obsvar == 'nitrogendioxide_tropospheric_column':
+                            modvar = modvar + 'trpcol'
 
                         # for pt_sfc data, convert to pandas dataframe, format, and trim
                         if obs_type in ["sat_swath_sfc", "sat_swath_clm", 
@@ -1489,10 +1526,12 @@ def plotting(self):
                                 vmin = None
                                 vmax = None
                             # Select time to use as index.
+
                             # 2024-03-01 MEB needs to only apply if pandas. fails for xarray
                             if isinstance(pairdf,pd.core.frame.DataFrame):
                                 pairdf = pairdf.set_index(grp_dict['data_proc']['ts_select_time'])
                             # Specify ts_avg_window if noted in yaml file. #qzr++
+
                             if 'ts_avg_window' in grp_dict['data_proc'].keys():
                                 a_w = grp_dict['data_proc']['ts_avg_window']
                             else:
@@ -1582,6 +1621,7 @@ def plotting(self):
                                     altitude_yax2 = grp_dict['data_proc']['altitude_yax2']
                                     ax = airplots.add_yax2_altitude(ax, pairdf, altitude_yax2, text_kwargs, vmin_y2, vmax_y2)
                                 savefig(outname + '.png', logo_height=150)
+
                                 del (ax, fig_dict, plot_dict, text_dict, obs_dict, obs_plot_dict)  # Clear axis for next plot.
 
                             # At the end save the plot.
@@ -1811,6 +1851,12 @@ def plotting(self):
                             plt.close()  # Close the current figure
 
                         elif plot_type.lower() == 'boxplot':
+                            # squeeze the xarray for boxplot, M.Li
+                            if obs_type in  ["sat_swath_sfc", "sat_swath_clm", "sat_grid_sfc", "sat_grid_clm", "sat_swath_prof"]:
+                                pairdf_sel = pairdf.squeeze()
+                            else: 
+                                pairdf_sel = pairdf
+
                             if set_yaxis == True:
                                 if all(k in obs_plot_dict for k in ('vmin_plot', 'vmax_plot')):
                                     vmin = obs_plot_dict['vmin_plot']
@@ -1824,10 +1870,10 @@ def plotting(self):
                                 vmax = None
                             # First for p_index = 0 create the obs box plot data array.
                             if p_index == 0:
-                                comb_bx, label_bx = splots.calculate_boxplot(pairdf, pairdf_reg, column=obsvar,   
+                                comb_bx, label_bx = splots.calculate_boxplot(pairdf_sel, pairdf_reg, column=obsvar,   
                                                                                        label=p.obs, plot_dict=obs_dict)
                             # Then add the models to this dataarray.
-                            comb_bx, label_bx = splots.calculate_boxplot(pairdf, pairdf_reg, column=modvar, label=p.model,  
+                            comb_bx, label_bx = splots.calculate_boxplot(pairdf_sel, pairdf_reg, column=modvar, label=p.model,  
                                                                                     plot_dict=plot_dict, comb_bx=comb_bx,
                                                                                     label_bx=label_bx)
                             # For the last p_index make the plot.
@@ -1866,7 +1912,7 @@ def plotting(self):
                                 comb_bx, label_bx,region_bx = splots.calculate_multi_boxplot(pairdf, pairdf_reg,region_name=region_name, column=obsvar, 
                                                                              label=p.obs, plot_dict=obs_dict)
                             # Then add the models to this dataarray.
-                            comb_bx, label_bx,region_bx = splots.calculate_multi_boxplot(pairdf, pairdf_reg, region_name= region_name,column=modvar, label=p.model,   
+                            comb_bx, label_bx,region_bx = splots.calculate_multi_boxplot(pairdf, pairdf_reg, region_name= region_name,column=modvar, label=p.model, 
                                                                          plot_dict=plot_dict, comb_bx=comb_bx,
                                                                          label_bx=label_bx)
 
@@ -2195,9 +2241,18 @@ def stats(self):
                         # Adjust the modvar as done in pairing script, if the species name in obs and model are the same.
                         if obsvar == modvar:
                             modvar = modvar + '_new'
+                        # for satellite no2 trop. columns paired data, M.Li
+                        if obsvar == 'nitrogendioxide_tropospheric_column':
+                            modvar = modvar + 'trpcol' 
 
                         # convert to dataframe
-                        pairdf_all = p.obj.to_dataframe(dim_order=["time", "x"])
+                        # handle different dimensios, M.Li
+                        if ('y' in p.obj.dims) and ('x' in p.obj.dims):
+                            pairdf_all = p.obj.to_dataframe(dim_order=["x", "y"])
+                        elif ('y' in p.obj.dims) and ('time' in p.obj.dims):
+                            pairdf_all = p.obj.to_dataframe(dim_order=["time", "y"])
+                        else:
+                            pairdf_all = p.obj.to_dataframe(dim_order=["time", "x"])
 
                         # Select only the analysis time window.
                         pairdf_all = pairdf_all.loc[self.start_time : self.end_time]