Skip to content

Commit

Permalink
Merge pull request #176 from NOAA-CSL/develop_grid
Browse files Browse the repository at this point in the history
Develop grid
  • Loading branch information
dwfncar authored Oct 11, 2023
2 parents 8e8d180 + e288e40 commit 58b6546
Show file tree
Hide file tree
Showing 10 changed files with 492 additions and 74 deletions.
5 changes: 5 additions & 0 deletions docs/background/gridded_datasets.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Gridded Datasets
================

Gridded datasets are observational or model products provided on a regular grid (for example, MODIS level-3 files); MELODIES-MONET can read them directly and optionally regrid them to a common target grid.

5 changes: 5 additions & 0 deletions docs/background/time_chunking.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Time Chunking
================

Time chunking splits a long analysis period into shorter intervals (set by ``time_interval`` in the control file) so that observations and model output are read and paired one chunk at a time.

2 changes: 2 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ MONETIO please refer to:
background/supported_analyses
background/supported_plots
background/supported_stats
background/time_chunking
background/gridded_datasets

.. toctree::
:hidden:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
analysis:
start_time: '2020-01-01'
end_time: '2020-12-31'
time_interval: 'MS'
output_dir: $HOME/Plots
debug: True
regrid: False
target_grid: $HOME/Data/Grids/cam_grid.nc
time_chunking_with_gridded_data: True

obs:

MOD08_M3:
data_format: gridded_eos
datadir: $HOME/Data/MOD08_M3
obs_type: gridded_data
filename: MOD08_M3.AYYYYDDD.061.*_regrid.nc
regrid:
base_grid: $HOME/Data/Grids/modis_l3_grid.nc
method: bilinear
variables:
AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean:
fillvalue: -9999
scale: 0.001
units: none

model:

MERRA2:
data_format: netcdf
mod_type: merra2
datadir: $HOME/Data/MERRA2
files: MERRA2_*.tavgM_2d_aer_Nx.YYYYMM_MM_TOTEXTTAU_regrid.nc4
regrid:
base_grid: $HOME/Data/Grids/merra2_grid.nc
method: bilinear
variables:
fillvalue: 1.e+15
scale: 1.0
units: none
mapping:
MOD08_M3:
TOTEXTTAU: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean

Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from melodies_monet import driver

an = driver.analysis()
an.control = 'control_time_chunking_with_gridded_data.yaml'
an.read_control()
an.setup_regridders()

for time_interval in an.time_intervals:

print(time_interval)

an.open_obs(time_interval=time_interval)
an.open_models(time_interval=time_interval)

print(an.obs)
for obs in an.obs:
print(an.obs[obs].obj)
print(an.models)
for mod in an.models:
print(an.models[mod].obj)
213 changes: 139 additions & 74 deletions melodies_monet/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def __repr__(self):
")"
)

def open_obs(self, time_interval=None):
def open_obs(self, time_interval=None, control_dict=None):
"""Open the observational data, store data in observation pair,
and apply mask and scaling.
Expand All @@ -149,30 +149,47 @@ def open_obs(self, time_interval=None):
from glob import glob
from numpy import sort
from . import tutorial
from .util import analysis_util
from .util import read_grid_util

time_chunking_with_gridded_data \
= 'time_chunking_with_gridded_data' in control_dict['analysis'].keys() \
and control_dict['analysis']['time_chunking_with_gridded_data']

if time_chunking_with_gridded_data:
date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j')
print('obs time chunk %s' % date_str)
obs_vars = analysis_util.get_obs_vars(control_dict)
print(obs_vars)
obs_datasets, filenames = read_grid_util.read_grid_obs(
control_dict, obs_vars, date_str, obs=self.obs)
print(filenames)
self.obj = obs_datasets[self.obs]

if self.file.startswith("example:"):
example_id = ":".join(s.strip() for s in self.file.split(":")[1:])
files = [tutorial.fetch_example(example_id)]
else:
files = sort(glob(self.file))
if self.file.startswith("example:"):
example_id = ":".join(s.strip() for s in self.file.split(":")[1:])
files = [tutorial.fetch_example(example_id)]
else:
files = sort(glob(self.file))

assert len(files) >= 1, "need at least one"
assert len(files) >= 1, "need at least one"

_, extension = os.path.splitext(files[0])
try:
if extension in {'.nc', '.ncf', '.netcdf', '.nc4'}:
if len(files) > 1:
self.obj = xr.open_mfdataset(files)
_, extension = os.path.splitext(files[0])
try:
if extension in {'.nc', '.ncf', '.netcdf', '.nc4'}:
if len(files) > 1:
self.obj = xr.open_mfdataset(files)
else:
self.obj = xr.open_dataset(files[0])
elif extension in ['.ict', '.icarrt']:
assert len(files) == 1, "monetio.icarrt.add_data can only read one file"
self.obj = mio.icarrt.add_data(files[0])
else:
self.obj = xr.open_dataset(files[0])
elif extension in ['.ict', '.icarrt']:
assert len(files) == 1, "monetio.icarrt.add_data can only read one file"
self.obj = mio.icarrt.add_data(files[0])
else:
raise ValueError(f'extension {extension!r} currently unsupported')
except Exception as e:
print('something happened opening file:', e)
return
raise ValueError(f'extension {extension!r} currently unsupported')
except Exception as e:
print('something happened opening file:', e)
return

self.mask_and_scale() # mask and scale values from the control values
self.filter_obs()
Expand Down Expand Up @@ -321,7 +338,7 @@ def glob_files(self):
if self.file_pm25_str is not None:
self.files_pm25 = sort(glob(self.file_pm25_str))

def open_model_files(self, time_interval=None):
def open_model_files(self, time_interval=None, control_dict=None):
"""Open the model files, store data in :class:`model` instance attributes,
and apply mask and scaling.
Expand All @@ -339,6 +356,13 @@ def open_model_files(self, time_interval=None):
-------
None
"""
from .util import analysis_util
from .util import read_grid_util
from .util import regrid_util

time_chunking_with_gridded_data \
= 'time_chunking_with_gridded_data' in control_dict['analysis'].keys() \
and control_dict['analysis']['time_chunking_with_gridded_data']

self.glob_files()
# Calculate species to input into MONET, so works for all mechanisms in wrfchem
Expand All @@ -347,59 +371,69 @@ def open_model_files(self, time_interval=None):
for obs_map in self.mapping:
list_input_var = list_input_var + list(set(self.mapping[obs_map].keys()) - set(list_input_var))
#Only certain models need this option for speeding up i/o.
if 'cmaq' in self.model.lower():
print('**** Reading CMAQ model output...')
self.mod_kwargs.update({'var_list' : list_input_var})
if self.files_vert is not None:
self.mod_kwargs.update({'fname_vert' : self.files_vert})
if self.files_surf is not None:
self.mod_kwargs.update({'fname_surf' : self.files_surf})
if len(self.files) > 1:
self.mod_kwargs.update({'concatenate_forecasts' : True})
self.obj = mio.models._cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs)
elif 'wrfchem' in self.model.lower():
print('**** Reading WRF-Chem model output...')
self.mod_kwargs.update({'var_list' : list_input_var})
self.obj = mio.models._wrfchem_mm.open_mfdataset(self.files,**self.mod_kwargs)
elif 'rrfs' in self.model.lower():
print('**** Reading RRFS-CMAQ model output...')
if self.files_pm25 is not None:
self.mod_kwargs.update({'fname_pm25' : self.files_pm25})
self.mod_kwargs.update({'var_list' : list_input_var})
self.obj = mio.models._rrfs_cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs)
elif 'gsdchem' in self.model.lower():
print('**** Reading GSD-Chem model output...')
if len(self.files) > 1:
self.obj = mio.fv3chem.open_mfdataset(self.files,**self.mod_kwargs)
else:
self.obj = mio.fv3chem.open_dataset(self.files,**self.mod_kwargs)
elif 'cesm_fv' in self.model.lower():
print('**** Reading CESM FV model output...')
self.mod_kwargs.update({'var_list' : list_input_var})
self.obj = mio.models._cesm_fv_mm.open_mfdataset(self.files,**self.mod_kwargs)
# CAM-chem-SE grid or MUSICAv0
elif 'cesm_se' in self.model.lower():
print('**** Reading CESM SE model output...')
self.mod_kwargs.update({'var_list' : list_input_var})
if self.scrip_file.startswith("example:"):
from . import tutorial
example_id = ":".join(s.strip() for s in self.scrip_file.split(":")[1:])
self.scrip_file = tutorial.fetch_example(example_id)
self.mod_kwargs.update({'scrip_file' : self.scrip_file})
self.obj = mio.models._cesm_se_mm.open_mfdataset(self.files,**self.mod_kwargs)
#self.obj, self.obj_scrip = read_cesm_se.open_mfdataset(self.files,**self.mod_kwargs)
#self.obj.monet.scrip = self.obj_scrip
elif 'raqms' in self.model.lower():
if len(self.files) > 1:
self.obj = mio.raqms.open_mfdataset(self.files,**self.mod_kwargs)
else:
self.obj = mio.raqms.open_dataset(self.files,**self.mod_kwargs)

# move above
if time_chunking_with_gridded_data:
date_str = time_interval[0].strftime('%Y-%m-%b-%d-%j')
print('model time chunk %s' % date_str)
model_datasets, filenames = read_grid_util.read_grid_models(
control_dict, date_str, model=self.label)
print(filenames)
self.obj = model_datasets[self.label]
else:
print('**** Reading Unspecified model output. Take Caution...')
if len(self.files) > 1:
self.obj = xr.open_mfdataset(self.files,**self.mod_kwargs)
if 'cmaq' in self.model.lower():
print('**** Reading CMAQ model output...')
self.mod_kwargs.update({'var_list' : list_input_var})
if self.files_vert is not None:
self.mod_kwargs.update({'fname_vert' : self.files_vert})
if self.files_surf is not None:
self.mod_kwargs.update({'fname_surf' : self.files_surf})
if len(self.files) > 1:
self.mod_kwargs.update({'concatenate_forecasts' : True})
self.obj = mio.models._cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs)
elif 'wrfchem' in self.model.lower():
print('**** Reading WRF-Chem model output...')
self.mod_kwargs.update({'var_list' : list_input_var})
self.obj = mio.models._wrfchem_mm.open_mfdataset(self.files,**self.mod_kwargs)
elif 'rrfs' in self.model.lower():
print('**** Reading RRFS-CMAQ model output...')
if self.files_pm25 is not None:
self.mod_kwargs.update({'fname_pm25' : self.files_pm25})
self.mod_kwargs.update({'var_list' : list_input_var})
self.obj = mio.models._rrfs_cmaq_mm.open_mfdataset(self.files,**self.mod_kwargs)
elif 'gsdchem' in self.model.lower():
print('**** Reading GSD-Chem model output...')
if len(self.files) > 1:
self.obj = mio.fv3chem.open_mfdataset(self.files,**self.mod_kwargs)
else:
self.obj = mio.fv3chem.open_dataset(self.files,**self.mod_kwargs)
elif 'cesm_fv' in self.model.lower():
print('**** Reading CESM FV model output...')
self.mod_kwargs.update({'var_list' : list_input_var})
self.obj = mio.models._cesm_fv_mm.open_mfdataset(self.files,**self.mod_kwargs)
# CAM-chem-SE grid or MUSICAv0
elif 'cesm_se' in self.model.lower():
print('**** Reading CESM SE model output...')
self.mod_kwargs.update({'var_list' : list_input_var})
if self.scrip_file.startswith("example:"):
from . import tutorial
example_id = ":".join(s.strip() for s in self.scrip_file.split(":")[1:])
self.scrip_file = tutorial.fetch_example(example_id)
self.mod_kwargs.update({'scrip_file' : self.scrip_file})
self.obj = mio.models._cesm_se_mm.open_mfdataset(self.files,**self.mod_kwargs)
#self.obj, self.obj_scrip = read_cesm_se.open_mfdataset(self.files,**self.mod_kwargs)
#self.obj.monet.scrip = self.obj_scrip
elif 'raqms' in self.model.lower():
if len(self.files) > 1:
self.obj = mio.raqms.open_mfdataset(self.files,**self.mod_kwargs)
else:
self.obj = mio.raqms.open_dataset(self.files,**self.mod_kwargs)
else:
self.obj = xr.open_dataset(self.files[0],**self.mod_kwargs)
print('**** Reading Unspecified model output. Take Caution...')
if len(self.files) > 1:
self.obj = xr.open_mfdataset(self.files,**self.mod_kwargs)
else:
self.obj = xr.open_dataset(self.files[0],**self.mod_kwargs)
self.mask_and_scale()

def mask_and_scale(self):
Expand Down Expand Up @@ -458,6 +492,11 @@ def __init__(self):
self.debug = False
self.save = None
self.read = None
self.time_chunking_with_gridded_data = False # Default to False
self.regrid = False # Default to False
self.target_grid = None
self.obs_regridders = None
self.model_regridders = None

def __repr__(self):
return (
Expand Down Expand Up @@ -528,6 +567,14 @@ def read_control(self, control=None):
if 'read' in self.control_dict['analysis'].keys():
self.read = self.control_dict['analysis']['read']

# set time_chunking_with_gridded_data option, regrid option, and target_grid
if 'time_chunking_with_gridded_data' in self.control_dict['analysis'].keys():
self.time_chunking_with_gridded_data = self.control_dict['analysis']['time_chunking_with_gridded_data']
if 'regrid' in self.control_dict['analysis'].keys():
self.regrid = self.control_dict['analysis']['regrid']
if 'target_grid' in self.control_dict['analysis'].keys():
self.target_grid = self.control_dict['analysis']['target_grid']

# generate time intervals for time chunking
if 'time_interval' in self.control_dict['analysis'].keys():
time_stamps = pd.date_range(
Expand Down Expand Up @@ -601,6 +648,18 @@ def read_analysis(self):
elif self.read[attr]['method']=='netcdf':
read_saved_data(analysis=self,filenames=self.read[attr]['filenames'], method='netcdf', attr=attr)

def setup_regridders(self):
"""Create an obs xesmf.Regridder from base and target grids specified in the control_dict
Returns
-------
None
"""
from .util import regrid_util
if self.regrid:
self.obs_regridders = regrid_util.setup_regridder(self.control_dict, config_group='obs')
self.model_regridders = regrid_util.setup_regridder(self.control_dict, config_group='model')

def open_models(self, time_interval=None):
"""Open all models listed in the input yaml file and create a :class:`model`
object for each of them, populating the :attr:`models` dict.
Expand Down Expand Up @@ -682,9 +741,10 @@ def open_models(self, time_interval=None):
m.proj = ccrs.Projection(proj_in)

# open the model
m.open_model_files(time_interval=time_interval)
m.open_model_files(time_interval=time_interval, control_dict=self.control_dict)
self.models[m.label] = m


def open_obs(self, time_interval=None):
"""Open all observations listed in the input yaml file and create an
:class:`observation` instance for each of them,
Expand All @@ -700,6 +760,10 @@ def open_obs(self, time_interval=None):
-------
None
"""
from .util import analysis_util
from .util import read_grid_util
from .util import regrid_util

if 'obs' in self.control_dict:
for obs in self.control_dict['obs']:
o = observation()
Expand All @@ -712,9 +776,10 @@ def open_obs(self, time_interval=None):
self.control_dict['obs'][obs]['filename'])
if 'variables' in self.control_dict['obs'][obs].keys():
o.variable_dict = self.control_dict['obs'][obs]['variables']
o.open_obs(time_interval=time_interval)
o.open_obs(time_interval=time_interval, control_dict=self.control_dict)
self.obs[o.label] = o


def pair_data(self, time_interval=None):
"""Pair all observations and models in the analysis class
(i.e., those listed in the input yaml file) together,
Expand Down
Loading

0 comments on commit 58b6546

Please sign in to comment.