From e3a8816990b05f8626f6b5e0e400a3e465292d0f Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Fri, 26 Jul 2024 15:13:18 -0700 Subject: [PATCH 1/7] depth z should be negative! --- .../models/opendrift/utils.py | 211 ++++++++++++++++++ tests/test_manager.py | 15 +- 2 files changed, 222 insertions(+), 4 deletions(-) create mode 100644 particle_tracking_manager/models/opendrift/utils.py diff --git a/particle_tracking_manager/models/opendrift/utils.py b/particle_tracking_manager/models/opendrift/utils.py new file mode 100644 index 0000000..382c221 --- /dev/null +++ b/particle_tracking_manager/models/opendrift/utils.py @@ -0,0 +1,211 @@ +# following https://fsspec.github.io/kerchunk/test_example.html# +# env xp-inventory on lakes02 +# also installed ipython, h5py, cf_xarray, pynco +# copied to /mnt/vault/ciofs/HINDCAST/ciofs_kerchunk_to2012.json + +# import kerchunk.hdf +import fsspec +from pathlib import Path +from kerchunk.combine import MultiZarrToZarr +# from kerchunk.df import refs_to_dataframe +# import ujson +# import numpy as np + +def make_ciofs_kerchunk(start, end): + """_summary_ + + Parameters + ---------- + start, end : str + Should be something like "2004_0001" for YYYY_0DDD where DDD is dayofyear + to match the files in the directory, which are by year and day of year. + + Returns + ------- + kerchunk output + _description_ + """ + + output_dir_single_files = "/home/kristen/projects/kerchunk_setup/ciofs" + + fs2 = fsspec.filesystem('') # local file system to save final jsons to + + # select the single file Jsons to combine + json_list = sorted(fs2.glob(f"{output_dir_single_files}/*.json")) # combine single json files + json_list = [j for j in json_list if Path(j).stem>=start and Path(j).stem<=end] + + # Multi-file JSONs + # This code uses the output generated above to create a single ensemble dataset, + # with one set of references pointing to all of the chunks in the individual files. + # `coo_map = {"ocean_time": "cf:ocean_time"}` is necessary so that both the time + # values and units are read and interpreted instead of just the values. + + def fix_fill_values(out): + """Fix problem when fill_value and scara both equal 0.0. + + If the fill value and the scalar value are both 0, nan is filled instead. This fixes that. + """ + + for k in list(out): + if isinstance(out[k], str) and '"fill_value":0.0' in out[k]: + out[k] = out[k].replace('"fill_value":0.0', '"fill_value":"NaN"') + return out + + def postprocess(out): + out = fix_fill_values(out) + return out + + + mzz = MultiZarrToZarr( + json_list, + concat_dims=["ocean_time"], + identical_dims= ['lat_rho', 'lon_rho', "lon_psi", "lat_psi", + "lat_u", "lon_u", "lat_v", "lon_v", + "Akk_bak","Akp_bak","Akt_bak","Akv_bak","Cs_r","Cs_w", + "FSobc_in","FSobc_out","Falpha","Fbeta","Fgamma","Lm2CLM", + "Lm3CLM", "LnudgeM2CLM", "LnudgeM3CLM", "LnudgeTCLM", + "LsshCLM", "LtracerCLM", "LtracerSrc", "LuvSrc", + "LwSrc", "M2nudg", "M2obc_in", "M2obc_out", "M3nudg", + "M3obc_in", "M3obc_out", "Tcline", "Tnudg","Tobc_in", "Tobc_out", + "Vstretching", "Vtransform", "Znudg", "Zob", "Zos", "angle", + "dstart", "dt", "dtfast", "el", "f", "gamma2", "grid", "h", + "hc", "mask_psi", "mask_rho", "mask_u", "mask_v", "nHIS", "nRST", + "nSTA", "ndefHIS", "ndtfast", "ntimes", "pm", "pn", "rdrg", + "rdrg2", "rho0", "spherical", "theta_b", "theta_s", "xl", + ], + coo_map = {"ocean_time": "cf:ocean_time", + }, + postprocess=postprocess, + ) + + + # to keep in memory + out = mzz.translate() + + return out + + +def make_nwgoa_kerchunk(start, end): + """_summary_ + + Parameters + ---------- + start, end : str + Should be something like "1999-01-02" for YYYY-MM-DD + + Returns + ------- + kerchunk output + _description_ + """ + + # this version of the daily json files has the grid file merged + output_dir_single_files = "/home/kristen/projects/kerchunk_setup/nwgoa_with_grids" + + fs2 = fsspec.filesystem('') # local file system to save final jsons to + + # select the single file Jsons to combine + json_list = sorted(fs2.glob(f"{output_dir_single_files}/nwgoa*.json")) # combine single json files + json_list = [j for j in json_list if Path(j).stem.split("nwgoa_")[1]>=start and Path(j).stem.split("nwgoa_")[1]<=end] + + # Merge one of the model output files and the grid file as shown here, then + # saved the merged file, then used that in the MultiZarr below to get this to work + # https://fsspec.github.io/kerchunk/tutorial.html#merging-variables-across-jsons + # json_list += fs2.glob(f"{output_dir_single_files}/Cook_Inlet_grid_1.json") # combine single json files + + + + # include grid file + # grid_url_classic = Path("/mnt/depot/data/packrat/prod/aoos/nwgoa/Cook_Inlet_grid_1.nc") + # outf = out_base / Path(grid_url_classic.stem).with_suffix(".json") + # if not outf.exists(): + # refs_static = NetCDF3ToZarr(str(grid_url_classic)) + # with fs2.open(outf, 'wb') as f: + # f.write(ujson.dumps(refs_static.translate()).encode()); + + + # # I merged one of the model output files and the grid file as shown here, then + # # saved the merged file, then used that in the MultiZarr below to get this to work + # # https://fsspec.github.io/kerchunk/tutorial.html#merging-variables-across-jsons + # json_list = fs2.glob(f"{output_dir_single_files}/nwgoa_1999-01-02.json") # combine single json files + # json_list += fs2.glob(f"{output_dir_single_files}/Cook_Inlet_grid_1.json") # combine single json files + + # d = merge_vars(json_list) # returns dict, as does translate() + + +# # write merged version +# outf = out_base / pathlib.Path("nwgoa_1999-01-02.json") +# if not outf.exists(): +# # refs_static = NetCDF3ToZarr(str(grid_url_classic)) +# with fs2.open(outf, 'wb') as f: +# f.write(ujson.dumps(d).encode()); + + + + # Multi-file JSONs + # This code uses the output generated above to create a single ensemble dataset, + # with one set of references pointing to all of the chunks in the individual files. + # `coo_map = {"ocean_time": "cf:ocean_time"}` is necessary so that both the time + # values and units are read and interpreted instead of just the values. + + + # account for double compression + # Look at individual variables in the files to see what needs to be changed with + # h5dump -d ocean_time -p /mnt/depot/data/packrat/prod/aoos/nwgoa/processed/1999/nwgoa_1999-02-01.nc + def preprocess(refs): + for k in list(refs): + if k.endswith('/.zarray'): + refs[k] = refs[k].replace('"filters":[{"elementsize":8,"id":"shuffle"}]', + '"filters":[{"elementsize":8,"id":"shuffle"},{"id": "zlib", "level":8}]') + refs[k] = refs[k].replace('"filters":[{"elementsize":4,"id":"shuffle"}]', + '"filters":[{"elementsize":4,"id":"shuffle"},{"id": "zlib", "level":8}]') + return refs + + import zarr + def add_time_attr(out): + out_ = zarr.open(out) + out_.ocean_time.attrs["axis"] = "T" + return out + + def postprocess(out): + out = add_time_attr(out) + return out + + + mzz = MultiZarrToZarr( + json_list, + concat_dims=["ocean_time"], + identical_dims= ['lat_rho', 'lon_rho', "lon_psi", "lat_psi", + "lat_u", "lon_u", "lat_v", "lon_v", + "Akk_bak","Akp_bak","Akt_bak","Akv_bak","Cs_r","Cs_w", + "FSobc_in","FSobc_out","Falpha","Fbeta","Fgamma","Lm2CLM", + "Lm3CLM", "LnudgeM2CLM", "LnudgeM3CLM", "LnudgeTCLM", + "LsshCLM", "LtracerCLM", "LtracerSrc", "LuvSrc", + "LwSrc", "M2nudg", "M2obc_in", "M2obc_out", "M3nudg", + "M3obc_in", "M3obc_out", "Tcline", "Tnudg","Tobc_in", "Tobc_out", + "Vstretching", "Vtransform", "Znudg", "Zob", "Zos", "angle", + "dstart", "dt", "dtfast", "el", "f", "gamma2", "grid", "h", + "hc", "mask_psi", "mask_rho", "mask_u", "mask_v", "nHIS", "nRST", + "nSTA", "ndefHIS", "ndtfast", "ntimes", "pm", "pn", "rdrg", + "rdrg2", "rho0", "spherical", "theta_b", "theta_s", "xl", + "Charnok_alpha", "CrgBan_cw", "JLTS", "JPRJ", "LuvSponge", "P1", "P2", "P3", "P4", + "PLAT", "PLONG", "ROTA", "XOFF", "YOFF", "Zos_hsig_alpha", "depthmax", "depthmin", + "dfdy", "dmde", "dndx", "f0", "gls_Kmin", "gls_Pmin", "gls_c1", "gls_c2", + "gls_c3m", "gls_c3p", "gls_cmu0", "gls_m", "gls_n", "gls_p", "gls_sigk", "gls_sigp", + "h_mask", "hraw", "nAVG", "ndefAVG", "nl_visc2", "ntsAVG", "sz_alpha", "wtype_grid", + "x_psi", "x_rho", "x_u", "x_v", "y_psi", "y_rho", "y_u", "y_v", + ], + coo_map = {"ocean_time": "cf:ocean_time", + # "eta_rho": list(np.arange(1044)) + }, + preprocess=preprocess, + postprocess=postprocess, + ) + + # to keep in memory + out = mzz.translate() + # import pdb; pdb.set_trace() + # merge_vars([out, fs2.glob(f"{output_dir_single_files}/Cook_Inlet_grid_1.json")]) + # d = merge_vars(json_list) # returns dict, as does translate() + + return out diff --git a/tests/test_manager.py b/tests/test_manager.py index ecd36ee..f0dd42e 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -12,6 +12,13 @@ import particle_tracking_manager as ptm +def test_z_sign(): + """z should be negative""" + + with pytest.raises(ValueError): + m = ptm.OpenDriftModel(z=1) + + def test_order(): """Have to configure before seeding.""" @@ -207,7 +214,7 @@ def test_setattr_oceanmodel_lon0_360(): def test_setattr_surface_only(): """Test setting surface_only attribute.""" - manager = ptm.OpenDriftModel(do3D=True, z=1, vertical_mixing=True) + manager = ptm.OpenDriftModel(do3D=True, z=-1, vertical_mixing=True) manager.surface_only = True assert manager.do3D == False assert manager.z == 0 @@ -297,7 +304,7 @@ def test_surface_only_true(self): self.m.surface_only = True self.m.do3D = True self.assertEqual(self.m.do3D, False) - self.m.z = 10 + self.m.z = -10 self.assertEqual(self.m.z, 0) self.m.vertical_mixing = True self.assertEqual(self.m.vertical_mixing, False) @@ -319,8 +326,8 @@ def test_seed_seafloor_true(self): self.assertIsNone(self.m.z) def test_z_set(self): - self.m.z = 10 - self.assertEqual(self.m.z, 10) + self.m.z = -10 + self.assertEqual(self.m.z, -10) self.assertFalse(self.m.seed_seafloor) def test_has_added_reader_true_ocean_model_set(self): From c34abe6c27cfa46db74a736fce8bf488589589cc Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Fri, 26 Jul 2024 15:14:27 -0700 Subject: [PATCH 2/7] added start_time_end which adds OpenDrift capability for starting drifters over linear time frame --- particle_tracking_manager/the_manager_config.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/particle_tracking_manager/the_manager_config.json b/particle_tracking_manager/the_manager_config.json index 7962fa6..595371e 100644 --- a/particle_tracking_manager/the_manager_config.json +++ b/particle_tracking_manager/the_manager_config.json @@ -56,6 +56,15 @@ "description": "Start time for drifter simulation.", "ptm_level": 1 }, + "start_time_end": { + "type": "datetime.datetime", + "default": "None", + "min": "datetime.datetime(1999,1,1)", + "max": "pd.Timestamp.now() + pd.Timedelta(\"48H\")", + "units": "time", + "description": "If used, this creates a range of start times for drifters, starting with `start_time` and ending with `start_time_end`. Drifters will be initialized linearly between the two start times.", + "ptm_level": 2 + }, "run_forward": { "type": "bool", "default": true, From 5812946106108bfc408076e312c68ee39e954a44 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Fri, 26 Jul 2024 15:15:13 -0700 Subject: [PATCH 3/7] same adds to manager but also for log file each run --- particle_tracking_manager/the_manager.py | 43 +++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/particle_tracking_manager/the_manager.py b/particle_tracking_manager/the_manager.py index cc8c5f8..4dc6577 100644 --- a/particle_tracking_manager/the_manager.py +++ b/particle_tracking_manager/the_manager.py @@ -61,6 +61,7 @@ class ParticleTrackingManager: Depth of initial drifter locations, by default 0 but taken from the default in the model. Values are overridden if ``surface_only==True`` to 0 and to the seabed if ``seed_seafloor`` is True. + Depth is negative downward in OpenDrift. seed_seafloor : bool, optional Set to True to seed drifters vertically at the seabed, default is False. If True then value of z is set to None and ignored. @@ -68,6 +69,10 @@ class ParticleTrackingManager: Number of drifters to simulate. Default is 100. start_time : Optional[str,datetime.datetime,pd.Timestamp], optional Start time of simulation, by default None + start_time_end : Optional[str,datetime.datetime,pd.Timestamp], optional + If not None, this creates a range of start times for drifters, starting with + `start_time` and ending with `start_time_end`. Drifters will be initialized linearly + between the two start times. Default None. run_forward : bool, optional True to run forward in time, False to run backward, by default True time_step : int, optional @@ -130,7 +135,7 @@ class ParticleTrackingManager: dataset before inputting to PTM. Setting this to True may save computation time but will be less accurate, especially in the tidal flat regions of the model. output_file : Optional[str], optional - Name of output file to save, by default None. If None, default is set in the model. + Name of output file to save, by default None. If None, default is set in the model. With ".nc" suffix. Notes ----- @@ -145,6 +150,7 @@ class ParticleTrackingManager: surface_only: Optional[bool] z: Optional[Union[int, float]] start_time: Optional[Union[str, datetime.datetime, pd.Timestamp]] + start_time_end: Optional[Union[str, datetime.datetime, pd.Timestamp]] steps: Optional[int] time_step: int duration: Optional[datetime.timedelta] @@ -165,6 +171,7 @@ def __init__( seed_seafloor: bool = config_ptm["seed_seafloor"]["default"], number: int = config_ptm["number"]["default"], start_time: Optional[Union[str, datetime.datetime, pd.Timestamp]] = None, + start_time_end: Optional[Union[str, datetime.datetime, pd.Timestamp]] = None, run_forward: bool = config_ptm["run_forward"]["default"], time_step: int = config_ptm["time_step"]["default"], time_step_output: Optional[int] = config_ptm["time_step_output"]["default"], @@ -219,6 +226,29 @@ def __init__( self.kw = kw + if self.__dict__["output_file"] is None: + self.__dict__[ + "output_file" + ] = f"output-results_{datetime.datetime.now():%Y-%m-%dT%H%M:%SZ}.nc" + + ## set up log for this simulation + # Create a file handler + assert self.__dict__["output_file"] is not None + logfile_name = self.__dict__["output_file"].replace(".nc", ".log") + self.file_handler = logging.FileHandler(logfile_name) + + # Create a formatter and add it to the handler + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + self.file_handler.setFormatter(formatter) + + # Add the handler to the logger + self.logger.addHandler(self.file_handler) + + self.logger.info(f"filename: {logfile_name}") + ## + def __setattr_model__(self, name: str, value) -> None: """Implement this in model class to add specific __setattr__ there too.""" pass @@ -341,6 +371,12 @@ def __setattr__(self, name: str, value) -> None: self.z = 0 self.vertical_mixing = False + # z should be a negative value, representing below the surface + if name == "z" and value > 0: + raise ValueError( + "z should be a negative value, representing below the surface." + ) + # in case any of these are reset by user after surface_only is already set # if surface_only is True, do3D must be False if name == "do3D" and value and self.surface_only: @@ -501,6 +537,11 @@ def run(self): ) self.run_drifters() + + # Remove the handler at the end of the loop + self.logger.removeHandler(self.file_handler) + self.file_handler.close() + self.has_run = True def run_all(self): From f9ad4e012cd98d265ac5898e817eed62541ba2e8 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Fri, 26 Jul 2024 15:15:39 -0700 Subject: [PATCH 4/7] small fix to hist plot --- particle_tracking_manager/plotting.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/particle_tracking_manager/plotting.py b/particle_tracking_manager/plotting.py index 9bed02b..c6e567e 100644 --- a/particle_tracking_manager/plotting.py +++ b/particle_tracking_manager/plotting.py @@ -3,14 +3,23 @@ import opendrift -def plot_dest(o, filename): - """This is copied from an opendrift example.""" +def plot_dest(o, filename=None, **kwargs): + """This is copied from an opendrift example. + + ...and subsequently modified. + + Parameters + ---------- + kwargs : dict + Additional keyword arguments to pass to the plotting function. + """ import cmocean cmap = cmocean.tools.crop_by_percent(cmocean.cm.amp, 20, which="max", N=None) od = opendrift.open_xarray(o.outfile_name) + density = od.get_histogram(pixelsize_m=5000).isel(time=-1).isel(origin_marker=0) density = density.where(density > 0) density = density / density.sum() * 100 @@ -26,4 +35,5 @@ def plot_dest(o, filename): cmap=cmap, fast=True, filename=filename, + **kwargs ) From 87c15418541b93a6cc3af12269d1229623e0b718 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Fri, 26 Jul 2024 15:18:04 -0700 Subject: [PATCH 5/7] fixed times, added local model kerchunk files now local model output for CIOFS and NWGOA is generated from utils functions which is faster, way faster for CIOFS. --- .../models/opendrift/opendrift.py | 44 +++++++++++++++---- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/particle_tracking_manager/models/opendrift/opendrift.py b/particle_tracking_manager/models/opendrift/opendrift.py index edeaedf..1873997 100644 --- a/particle_tracking_manager/models/opendrift/opendrift.py +++ b/particle_tracking_manager/models/opendrift/opendrift.py @@ -26,6 +26,7 @@ from ...cli import is_None from ...the_manager import _KNOWN_MODELS, ParticleTrackingManager +from .utils import make_ciofs_kerchunk, make_nwgoa_kerchunk # from .cli import is_None @@ -661,8 +662,11 @@ def run_add_reader( "hraw", "snow_thick", ] + start = f"{self.start_time.year}-{str(self.start_time.month).zfill(2)}-{str(self.start_time.day).zfill(2)}" + end = f"{self.end_time.year}-{str(self.end_time.month).zfill(2)}-{str(self.end_time.day).zfill(2)}" + loc_local = make_nwgoa_kerchunk(start=start, end=end) - loc_local = "/mnt/depot/data/packrat/prod/aoos/nwgoa/processed/nwgoa_kerchunk.parq" + # loc_local = "/mnt/depot/data/packrat/prod/aoos/nwgoa/processed/nwgoa_kerchunk.parq" loc_remote = ( "http://xpublish-nwgoa.srv.axds.co/datasets/nwgoa_all/zarr/" ) @@ -674,8 +678,14 @@ def run_add_reader( "wetdry_mask_psi", ] if self.ocean_model == "CIOFS": - - loc_local = "/mnt/vault/ciofs/HINDCAST/ciofs_kerchunk.parq" + start = f"{self.start_time.year}_{str(self.start_time.dayofyear - 1).zfill(4)}" + end = ( + f"{self.end_time.year}_{str(self.end_time.dayofyear).zfill(4)}" + ) + loc_local = make_ciofs_kerchunk(start=start, end=end) + # loc_local = make_ciofs_kerchunk(start="2005_0052", end="2005_0068") + # loc_local = "/mnt/vault/ciofs/HINDCAST/ciofs_kerchunk_2005.parq" + # loc_local = "/mnt/vault/ciofs/HINDCAST/ciofs_kerchunk.parq" loc_remote = "http://xpublish-ciofs.srv.axds.co/datasets/ciofs_hindcast/zarr/" elif self.ocean_model == "CIOFSOP": @@ -762,11 +772,19 @@ def run_add_reader( dt_model = float( ds.ocean_time[1] - ds.ocean_time[0] ) # time step of the model output in seconds - start_time_num = (self.start_time - units_date).total_seconds() + # want to include the next ocean model output before the first drifter simulation time + # in case it starts before model times + start_time_num = ( + self.start_time - units_date + ).total_seconds() - dt_model # want to include the next ocean model output after the last drifter simulation time end_time_num = (self.end_time - units_date).total_seconds() + dt_model ds = ds.sel(ocean_time=slice(start_time_num, end_time_num)) self.logger.info("Narrowed model output to simulation time") + if len(ds.ocean_time) == 0: + raise ValueError( + "No model output left for simulation time. Check start_time and end_time." + ) else: raise ValueError( "start_time and end_time must be set to narrow model output to simulation time" @@ -823,10 +841,19 @@ def seed_kws(self): "drift:truncate_ocean_model_below_m", ] + if self.start_time_end is not None: + # time can be a list to start drifters linearly in time + time = [ + self.start_time.to_pydatetime(), + self.start_time_end.to_pydatetime(), + ] + elif self.start_time is not None: + time = self.start_time.to_pydatetime() + else: + time = None + _seed_kws = { - "time": self.start_time.to_pydatetime() - if self.start_time is not None - else None, + "time": time, "z": self.z, } @@ -861,7 +888,6 @@ def run_seed(self): """Actually seed drifters for model.""" if self.seed_flag == "elements": - self.o.seed_elements(**self.seed_kws) elif self.seed_flag == "geojson": @@ -900,7 +926,7 @@ def run_drifters(self): output_file_initial = ( f"{self.output_file}_initial" - or f"output-results_{datetime.datetime.utcnow():%Y-%m-%dT%H%M:%SZ}.nc" + or f"output-results_{datetime.datetime.now():%Y-%m-%dT%H%M:%SZ}.nc" ) self.o.run( From 07b8dbe5df3d9a384ccb8de691a8426e351fdb98 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Fri, 26 Jul 2024 15:29:28 -0700 Subject: [PATCH 6/7] updated whats new --- docs/whats_new.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/whats_new.md b/docs/whats_new.md index c93ebb1..0d4853d 100644 --- a/docs/whats_new.md +++ b/docs/whats_new.md @@ -1,5 +1,13 @@ # What's New +## v0.9.0 (July 26, 2024) + +* Added utilities to generate kerchunk files on the fly for the time period of the simulation length for CIOFS and NWGOA. This has majorly sped up CIOFS simulations and modestly sped up NWGOA simulations. +* depth z should be negative! Fixed this in tests. +* added `start_time_end`, which adds OpenDrift capability for starting drifters over linear time frame +* fixed so unique log file is output for each simulation even if run in a script, and has the same name as `output_file`. +* small fix to hist plot + ## v0.8.4 (April 24, 2024) * updated the `ptm_level` of a bunch of config parameters From e5b1536f7b900a718d38d44edb7e0bca4e81b068 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Fri, 26 Jul 2024 15:34:50 -0700 Subject: [PATCH 7/7] precommit --- docs/whats_new.md | 2 +- .../models/opendrift/utils.py | 351 ++++++++++++------ 2 files changed, 245 insertions(+), 108 deletions(-) diff --git a/docs/whats_new.md b/docs/whats_new.md index 0d4853d..0039602 100644 --- a/docs/whats_new.md +++ b/docs/whats_new.md @@ -6,7 +6,7 @@ * depth z should be negative! Fixed this in tests. * added `start_time_end`, which adds OpenDrift capability for starting drifters over linear time frame * fixed so unique log file is output for each simulation even if run in a script, and has the same name as `output_file`. -* small fix to hist plot +* small fix to histogram plot ## v0.8.4 (April 24, 2024) diff --git a/particle_tracking_manager/models/opendrift/utils.py b/particle_tracking_manager/models/opendrift/utils.py index 382c221..7c4443a 100644 --- a/particle_tracking_manager/models/opendrift/utils.py +++ b/particle_tracking_manager/models/opendrift/utils.py @@ -3,13 +3,12 @@ # also installed ipython, h5py, cf_xarray, pynco # copied to /mnt/vault/ciofs/HINDCAST/ciofs_kerchunk_to2012.json -# import kerchunk.hdf -import fsspec from pathlib import Path + +import fsspec + from kerchunk.combine import MultiZarrToZarr -# from kerchunk.df import refs_to_dataframe -# import ujson -# import numpy as np + def make_ciofs_kerchunk(start, end): """_summary_ @@ -28,21 +27,23 @@ def make_ciofs_kerchunk(start, end): output_dir_single_files = "/home/kristen/projects/kerchunk_setup/ciofs" - fs2 = fsspec.filesystem('') # local file system to save final jsons to + fs2 = fsspec.filesystem("") # local file system to save final jsons to # select the single file Jsons to combine - json_list = sorted(fs2.glob(f"{output_dir_single_files}/*.json")) # combine single json files - json_list = [j for j in json_list if Path(j).stem>=start and Path(j).stem<=end] + json_list = sorted( + fs2.glob(f"{output_dir_single_files}/*.json") + ) # combine single json files + json_list = [j for j in json_list if Path(j).stem >= start and Path(j).stem <= end] # Multi-file JSONs - # This code uses the output generated above to create a single ensemble dataset, + # This code uses the output generated above to create a single ensemble dataset, # with one set of references pointing to all of the chunks in the individual files. # `coo_map = {"ocean_time": "cf:ocean_time"}` is necessary so that both the time # values and units are read and interpreted instead of just the values. def fix_fill_values(out): """Fix problem when fill_value and scara both equal 0.0. - + If the fill value and the scalar value are both 0, nan is filled instead. This fixes that. """ @@ -52,33 +53,94 @@ def fix_fill_values(out): return out def postprocess(out): + """postprocess function to fix fill values""" out = fix_fill_values(out) return out - mzz = MultiZarrToZarr( json_list, concat_dims=["ocean_time"], - identical_dims= ['lat_rho', 'lon_rho', "lon_psi", "lat_psi", - "lat_u", "lon_u", "lat_v", "lon_v", - "Akk_bak","Akp_bak","Akt_bak","Akv_bak","Cs_r","Cs_w", - "FSobc_in","FSobc_out","Falpha","Fbeta","Fgamma","Lm2CLM", - "Lm3CLM", "LnudgeM2CLM", "LnudgeM3CLM", "LnudgeTCLM", - "LsshCLM", "LtracerCLM", "LtracerSrc", "LuvSrc", - "LwSrc", "M2nudg", "M2obc_in", "M2obc_out", "M3nudg", - "M3obc_in", "M3obc_out", "Tcline", "Tnudg","Tobc_in", "Tobc_out", - "Vstretching", "Vtransform", "Znudg", "Zob", "Zos", "angle", - "dstart", "dt", "dtfast", "el", "f", "gamma2", "grid", "h", - "hc", "mask_psi", "mask_rho", "mask_u", "mask_v", "nHIS", "nRST", - "nSTA", "ndefHIS", "ndtfast", "ntimes", "pm", "pn", "rdrg", - "rdrg2", "rho0", "spherical", "theta_b", "theta_s", "xl", - ], - coo_map = {"ocean_time": "cf:ocean_time", - }, + identical_dims=[ + "lat_rho", + "lon_rho", + "lon_psi", + "lat_psi", + "lat_u", + "lon_u", + "lat_v", + "lon_v", + "Akk_bak", + "Akp_bak", + "Akt_bak", + "Akv_bak", + "Cs_r", + "Cs_w", + "FSobc_in", + "FSobc_out", + "Falpha", + "Fbeta", + "Fgamma", + "Lm2CLM", + "Lm3CLM", + "LnudgeM2CLM", + "LnudgeM3CLM", + "LnudgeTCLM", + "LsshCLM", + "LtracerCLM", + "LtracerSrc", + "LuvSrc", + "LwSrc", + "M2nudg", + "M2obc_in", + "M2obc_out", + "M3nudg", + "M3obc_in", + "M3obc_out", + "Tcline", + "Tnudg", + "Tobc_in", + "Tobc_out", + "Vstretching", + "Vtransform", + "Znudg", + "Zob", + "Zos", + "angle", + "dstart", + "dt", + "dtfast", + "el", + "f", + "gamma2", + "grid", + "h", + "hc", + "mask_psi", + "mask_rho", + "mask_u", + "mask_v", + "nHIS", + "nRST", + "nSTA", + "ndefHIS", + "ndtfast", + "ntimes", + "pm", + "pn", + "rdrg", + "rdrg2", + "rho0", + "spherical", + "theta_b", + "theta_s", + "xl", + ], + coo_map={ + "ocean_time": "cf:ocean_time", + }, postprocess=postprocess, ) - # to keep in memory out = mzz.translate() @@ -91,7 +153,7 @@ def make_nwgoa_kerchunk(start, end): Parameters ---------- start, end : str - Should be something like "1999-01-02" for YYYY-MM-DD + Should be something like "1999-01-02" for YYYY-MM-DD Returns ------- @@ -102,110 +164,185 @@ def make_nwgoa_kerchunk(start, end): # this version of the daily json files has the grid file merged output_dir_single_files = "/home/kristen/projects/kerchunk_setup/nwgoa_with_grids" - fs2 = fsspec.filesystem('') # local file system to save final jsons to + fs2 = fsspec.filesystem("") # local file system to save final jsons to # select the single file Jsons to combine - json_list = sorted(fs2.glob(f"{output_dir_single_files}/nwgoa*.json")) # combine single json files - json_list = [j for j in json_list if Path(j).stem.split("nwgoa_")[1]>=start and Path(j).stem.split("nwgoa_")[1]<=end] - - # Merge one of the model output files and the grid file as shown here, then - # saved the merged file, then used that in the MultiZarr below to get this to work - # https://fsspec.github.io/kerchunk/tutorial.html#merging-variables-across-jsons - # json_list += fs2.glob(f"{output_dir_single_files}/Cook_Inlet_grid_1.json") # combine single json files - - - - # include grid file - # grid_url_classic = Path("/mnt/depot/data/packrat/prod/aoos/nwgoa/Cook_Inlet_grid_1.nc") - # outf = out_base / Path(grid_url_classic.stem).with_suffix(".json") - # if not outf.exists(): - # refs_static = NetCDF3ToZarr(str(grid_url_classic)) - # with fs2.open(outf, 'wb') as f: - # f.write(ujson.dumps(refs_static.translate()).encode()); - - - # # I merged one of the model output files and the grid file as shown here, then - # # saved the merged file, then used that in the MultiZarr below to get this to work - # # https://fsspec.github.io/kerchunk/tutorial.html#merging-variables-across-jsons - # json_list = fs2.glob(f"{output_dir_single_files}/nwgoa_1999-01-02.json") # combine single json files - # json_list += fs2.glob(f"{output_dir_single_files}/Cook_Inlet_grid_1.json") # combine single json files - - # d = merge_vars(json_list) # returns dict, as does translate() - - -# # write merged version -# outf = out_base / pathlib.Path("nwgoa_1999-01-02.json") -# if not outf.exists(): -# # refs_static = NetCDF3ToZarr(str(grid_url_classic)) -# with fs2.open(outf, 'wb') as f: -# f.write(ujson.dumps(d).encode()); - - - - # Multi-file JSONs - # This code uses the output generated above to create a single ensemble dataset, - # with one set of references pointing to all of the chunks in the individual files. - # `coo_map = {"ocean_time": "cf:ocean_time"}` is necessary so that both the time - # values and units are read and interpreted instead of just the values. - - - # account for double compression + json_list = sorted( + fs2.glob(f"{output_dir_single_files}/nwgoa*.json") + ) # combine single json files + json_list = [ + j + for j in json_list + if Path(j).stem.split("nwgoa_")[1] >= start + and Path(j).stem.split("nwgoa_")[1] <= end + ] + + # account for double compression # Look at individual variables in the files to see what needs to be changed with # h5dump -d ocean_time -p /mnt/depot/data/packrat/prod/aoos/nwgoa/processed/1999/nwgoa_1999-02-01.nc def preprocess(refs): + """preprocess function to fix fill values""" for k in list(refs): - if k.endswith('/.zarray'): - refs[k] = refs[k].replace('"filters":[{"elementsize":8,"id":"shuffle"}]', - '"filters":[{"elementsize":8,"id":"shuffle"},{"id": "zlib", "level":8}]') - refs[k] = refs[k].replace('"filters":[{"elementsize":4,"id":"shuffle"}]', - '"filters":[{"elementsize":4,"id":"shuffle"},{"id": "zlib", "level":8}]') + if k.endswith("/.zarray"): + refs[k] = refs[k].replace( + '"filters":[{"elementsize":8,"id":"shuffle"}]', + '"filters":[{"elementsize":8,"id":"shuffle"},{"id": "zlib", "level":8}]', + ) + refs[k] = refs[k].replace( + '"filters":[{"elementsize":4,"id":"shuffle"}]', + '"filters":[{"elementsize":4,"id":"shuffle"},{"id": "zlib", "level":8}]', + ) return refs import zarr + def add_time_attr(out): + """add time attributes to the ocean_time variable""" out_ = zarr.open(out) out_.ocean_time.attrs["axis"] = "T" return out def postprocess(out): + """postprocess function to fix fill values""" out = add_time_attr(out) return out - mzz = MultiZarrToZarr( json_list, concat_dims=["ocean_time"], - identical_dims= ['lat_rho', 'lon_rho', "lon_psi", "lat_psi", - "lat_u", "lon_u", "lat_v", "lon_v", - "Akk_bak","Akp_bak","Akt_bak","Akv_bak","Cs_r","Cs_w", - "FSobc_in","FSobc_out","Falpha","Fbeta","Fgamma","Lm2CLM", - "Lm3CLM", "LnudgeM2CLM", "LnudgeM3CLM", "LnudgeTCLM", - "LsshCLM", "LtracerCLM", "LtracerSrc", "LuvSrc", - "LwSrc", "M2nudg", "M2obc_in", "M2obc_out", "M3nudg", - "M3obc_in", "M3obc_out", "Tcline", "Tnudg","Tobc_in", "Tobc_out", - "Vstretching", "Vtransform", "Znudg", "Zob", "Zos", "angle", - "dstart", "dt", "dtfast", "el", "f", "gamma2", "grid", "h", - "hc", "mask_psi", "mask_rho", "mask_u", "mask_v", "nHIS", "nRST", - "nSTA", "ndefHIS", "ndtfast", "ntimes", "pm", "pn", "rdrg", - "rdrg2", "rho0", "spherical", "theta_b", "theta_s", "xl", - "Charnok_alpha", "CrgBan_cw", "JLTS", "JPRJ", "LuvSponge", "P1", "P2", "P3", "P4", - "PLAT", "PLONG", "ROTA", "XOFF", "YOFF", "Zos_hsig_alpha", "depthmax", "depthmin", - "dfdy", "dmde", "dndx", "f0", "gls_Kmin", "gls_Pmin", "gls_c1", "gls_c2", - "gls_c3m", "gls_c3p", "gls_cmu0", "gls_m", "gls_n", "gls_p", "gls_sigk", "gls_sigp", - "h_mask", "hraw", "nAVG", "ndefAVG", "nl_visc2", "ntsAVG", "sz_alpha", "wtype_grid", - "x_psi", "x_rho", "x_u", "x_v", "y_psi", "y_rho", "y_u", "y_v", - ], - coo_map = {"ocean_time": "cf:ocean_time", - # "eta_rho": list(np.arange(1044)) - }, + identical_dims=[ + "lat_rho", + "lon_rho", + "lon_psi", + "lat_psi", + "lat_u", + "lon_u", + "lat_v", + "lon_v", + "Akk_bak", + "Akp_bak", + "Akt_bak", + "Akv_bak", + "Cs_r", + "Cs_w", + "FSobc_in", + "FSobc_out", + "Falpha", + "Fbeta", + "Fgamma", + "Lm2CLM", + "Lm3CLM", + "LnudgeM2CLM", + "LnudgeM3CLM", + "LnudgeTCLM", + "LsshCLM", + "LtracerCLM", + "LtracerSrc", + "LuvSrc", + "LwSrc", + "M2nudg", + "M2obc_in", + "M2obc_out", + "M3nudg", + "M3obc_in", + "M3obc_out", + "Tcline", + "Tnudg", + "Tobc_in", + "Tobc_out", + "Vstretching", + "Vtransform", + "Znudg", + "Zob", + "Zos", + "angle", + "dstart", + "dt", + "dtfast", + "el", + "f", + "gamma2", + "grid", + "h", + "hc", + "mask_psi", + "mask_rho", + "mask_u", + "mask_v", + "nHIS", + "nRST", + "nSTA", + "ndefHIS", + "ndtfast", + "ntimes", + "pm", + "pn", + "rdrg", + "rdrg2", + "rho0", + "spherical", + "theta_b", + "theta_s", + "xl", + "Charnok_alpha", + "CrgBan_cw", + "JLTS", + "JPRJ", + "LuvSponge", + "P1", + "P2", + "P3", + "P4", + "PLAT", + "PLONG", + "ROTA", + "XOFF", + "YOFF", + "Zos_hsig_alpha", + "depthmax", + "depthmin", + "dfdy", + "dmde", + "dndx", + "f0", + "gls_Kmin", + "gls_Pmin", + "gls_c1", + "gls_c2", + "gls_c3m", + "gls_c3p", + "gls_cmu0", + "gls_m", + "gls_n", + "gls_p", + "gls_sigk", + "gls_sigp", + "h_mask", + "hraw", + "nAVG", + "ndefAVG", + "nl_visc2", + "ntsAVG", + "sz_alpha", + "wtype_grid", + "x_psi", + "x_rho", + "x_u", + "x_v", + "y_psi", + "y_rho", + "y_u", + "y_v", + ], + coo_map={ + "ocean_time": "cf:ocean_time", + # "eta_rho": list(np.arange(1044)) + }, preprocess=preprocess, postprocess=postprocess, ) # to keep in memory out = mzz.translate() - # import pdb; pdb.set_trace() - # merge_vars([out, fs2.glob(f"{output_dir_single_files}/Cook_Inlet_grid_1.json")]) - # d = merge_vars(json_list) # returns dict, as does translate() return out