diff --git a/docs/whats_new.md b/docs/whats_new.md
index c93ebb1..0039602 100644
--- a/docs/whats_new.md
+++ b/docs/whats_new.md
@@ -1,5 +1,13 @@
 # What's New
 
+## v0.9.0 (July 26, 2024)
+
+* Added utilities to generate kerchunk files on the fly for the time period of the simulation for CIOFS and NWGOA. This majorly speeds up CIOFS simulations and modestly speeds up NWGOA simulations.
+* Depth `z` should be negative, representing depth below the surface; fixed this in the tests.
+* Added `start_time_end`, which enables OpenDrift's capability to start drifters linearly over a time range.
+* Fixed logging so that a unique log file is output for each simulation, even when run from a script, with the same name as `output_file`.
+* Small fix to the histogram plot.
+
 ## v0.8.4 (April 24, 2024)
 
 * updated the `ptm_level` of a bunch of config parameters
diff --git a/particle_tracking_manager/models/opendrift/opendrift.py b/particle_tracking_manager/models/opendrift/opendrift.py
index edeaedf..1873997 100644
--- a/particle_tracking_manager/models/opendrift/opendrift.py
+++ b/particle_tracking_manager/models/opendrift/opendrift.py
@@ -26,6 +26,7 @@
 
 from ...cli import is_None
 from ...the_manager import _KNOWN_MODELS, ParticleTrackingManager
+from .utils import make_ciofs_kerchunk, make_nwgoa_kerchunk
 
 
 # from .cli import is_None
@@ -661,8 +662,11 @@ def run_add_reader(
                 "hraw",
                 "snow_thick",
             ]
+            start = f"{self.start_time.year}-{str(self.start_time.month).zfill(2)}-{str(self.start_time.day).zfill(2)}"
+            end = f"{self.end_time.year}-{str(self.end_time.month).zfill(2)}-{str(self.end_time.day).zfill(2)}"
+            loc_local = make_nwgoa_kerchunk(start=start, end=end)
 
-            loc_local = "/mnt/depot/data/packrat/prod/aoos/nwgoa/processed/nwgoa_kerchunk.parq"
+            # loc_local = "/mnt/depot/data/packrat/prod/aoos/nwgoa/processed/nwgoa_kerchunk.parq"
             loc_remote = (
                 "http://xpublish-nwgoa.srv.axds.co/datasets/nwgoa_all/zarr/"
             )
@@ -674,8 +678,14 @@ def run_add_reader(
                 "wetdry_mask_psi",
             ]
         if self.ocean_model == "CIOFS":
-
-            loc_local = "/mnt/vault/ciofs/HINDCAST/ciofs_kerchunk.parq"
+            start = f"{self.start_time.year}_{str(self.start_time.dayofyear - 1).zfill(4)}"
+            end = (
+                f"{self.end_time.year}_{str(self.end_time.dayofyear).zfill(4)}"
+            )
+            loc_local = make_ciofs_kerchunk(start=start, end=end)
+            # loc_local = make_ciofs_kerchunk(start="2005_0052", end="2005_0068")
+            # loc_local = "/mnt/vault/ciofs/HINDCAST/ciofs_kerchunk_2005.parq"
+            # loc_local = "/mnt/vault/ciofs/HINDCAST/ciofs_kerchunk.parq"
             loc_remote = "http://xpublish-ciofs.srv.axds.co/datasets/ciofs_hindcast/zarr/"
 
         elif self.ocean_model == "CIOFSOP":
@@ -762,11 +772,19 @@ def run_add_reader(
                 dt_model = float(
                     ds.ocean_time[1] - ds.ocean_time[0]
                 )  # time step of the model output in seconds
-                start_time_num = (self.start_time - units_date).total_seconds()
+                # want to include the next ocean model output before the first drifter simulation time
+                # in case it starts before model times
+                start_time_num = (
+                    self.start_time - units_date
+                ).total_seconds() - dt_model
                 # want to include the next ocean model output after the last drifter simulation time
                 end_time_num = (self.end_time - units_date).total_seconds() + dt_model
                 ds = ds.sel(ocean_time=slice(start_time_num, end_time_num))
                 self.logger.info("Narrowed model output to simulation time")
+                if len(ds.ocean_time) == 0:
+                    raise ValueError(
+                        "No model output left for simulation time. Check start_time and end_time."
+                    )
             else:
                 raise ValueError(
                     "start_time and end_time must be set to narrow model output to simulation time"
@@ -823,10 +841,19 @@ def seed_kws(self):
             "drift:truncate_ocean_model_below_m",
         ]
 
+        if self.start_time_end is not None:
+            # time can be a list to start drifters linearly in time
+            time = [
+                self.start_time.to_pydatetime(),
+                self.start_time_end.to_pydatetime(),
+            ]
+        elif self.start_time is not None:
+            time = self.start_time.to_pydatetime()
+        else:
+            time = None
+
         _seed_kws = {
-            "time": self.start_time.to_pydatetime()
-            if self.start_time is not None
-            else None,
+            "time": time,
             "z": self.z,
         }
 
@@ -861,7 +888,6 @@ def run_seed(self):
         """Actually seed drifters for model."""
 
         if self.seed_flag == "elements":
-
             self.o.seed_elements(**self.seed_kws)
 
         elif self.seed_flag == "geojson":
@@ -900,7 +926,7 @@ def run_drifters(self):
 
         output_file_initial = (
             f"{self.output_file}_initial"
-            or f"output-results_{datetime.datetime.utcnow():%Y-%m-%dT%H%M:%SZ}.nc"
+            or f"output-results_{datetime.datetime.now():%Y-%m-%dT%H%M:%SZ}.nc"
         )
 
         self.o.run(
diff --git a/particle_tracking_manager/models/opendrift/utils.py b/particle_tracking_manager/models/opendrift/utils.py
new file mode 100644
index 0000000..7c4443a
--- /dev/null
+++ b/particle_tracking_manager/models/opendrift/utils.py
@@ -0,0 +1,348 @@
+# following https://fsspec.github.io/kerchunk/test_example.html#
+# env xp-inventory on lakes02
+# also installed ipython, h5py, cf_xarray, pynco
+# copied to /mnt/vault/ciofs/HINDCAST/ciofs_kerchunk_to2012.json
+
+from pathlib import Path
+
+import fsspec
+
+from kerchunk.combine import MultiZarrToZarr
+
+
+def make_ciofs_kerchunk(start, end):
+    """Make a combined kerchunk reference set for CIOFS model output between start and end.
+
+    Parameters
+    ----------
+    start, end : str
+        Should be something like "2004_0001" for YYYY_0DDD where DDD is dayofyear
+        to match the files in the directory, which are by year and day of year.
+
+    Returns
+    -------
+    kerchunk output
+        Combined references held in memory (the output of ``MultiZarrToZarr.translate()``).
+    """
+
+    output_dir_single_files = "/home/kristen/projects/kerchunk_setup/ciofs"
+
+    fs2 = fsspec.filesystem("")  # local file system to save final jsons to
+
+    # select the single file Jsons to combine
+    json_list = sorted(
+        fs2.glob(f"{output_dir_single_files}/*.json")
+    )  # combine single json files
+    json_list = [j for j in json_list if Path(j).stem >= start and Path(j).stem <= end]
+
+    # Multi-file JSONs
+    # This code uses the output generated above to create a single ensemble dataset,
+    # with one set of references pointing to all of the chunks in the individual files.
+    # `coo_map = {"ocean_time": "cf:ocean_time"}` is necessary so that both the time
+    # values and units are read and interpreted instead of just the values.
+
+    def fix_fill_values(out):
+        """Fix problem when fill_value and the scalar value both equal 0.0.
+
+        If the fill value and the scalar value are both 0, nan is filled instead. This fixes that.
+ """ + + for k in list(out): + if isinstance(out[k], str) and '"fill_value":0.0' in out[k]: + out[k] = out[k].replace('"fill_value":0.0', '"fill_value":"NaN"') + return out + + def postprocess(out): + """postprocess function to fix fill values""" + out = fix_fill_values(out) + return out + + mzz = MultiZarrToZarr( + json_list, + concat_dims=["ocean_time"], + identical_dims=[ + "lat_rho", + "lon_rho", + "lon_psi", + "lat_psi", + "lat_u", + "lon_u", + "lat_v", + "lon_v", + "Akk_bak", + "Akp_bak", + "Akt_bak", + "Akv_bak", + "Cs_r", + "Cs_w", + "FSobc_in", + "FSobc_out", + "Falpha", + "Fbeta", + "Fgamma", + "Lm2CLM", + "Lm3CLM", + "LnudgeM2CLM", + "LnudgeM3CLM", + "LnudgeTCLM", + "LsshCLM", + "LtracerCLM", + "LtracerSrc", + "LuvSrc", + "LwSrc", + "M2nudg", + "M2obc_in", + "M2obc_out", + "M3nudg", + "M3obc_in", + "M3obc_out", + "Tcline", + "Tnudg", + "Tobc_in", + "Tobc_out", + "Vstretching", + "Vtransform", + "Znudg", + "Zob", + "Zos", + "angle", + "dstart", + "dt", + "dtfast", + "el", + "f", + "gamma2", + "grid", + "h", + "hc", + "mask_psi", + "mask_rho", + "mask_u", + "mask_v", + "nHIS", + "nRST", + "nSTA", + "ndefHIS", + "ndtfast", + "ntimes", + "pm", + "pn", + "rdrg", + "rdrg2", + "rho0", + "spherical", + "theta_b", + "theta_s", + "xl", + ], + coo_map={ + "ocean_time": "cf:ocean_time", + }, + postprocess=postprocess, + ) + + # to keep in memory + out = mzz.translate() + + return out + + +def make_nwgoa_kerchunk(start, end): + """_summary_ + + Parameters + ---------- + start, end : str + Should be something like "1999-01-02" for YYYY-MM-DD + + Returns + ------- + kerchunk output + _description_ + """ + + # this version of the daily json files has the grid file merged + output_dir_single_files = "/home/kristen/projects/kerchunk_setup/nwgoa_with_grids" + + fs2 = fsspec.filesystem("") # local file system to save final jsons to + + # select the single file Jsons to combine + json_list = sorted( + fs2.glob(f"{output_dir_single_files}/nwgoa*.json") + ) # combine single json files + json_list = [ + j + for j in json_list + if Path(j).stem.split("nwgoa_")[1] >= start + and Path(j).stem.split("nwgoa_")[1] <= end + ] + + # account for double compression + # Look at individual variables in the files to see what needs to be changed with + # h5dump -d ocean_time -p /mnt/depot/data/packrat/prod/aoos/nwgoa/processed/1999/nwgoa_1999-02-01.nc + def preprocess(refs): + """preprocess function to fix fill values""" + for k in list(refs): + if k.endswith("/.zarray"): + refs[k] = refs[k].replace( + '"filters":[{"elementsize":8,"id":"shuffle"}]', + '"filters":[{"elementsize":8,"id":"shuffle"},{"id": "zlib", "level":8}]', + ) + refs[k] = refs[k].replace( + '"filters":[{"elementsize":4,"id":"shuffle"}]', + '"filters":[{"elementsize":4,"id":"shuffle"},{"id": "zlib", "level":8}]', + ) + return refs + + import zarr + + def add_time_attr(out): + """add time attributes to the ocean_time variable""" + out_ = zarr.open(out) + out_.ocean_time.attrs["axis"] = "T" + return out + + def postprocess(out): + """postprocess function to fix fill values""" + out = add_time_attr(out) + return out + + mzz = MultiZarrToZarr( + json_list, + concat_dims=["ocean_time"], + identical_dims=[ + "lat_rho", + "lon_rho", + "lon_psi", + "lat_psi", + "lat_u", + "lon_u", + "lat_v", + "lon_v", + "Akk_bak", + "Akp_bak", + "Akt_bak", + "Akv_bak", + "Cs_r", + "Cs_w", + "FSobc_in", + "FSobc_out", + "Falpha", + "Fbeta", + "Fgamma", + "Lm2CLM", + "Lm3CLM", + "LnudgeM2CLM", + "LnudgeM3CLM", + "LnudgeTCLM", + "LsshCLM", + "LtracerCLM", + "LtracerSrc", + 
"LuvSrc", + "LwSrc", + "M2nudg", + "M2obc_in", + "M2obc_out", + "M3nudg", + "M3obc_in", + "M3obc_out", + "Tcline", + "Tnudg", + "Tobc_in", + "Tobc_out", + "Vstretching", + "Vtransform", + "Znudg", + "Zob", + "Zos", + "angle", + "dstart", + "dt", + "dtfast", + "el", + "f", + "gamma2", + "grid", + "h", + "hc", + "mask_psi", + "mask_rho", + "mask_u", + "mask_v", + "nHIS", + "nRST", + "nSTA", + "ndefHIS", + "ndtfast", + "ntimes", + "pm", + "pn", + "rdrg", + "rdrg2", + "rho0", + "spherical", + "theta_b", + "theta_s", + "xl", + "Charnok_alpha", + "CrgBan_cw", + "JLTS", + "JPRJ", + "LuvSponge", + "P1", + "P2", + "P3", + "P4", + "PLAT", + "PLONG", + "ROTA", + "XOFF", + "YOFF", + "Zos_hsig_alpha", + "depthmax", + "depthmin", + "dfdy", + "dmde", + "dndx", + "f0", + "gls_Kmin", + "gls_Pmin", + "gls_c1", + "gls_c2", + "gls_c3m", + "gls_c3p", + "gls_cmu0", + "gls_m", + "gls_n", + "gls_p", + "gls_sigk", + "gls_sigp", + "h_mask", + "hraw", + "nAVG", + "ndefAVG", + "nl_visc2", + "ntsAVG", + "sz_alpha", + "wtype_grid", + "x_psi", + "x_rho", + "x_u", + "x_v", + "y_psi", + "y_rho", + "y_u", + "y_v", + ], + coo_map={ + "ocean_time": "cf:ocean_time", + # "eta_rho": list(np.arange(1044)) + }, + preprocess=preprocess, + postprocess=postprocess, + ) + + # to keep in memory + out = mzz.translate() + + return out diff --git a/particle_tracking_manager/plotting.py b/particle_tracking_manager/plotting.py index 9bed02b..c6e567e 100644 --- a/particle_tracking_manager/plotting.py +++ b/particle_tracking_manager/plotting.py @@ -3,14 +3,23 @@ import opendrift -def plot_dest(o, filename): - """This is copied from an opendrift example.""" +def plot_dest(o, filename=None, **kwargs): + """This is copied from an opendrift example. + + ...and subsequently modified. + + Parameters + ---------- + kwargs : dict + Additional keyword arguments to pass to the plotting function. + """ import cmocean cmap = cmocean.tools.crop_by_percent(cmocean.cm.amp, 20, which="max", N=None) od = opendrift.open_xarray(o.outfile_name) + density = od.get_histogram(pixelsize_m=5000).isel(time=-1).isel(origin_marker=0) density = density.where(density > 0) density = density / density.sum() * 100 @@ -26,4 +35,5 @@ def plot_dest(o, filename): cmap=cmap, fast=True, filename=filename, + **kwargs ) diff --git a/particle_tracking_manager/the_manager.py b/particle_tracking_manager/the_manager.py index cc8c5f8..4dc6577 100644 --- a/particle_tracking_manager/the_manager.py +++ b/particle_tracking_manager/the_manager.py @@ -61,6 +61,7 @@ class ParticleTrackingManager: Depth of initial drifter locations, by default 0 but taken from the default in the model. Values are overridden if ``surface_only==True`` to 0 and to the seabed if ``seed_seafloor`` is True. + Depth is negative downward in OpenDrift. seed_seafloor : bool, optional Set to True to seed drifters vertically at the seabed, default is False. If True then value of z is set to None and ignored. @@ -68,6 +69,10 @@ class ParticleTrackingManager: Number of drifters to simulate. Default is 100. start_time : Optional[str,datetime.datetime,pd.Timestamp], optional Start time of simulation, by default None + start_time_end : Optional[str,datetime.datetime,pd.Timestamp], optional + If not None, this creates a range of start times for drifters, starting with + `start_time` and ending with `start_time_end`. Drifters will be initialized linearly + between the two start times. Default None. 
     run_forward : bool, optional
         True to run forward in time, False to run backward, by default True
     time_step : int, optional
@@ -130,7 +135,7 @@ class ParticleTrackingManager:
         dataset before inputting to PTM. Setting this to True may save computation time but
         will be less accurate, especially in the tidal flat regions of the model.
     output_file : Optional[str], optional
-        Name of output file to save, by default None. If None, default is set in the model.
+        Name of output file to save, by default None. If None, a default is set in the model. Should have a ".nc" suffix.
 
     Notes
     -----
@@ -145,6 +150,7 @@ class ParticleTrackingManager:
     surface_only: Optional[bool]
     z: Optional[Union[int, float]]
     start_time: Optional[Union[str, datetime.datetime, pd.Timestamp]]
+    start_time_end: Optional[Union[str, datetime.datetime, pd.Timestamp]]
     steps: Optional[int]
     time_step: int
     duration: Optional[datetime.timedelta]
@@ -165,6 +171,7 @@ def __init__(
         seed_seafloor: bool = config_ptm["seed_seafloor"]["default"],
         number: int = config_ptm["number"]["default"],
         start_time: Optional[Union[str, datetime.datetime, pd.Timestamp]] = None,
+        start_time_end: Optional[Union[str, datetime.datetime, pd.Timestamp]] = None,
         run_forward: bool = config_ptm["run_forward"]["default"],
         time_step: int = config_ptm["time_step"]["default"],
         time_step_output: Optional[int] = config_ptm["time_step_output"]["default"],
@@ -219,6 +226,29 @@ def __init__(
         self.kw = kw
 
+        if self.__dict__["output_file"] is None:
+            self.__dict__[
+                "output_file"
+            ] = f"output-results_{datetime.datetime.now():%Y-%m-%dT%H%M:%SZ}.nc"
+
+        ## set up log for this simulation
+        # Create a file handler
+        assert self.__dict__["output_file"] is not None
+        logfile_name = self.__dict__["output_file"].replace(".nc", ".log")
+        self.file_handler = logging.FileHandler(logfile_name)
+
+        # Create a formatter and add it to the handler
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        self.file_handler.setFormatter(formatter)
+
+        # Add the handler to the logger
+        self.logger.addHandler(self.file_handler)
+
+        self.logger.info(f"filename: {logfile_name}")
+        ##
+
     def __setattr_model__(self, name: str, value) -> None:
         """Implement this in model class to add specific __setattr__ there too."""
         pass
 
@@ -341,6 +371,12 @@ def __setattr__(self, name: str, value) -> None:
             self.z = 0
             self.vertical_mixing = False
 
+        # z should be a negative value, representing below the surface
+        if name == "z" and value is not None and value > 0:
+            raise ValueError(
+                "z should be a negative value, representing below the surface."
+            )
+
         # in case any of these are reset by user after surface_only is already set
         # if surface_only is True, do3D must be False
         if name == "do3D" and value and self.surface_only:
@@ -501,6 +537,11 @@ def run(self):
         )
 
         self.run_drifters()
+
+        # Remove the file handler at the end of the run
+        self.logger.removeHandler(self.file_handler)
+        self.file_handler.close()
+
         self.has_run = True
 
     def run_all(self):
diff --git a/particle_tracking_manager/the_manager_config.json b/particle_tracking_manager/the_manager_config.json
index 7962fa6..595371e 100644
--- a/particle_tracking_manager/the_manager_config.json
+++ b/particle_tracking_manager/the_manager_config.json
@@ -56,6 +56,15 @@
         "description": "Start time for drifter simulation.",
         "ptm_level": 1
     },
+    "start_time_end": {
+        "type": "datetime.datetime",
+        "default": "None",
+        "min": "datetime.datetime(1999,1,1)",
+        "max": "pd.Timestamp.now() + pd.Timedelta(\"48H\")",
+        "units": "time",
+        "description": "If used, this creates a range of start times for drifters, starting with `start_time` and ending with `start_time_end`. Drifters will be initialized linearly between the two start times.",
+        "ptm_level": 2
+    },
     "run_forward": {
         "type": "bool",
         "default": true,
diff --git a/tests/test_manager.py b/tests/test_manager.py
index ecd36ee..f0dd42e 100644
--- a/tests/test_manager.py
+++ b/tests/test_manager.py
@@ -12,6 +12,13 @@
 
 import particle_tracking_manager as ptm
 
 
+def test_z_sign():
+    """z should be negative"""
+
+    with pytest.raises(ValueError):
+        m = ptm.OpenDriftModel(z=1)
+
+
 def test_order():
     """Have to configure before seeding."""
@@ -207,7 +214,7 @@ def test_setattr_oceanmodel_lon0_360():
 
 def test_setattr_surface_only():
     """Test setting surface_only attribute."""
-    manager = ptm.OpenDriftModel(do3D=True, z=1, vertical_mixing=True)
+    manager = ptm.OpenDriftModel(do3D=True, z=-1, vertical_mixing=True)
     manager.surface_only = True
     assert manager.do3D == False
     assert manager.z == 0
@@ -297,7 +304,7 @@ def test_surface_only_true(self):
         self.m.surface_only = True
         self.m.do3D = True
         self.assertEqual(self.m.do3D, False)
-        self.m.z = 10
+        self.m.z = -10
         self.assertEqual(self.m.z, 0)
         self.m.vertical_mixing = True
         self.assertEqual(self.m.vertical_mixing, False)
@@ -319,8 +326,8 @@ def test_seed_seafloor_true(self):
         self.assertIsNone(self.m.z)
 
     def test_z_set(self):
-        self.m.z = 10
-        self.assertEqual(self.m.z, 10)
+        self.m.z = -10
+        self.assertEqual(self.m.z, -10)
         self.assertFalse(self.m.seed_seafloor)
 
     def test_has_added_reader_true_ocean_model_set(self):
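
A minimal usage sketch of the options added in this changeset (not part of the diff). Only `OpenDriftModel`, `run_all`, `z`, `start_time`, `start_time_end`, and the kerchunk helpers come from the code above; the ocean_model value, seeding coordinates, and dates are illustrative assumptions, and the helpers only run where the hard-coded local reference JSONs exist.

    import particle_tracking_manager as ptm
    from particle_tracking_manager.models.opendrift.utils import (
        make_ciofs_kerchunk,
        make_nwgoa_kerchunk,
    )

    # z must now be negative (below the surface); a positive z raises ValueError.
    # start_time_end seeds drifters linearly in time between start_time and
    # start_time_end instead of all at the single start_time.
    m = ptm.OpenDriftModel(
        ocean_model="CIOFS",  # assumed; triggers on-the-fly kerchunk generation above
        lon=-151.0,  # assumed seeding location
        lat=59.0,
        start_time="2005-02-22T00:00",
        start_time_end="2005-02-23T00:00",
        z=-1,
    )
    m.run_all()  # also writes a log file named after output_file

    # The kerchunk helpers can be called directly; start/end follow the docstring
    # formats: "YYYY_0DDD" (day of year) for CIOFS, "YYYY-MM-DD" for NWGOA.
    refs_ciofs = make_ciofs_kerchunk(start="2005_0052", end="2005_0068")
    refs_nwgoa = make_nwgoa_kerchunk(start="1999-01-02", end="1999-01-05")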