Skip to content

Commit

Permalink
Add new L2M processing
Browse files Browse the repository at this point in the history
  • Loading branch information
ghiggi committed Dec 23, 2024
1 parent 51c8df4 commit 5494f92
Show file tree
Hide file tree
Showing 14 changed files with 1,141 additions and 323 deletions.
8 changes: 4 additions & 4 deletions disdrodb/api/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def has_available_data(
sample_interval=None,
rolling=None,
# Option for L2M
distribution=None,
model_name=None,
):
"""Return ``True`` if data are available for the given product and station."""
# Define product directory
Expand All @@ -180,7 +180,7 @@ def has_available_data(
sample_interval=sample_interval,
rolling=rolling,
# Option for L2M
distribution=distribution,
model_name=model_name,
# Directory options
check_exists=False,
)
Expand All @@ -204,7 +204,7 @@ def check_data_availability(
sample_interval=None,
rolling=None,
# Option for L2M
distribution=None,
model_name=None,
):
"""Check the station product data directory has files inside. If not, raise an error."""
if not has_available_data(
Expand All @@ -217,7 +217,7 @@ def check_data_availability(
sample_interval=sample_interval,
rolling=rolling,
# Option for L2M
distribution=distribution,
model_name=model_name,
):
msg = f"The {product} station data directory of {data_source} {campaign_name} {station_name} is empty !"
logger.error(msg)
Expand Down
12 changes: 6 additions & 6 deletions disdrodb/api/create_directories.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def create_product_directory(
sample_interval=None,
rolling=None,
# Option for L2M
distribution=None,
model_name=None,
):
"""Initialize the directory structure for a DISDRODB product.
Expand Down Expand Up @@ -298,7 +298,7 @@ def create_product_directory(
sample_interval=sample_interval,
rolling=rolling,
# Option for L2M
distribution=distribution,
model_name=model_name,
)

# Check metadata file is available
Expand All @@ -321,7 +321,7 @@ def create_product_directory(
sample_interval=sample_interval,
rolling=rolling,
# Option for L2M
distribution=distribution,
model_name=model_name,
)

# Create required directory (if it doesn't exist)
Expand All @@ -338,7 +338,7 @@ def create_product_directory(
sample_interval=sample_interval,
rolling=rolling,
# Option for L2M
distribution=distribution,
model_name=model_name,
)

# If product files are already available:
Expand All @@ -361,7 +361,7 @@ def create_logs_directory(
sample_interval=None,
rolling=None,
# Option for L2M
distribution=None,
model_name=None,
):
"""Initialize the logs directory structure for a DISDRODB product."""
# Define logs directory
Expand All @@ -375,7 +375,7 @@ def create_logs_directory(
sample_interval=sample_interval,
rolling=rolling,
# Option for L2M
distribution=distribution,
model_name=model_name,
)

# Ensure empty log directory
Expand Down
8 changes: 4 additions & 4 deletions disdrodb/api/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def get_filepaths(
campaign_name,
station_name,
product,
distribution=None,
model_name=None,
sample_interval=None,
rolling=None,
debugging_mode: bool = False,
Expand All @@ -89,8 +89,8 @@ def get_filepaths(
rolling : bool, optional
Whether the dataset has been resampled by aggregating or rolling.
It must be specified only for product L2E and L2M !
distribution : str
The model of the statistical distribution for the DSD.
model_name : str
The model name of the statistical distribution for the DSD.
It must be specified only for product L2M !
debugging_mode : bool, optional
If ``True``, it selects a maximum of 3 files for debugging purposes.
Expand All @@ -116,7 +116,7 @@ def get_filepaths(
sample_interval=sample_interval,
rolling=rolling,
# Options for L2M
distribution=distribution,
model_name=model_name,
)

# Define glob pattern
Expand Down
60 changes: 18 additions & 42 deletions disdrodb/api/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,13 +310,6 @@ def check_sample_interval(sample_interval):
raise ValueError("'sample_interval' must be an integer.")


def check_distribution(distribution):
    """Validate the ``distribution`` argument.

    Parameters
    ----------
    distribution : str
        Name of the statistical distribution to validate.

    Raises
    ------
    ValueError
        If ``distribution`` is not one of the supported distribution names.
    """
    allowed = ["gamma", "normalized_gamma", "lognormal", "exponential"]
    # Guard clause: nothing to do when the value is supported.
    if distribution in allowed:
        return
    raise ValueError(f"Invalid 'distribution' {distribution}. Valid values are {allowed}")


def check_rolling(rolling):
"""Check rolling argument validity."""
if not isinstance(rolling, bool):
Expand All @@ -325,7 +318,7 @@ def check_rolling(rolling):

def define_product_dir_tree(
product,
distribution=None,
model_name=None,
sample_interval=None,
rolling=None,
):
Expand All @@ -341,8 +334,8 @@ def define_product_dir_tree(
rolling : bool, optional
Whether the dataset has been resampled by aggregating or rolling.
It must be specified only for product L2E and L2M !
distribution : str
The model of the statistical distribution for the DSD.
model_name : str
The custom model name of the fitted statistical distribution.
It must be specified only for product L2M !
Returns
Expand All @@ -362,10 +355,8 @@ def define_product_dir_tree(
if product == "L2M":
check_rolling(rolling)
check_sample_interval(sample_interval)
check_distribution(distribution)
sample_interval_acronym = define_accumulation_acronym(seconds=sample_interval, rolling=rolling)
distribution_acronym = get_distribution_acronym(distribution)
return os.path.join(product, distribution_acronym, sample_interval_acronym)
return os.path.join(product, model_name, sample_interval_acronym)
raise ValueError(f"The product {product} is not defined.")


Expand Down Expand Up @@ -422,7 +413,7 @@ def define_data_dir_new(
data_source,
campaign_name,
station_name,
distribution=None,
model_name=None,
sample_interval=None,
rolling=None,
base_dir=None,
Expand Down Expand Up @@ -461,7 +452,7 @@ def define_data_dir_new(
)
product_dir_tree = define_product_dir_tree(
product=product,
distribution=distribution,
model_name=model_name,
sample_interval=sample_interval,
rolling=rolling,
)
Expand All @@ -476,7 +467,7 @@ def define_logs_dir(
data_source,
campaign_name,
station_name,
distribution=None,
model_name=None,
sample_interval=None,
rolling=None,
base_dir=None,
Expand Down Expand Up @@ -521,7 +512,7 @@ def define_logs_dir(
)
product_dir_tree = define_product_dir_tree(
product=product,
distribution=distribution,
model_name=model_name,
sample_interval=sample_interval,
rolling=rolling,
)
Expand All @@ -536,7 +527,7 @@ def define_data_dir(
data_source,
campaign_name,
station_name,
distribution=None,
model_name=None,
sample_interval=None,
rolling=None,
base_dir=None,
Expand Down Expand Up @@ -565,8 +556,8 @@ def define_data_dir(
rolling : bool, optional
Whether the dataset has been resampled by aggregating or rolling.
It must be specified only for product L2E and L2M !
distribution : str
The model of the statistical distribution for the DSD.
model_name : str
The name of the fitted statistical distribution for the DSD.
It must be specified only for product L2M !
Returns
Expand All @@ -592,10 +583,8 @@ def define_data_dir(
elif product == "L2M":
check_rolling(rolling)
check_sample_interval(sample_interval)
check_distribution(distribution)
sample_interval_acronym = define_accumulation_acronym(seconds=sample_interval, rolling=rolling)
distribution_acronym = get_distribution_acronym(distribution)
data_dir = os.path.join(station_dir, distribution_acronym, sample_interval_acronym)
data_dir = os.path.join(station_dir, model_name, sample_interval_acronym)
else:
raise ValueError("TODO") # CHECK Product on top !`
if check_exists:
Expand Down Expand Up @@ -667,17 +656,6 @@ def define_station_dir(
#### Filenames for DISDRODB products


def get_distribution_acronym(distribution):
    """Define DISDRODB L2M distribution acronym.

    Parameters
    ----------
    distribution : str
        Name of the statistical distribution. Supported values are
        ``"gamma"``, ``"normalized_gamma"``, ``"lognormal"`` and
        ``"exponential"``. The short form ``"lognorm"`` is also accepted.

    Returns
    -------
    str
        The upper-case acronym associated with the distribution.

    Raises
    ------
    KeyError
        If ``distribution`` is not a supported distribution name.
    """
    acronym_dict = {
        # "lognormal" is the name accepted by check_distribution; without this
        # key a validated "lognormal" value would fail here with a KeyError.
        "lognormal": "LOGNORM",
        # Keep the short alias so existing callers passing "lognorm" still work.
        "lognorm": "LOGNORM",
        "normalized_gamma": "NGAMMA",
        "gamma": "GAMMA",
        "exponential": "EXP",
    }
    return acronym_dict[distribution]


def define_accumulation_acronym(seconds, rolling):
"""Define the accumulation acronym.
Expand All @@ -701,7 +679,7 @@ def define_filename(
sample_interval: Optional[int] = None,
rolling: Optional[bool] = None,
# L2M option
distribution: Optional[str] = None,
model_name: Optional[str] = None,
# Filename options
obj=None,
add_version=True,
Expand All @@ -728,8 +706,8 @@ def define_filename(
rolling : bool, optional
Whether the dataset has been resampled by aggregating or rolling.
It must be specified only for product L2E and L2M !
distribution : str
The model of the statistical distribution for the DSD.
model_name : str
The model name of the fitted statistical distribution for the DSD.
It must be specified only for product L2M !
Returns
Expand All @@ -753,8 +731,7 @@ def define_filename(
sample_interval_acronym = define_accumulation_acronym(seconds=sample_interval, rolling=rolling)
product_acronym = f"L2E.{sample_interval_acronym}"
if product in ["L2M"]:
distribution_acronym = get_distribution_acronym(distribution)
product_acronym = f"L2M_{distribution_acronym}.{sample_interval_acronym}"
product_acronym = f"L2M_{model_name}.{sample_interval_acronym}"

# -----------------------------------------.
# Define base filename
Expand Down Expand Up @@ -950,9 +927,9 @@ def define_l2m_filename(
ds,
campaign_name: str,
station_name: str,
distribution: str,
sample_interval: int,
rolling: bool,
model_name: str,
) -> str:
"""Define L2M file name.
Expand All @@ -973,14 +950,13 @@ def define_l2m_filename(
from disdrodb import PRODUCT_VERSION
from disdrodb.utils.xarray import get_dataset_start_end_time

distribution_acronym = get_distribution_acronym(distribution)
sample_interval_acronym = define_accumulation_acronym(seconds=sample_interval, rolling=rolling)
starting_time, ending_time = get_dataset_start_end_time(ds)
starting_time = pd.to_datetime(starting_time).strftime("%Y%m%d%H%M%S")
ending_time = pd.to_datetime(ending_time).strftime("%Y%m%d%H%M%S")
version = PRODUCT_VERSION
filename = (
f"L2M_{distribution_acronym}.{sample_interval_acronym}.{campaign_name}."
f"L2M_{model_name}.{sample_interval_acronym}.{campaign_name}."
+ f"{station_name}.s{starting_time}.e{ending_time}.{version}.nc"
)
return filename
Loading

0 comments on commit 5494f92

Please sign in to comment.