From 16abdbeddc557eb6275660eca7821ba0f30e47b2 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Sat, 11 Nov 2023 00:46:38 +0100 Subject: [PATCH] Define path.py module --- disdrodb/api/configs.py | 2 +- disdrodb/api/io.py | 247 +--------- disdrodb/api/path.py | 425 ++++++++++++++++++ disdrodb/data_transfer/upload_data.py | 2 +- disdrodb/l0/check_readers.py | 2 +- disdrodb/l0/create_directories.py | 2 +- disdrodb/l0/io.py | 197 +------- disdrodb/l0/l0_processing.py | 20 +- disdrodb/l0/routines.py | 2 +- disdrodb/metadata/io.py | 6 +- disdrodb/tests/conftest.py | 2 +- disdrodb/tests/test_api/test_api_checks.py | 37 +- disdrodb/tests/test_api/test_api_path.py | 133 ++++++ disdrodb/tests/test_l0/test_cmd_processing.py | 2 +- .../tests/test_l0/test_create_directories.py | 2 +- disdrodb/tests/test_l0/test_io.py | 112 +---- disdrodb/tests/test_l0/test_l0b_concat.py | 2 +- .../tests/test_metadata/test_metadata_io.py | 9 +- .../tests/test_utils/test_utils_pandas.py | 33 ++ .../tests/test_utils/test_utils_xarray.py | 50 +++ disdrodb/utils/compression.py | 2 +- disdrodb/utils/pandas.py | 41 ++ disdrodb/utils/xarray.py | 40 ++ disdrodb/utils/yaml.py | 22 +- tutorials/reader_preparation.ipynb | 2 +- 25 files changed, 795 insertions(+), 599 deletions(-) create mode 100644 disdrodb/api/path.py create mode 100644 disdrodb/tests/test_api/test_api_path.py create mode 100644 disdrodb/tests/test_utils/test_utils_pandas.py create mode 100644 disdrodb/tests/test_utils/test_utils_xarray.py create mode 100644 disdrodb/utils/pandas.py create mode 100644 disdrodb/utils/xarray.py diff --git a/disdrodb/api/configs.py b/disdrodb/api/configs.py index ee18c76e..e335b173 100644 --- a/disdrodb/api/configs.py +++ b/disdrodb/api/configs.py @@ -22,7 +22,7 @@ import os from disdrodb.api.checks import check_product, check_sensor_name -from disdrodb.api.io import define_config_dir +from disdrodb.api.path import define_config_dir from disdrodb.utils.yaml import read_yaml logger = logging.getLogger(__name__) 
diff --git a/disdrodb/api/io.py b/disdrodb/api/io.py index d0719909..47289e23 100644 --- a/disdrodb/api/io.py +++ b/disdrodb/api/io.py @@ -23,252 +23,9 @@ import numpy as np from disdrodb.api.checks import check_product +from disdrodb.api.path import get_disdrodb_path from disdrodb.configs import get_base_dir -from disdrodb.utils.directories import check_directory_exists, count_files, list_directories, list_files - - -def get_disdrodb_path( - base_dir, - product, - data_source="", - campaign_name="", - check_exists=True, -): - """Return the directory in the DISDRODB infrastructure. - - If data_source and campaign_name are not specified it return the product directory. - If data_source is specified, it returns the data_source directory. - If campaign_source is specified, it returns the campaign_name directory. - - Parameters - ---------- - base_dir : str - The disdrodb base directory - product : str - The DISDRODB product. It can be "RAW", "L0A", or "L0B". - data_source : str, optional - The data source. Must be specified if campaign_name is specified. - campaign_name : str, optional - The campaign_name. - check_exists : bool, optional - Whether to check if the directory exists. By default True. 
- - Returns - ------- - dir_path : str - Directory path - """ - from disdrodb.api.checks import check_base_dir - from disdrodb.utils.directories import check_directory_exists - - # Check base_dir validity - base_dir = check_base_dir(base_dir) - if len(campaign_name) > 0: - if len(data_source) == 0: - raise ValueError("If campaign_name is specified, data_source must be specified.") - - # Get directory - if product.upper() == "RAW": - dir_path = os.path.join(base_dir, "Raw", data_source, campaign_name) - else: - dir_path = os.path.join(base_dir, "Processed", data_source, campaign_name) - if check_exists: - check_directory_exists(dir_path) - return dir_path - - -def define_campaign_dir( - product, - data_source, - campaign_name, - base_dir=None, - check_exists=False, -): - """Return the campaign directory in the DISDRODB infrastructure. - - Parameters - ---------- - product : str - The DISDRODB product. It can be "RAW", "L0A", or "L0B". - data_source : str - The data source. Must be specified if campaign_name is specified. - campaign_name : str - The campaign_name. - base_dir : str, optional - The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``. - If not specified, the path specified in the DISDRODB active configuration will be used. - check_exists : bool, optional - Whether to check if the directory exists. By default False. - - Returns - ------- - station_dir : str - Station data directory path - """ - base_dir = get_base_dir(base_dir) - campaign_dir = get_disdrodb_path( - base_dir=base_dir, - product=product, - data_source=data_source, - campaign_name=campaign_name, - check_exists=check_exists, - ) - return str(campaign_dir) - - -def define_station_dir( - product, - data_source, - campaign_name, - station_name, - base_dir=None, - check_exists=False, -): - """Return the station data directory in the DISDRODB infrastructure. - - Parameters - ---------- - product : str - The DISDRODB product. It can be "RAW", "L0A", or "L0B". 
- data_source : str - The data source. - campaign_name : str - The campaign name. - station_name : str - The station name. - base_dir : str, optional - The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``. - If not specified, the path specified in the DISDRODB active configuration will be used. - check_exists : bool, optional - Whether to check if the directory exists. By default False. - - Returns - ------- - station_dir : str - Station data directory path - """ - base_dir = get_base_dir(base_dir) - campaign_dir = get_disdrodb_path( - base_dir=base_dir, - product=product, - data_source=data_source, - campaign_name=campaign_name, - check_exists=check_exists, - ) - if product.upper() == "RAW": - station_dir = os.path.join(campaign_dir, "data", station_name) - else: - station_dir = os.path.join(campaign_dir, product, station_name) - if check_exists: - check_directory_exists(station_dir) - return str(station_dir) - - -def define_metadata_dir( - product, - data_source, - campaign_name, - base_dir=None, - check_exists=False, -): - """Return the metadata directory in the DISDRODB infrastructure. - - Parameters - ---------- - product : str - The DISDRODB product. It can be "RAW", "L0A", or "L0B". - data_source : str - The data source. - campaign_name : str - The campaign name. - base_dir : str, optional - The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``. - If not specified, the path specified in the DISDRODB active configuration will be used. - check_exists : bool, optional - Whether to check if the directory exists. By default False. 
- - Returns - ------- - metadata_dir : str - Station data directory path - """ - base_dir = get_base_dir(base_dir) - campaign_dir = define_campaign_dir( - base_dir=base_dir, - product=product, - data_source=data_source, - campaign_name=campaign_name, - check_exists=check_exists, - ) - if product.upper() == "RAW": - metadata_dir = os.path.join(campaign_dir, "metadata") - else: - metadata_dir = os.path.join(campaign_dir, "metadata") - if check_exists: - check_directory_exists(metadata_dir) - return str(metadata_dir) - - -def define_metadata_filepath( - product, - data_source, - campaign_name, - station_name, - base_dir=None, - check_exists=False, -): - """Return the station metadata filepath in the DISDRODB infrastructure. - - Parameters - ---------- - product : str - The DISDRODB product. It can be "RAW", "L0A", or "L0B". - data_source : str - The data source. - campaign_name : str - The campaign name. - station_name : str - The station name. - base_dir : str, optional - The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``. - If not specified, the path specified in the DISDRODB active configuration will be used. - check_exists : bool, optional - Whether to check if the directory exists. By default False. 
- - Returns - ------- - metadata_dir : str - Station data directory path - """ - base_dir = get_base_dir(base_dir) - metadata_dir = define_metadata_dir( - base_dir=base_dir, - product=product, - data_source=data_source, - campaign_name=campaign_name, - check_exists=False, - ) - metadata_filepath = os.path.join(metadata_dir, f"{station_name}.yml") - - if check_exists and not os.path.exists(metadata_filepath): - raise ValueError(f"The metadata file for {station_name} at {metadata_filepath} does not exists.") - - return str(metadata_filepath) - - -def define_config_dir(product): - """Define the config directory path of a given DISDRODB product.""" - from disdrodb import __root_path__ - - if product.upper() in ["RAW", "L0A", "L0B"]: - dir_name = "l0" - else: - raise NotImplementedError(f"Product {product} not implemented.") - config_dir = os.path.join(__root_path__, "disdrodb", dir_name, "configs") - return config_dir - - -####---------------------------------------------------------------------------. +from disdrodb.utils.directories import count_files, list_directories, list_files def _get_list_stations_dirs(product, campaign_dir): diff --git a/disdrodb/api/path.py b/disdrodb/api/path.py new file mode 100644 index 00000000..b8b8159f --- /dev/null +++ b/disdrodb/api/path.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python3 + +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# -----------------------------------------------------------------------------. +"""Define paths within the DISDRODB infrastructure.""" + +import os +import pandas as pd +import xarray as xr + +from disdrodb.configs import get_base_dir +from disdrodb.utils.directories import check_directory_exists +from disdrodb.api.info import infer_campaign_name_from_path + +####--------------------------------------------------------------------------. +#### Paths from BASE_DIR + +def get_disdrodb_path( + base_dir, + product, + data_source="", + campaign_name="", + check_exists=True, +): + """Return the directory in the DISDRODB infrastructure. + + If data_source and campaign_name are not specified it returns the product directory. + If data_source is specified, it returns the data_source directory. + If campaign_name is specified, it returns the campaign_name directory. + + Parameters + ---------- + base_dir : str + The disdrodb base directory + product : str + The DISDRODB product. It can be "RAW", "L0A", or "L0B". + data_source : str, optional + The data source. Must be specified if campaign_name is specified. + campaign_name : str, optional + The campaign_name. + check_exists : bool, optional + Whether to check if the directory exists. By default True. 
+ + Returns + ------- + dir_path : str + Directory path + """ + from disdrodb.api.checks import check_base_dir + from disdrodb.utils.directories import check_directory_exists + + # Check base_dir validity + base_dir = check_base_dir(base_dir) + if len(campaign_name) > 0: + if len(data_source) == 0: + raise ValueError("If campaign_name is specified, data_source must be specified.") + + # Get directory + if product.upper() == "RAW": + dir_path = os.path.join(base_dir, "Raw", data_source, campaign_name) + else: + dir_path = os.path.join(base_dir, "Processed", data_source, campaign_name) + if check_exists: + check_directory_exists(dir_path) + return dir_path + + +def define_campaign_dir( + product, + data_source, + campaign_name, + base_dir=None, + check_exists=False, +): + """Return the campaign directory in the DISDRODB infrastructure. + + Parameters + ---------- + product : str + The DISDRODB product. It can be "RAW", "L0A", or "L0B". + data_source : str + The data source. Must be specified if campaign_name is specified. + campaign_name : str + The campaign_name. + base_dir : str, optional + The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``. + If not specified, the path specified in the DISDRODB active configuration will be used. + check_exists : bool, optional + Whether to check if the directory exists. By default False. + + Returns + ------- + campaign_dir : str + Campaign directory path + """ + base_dir = get_base_dir(base_dir) + campaign_dir = get_disdrodb_path( + base_dir=base_dir, + product=product, + data_source=data_source, + campaign_name=campaign_name, + check_exists=check_exists, + ) + return str(campaign_dir) + + +def define_station_dir( + product, + data_source, + campaign_name, + station_name, + base_dir=None, + check_exists=False, +): + """Return the station data directory in the DISDRODB infrastructure. + + Parameters + ---------- + product : str + The DISDRODB product. It can be "RAW", "L0A", or "L0B". 
+ data_source : str + The data source. + campaign_name : str + The campaign name. + station_name : str + The station name. + base_dir : str, optional + The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``. + If not specified, the path specified in the DISDRODB active configuration will be used. + check_exists : bool, optional + Whether to check if the directory exists. By default False. + + Returns + ------- + station_dir : str + Station data directory path + """ + base_dir = get_base_dir(base_dir) + campaign_dir = get_disdrodb_path( + base_dir=base_dir, + product=product, + data_source=data_source, + campaign_name=campaign_name, + check_exists=check_exists, + ) + if product.upper() == "RAW": + station_dir = os.path.join(campaign_dir, "data", station_name) + else: + station_dir = os.path.join(campaign_dir, product, station_name) + if check_exists: + check_directory_exists(station_dir) + return str(station_dir) + + +def define_metadata_dir( + product, + data_source, + campaign_name, + base_dir=None, + check_exists=False, +): + """Return the metadata directory in the DISDRODB infrastructure. + + Parameters + ---------- + product : str + The DISDRODB product. It can be "RAW", "L0A", or "L0B". + data_source : str + The data source. + campaign_name : str + The campaign name. + base_dir : str, optional + The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``. + If not specified, the path specified in the DISDRODB active configuration will be used. + check_exists : bool, optional + Whether to check if the directory exists. By default False. 
+ + Returns + ------- + metadata_dir : str + Metadata directory path + """ + base_dir = get_base_dir(base_dir) + campaign_dir = define_campaign_dir( + base_dir=base_dir, + product=product, + data_source=data_source, + campaign_name=campaign_name, + check_exists=check_exists, + ) + if product.upper() == "RAW": + metadata_dir = os.path.join(campaign_dir, "metadata") + else: + metadata_dir = os.path.join(campaign_dir, "metadata") + if check_exists: + check_directory_exists(metadata_dir) + return str(metadata_dir) + + +def define_metadata_filepath( + product, + data_source, + campaign_name, + station_name, + base_dir=None, + check_exists=False, +): + """Return the station metadata filepath in the DISDRODB infrastructure. + + Parameters + ---------- + product : str + The DISDRODB product. It can be "RAW", "L0A", or "L0B". + data_source : str + The data source. + campaign_name : str + The campaign name. + station_name : str + The station name. + base_dir : str, optional + The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``. + If not specified, the path specified in the DISDRODB active configuration will be used. + check_exists : bool, optional + Whether to check if the metadata file exists. By default False. 
+ + Returns + ------- + metadata_filepath : str + Station metadata file path + """ + base_dir = get_base_dir(base_dir) + metadata_dir = define_metadata_dir( + base_dir=base_dir, + product=product, + data_source=data_source, + campaign_name=campaign_name, + check_exists=False, + ) + metadata_filepath = os.path.join(metadata_dir, f"{station_name}.yml") + + if check_exists and not os.path.exists(metadata_filepath): + raise ValueError(f"The metadata file for {station_name} at {metadata_filepath} does not exists.") + + return str(metadata_filepath) + + +def define_config_dir(product): + """Define the config directory path of a given DISDRODB product.""" + from disdrodb import __root_path__ + + if product.upper() in ["RAW", "L0A", "L0B"]: + dir_name = "l0" + else: + raise NotImplementedError(f"Product {product} not implemented.") + config_dir = os.path.join(__root_path__, "disdrodb", dir_name, "configs") + return config_dir + + +####--------------------------------------------------------------------------. +#### Directory/Filepaths L0A and L0B products + + +def define_l0a_station_dir(processed_dir: str, station_name: str) -> str: + """Define L0A directory. + + Parameters + ---------- + processed_dir : str + Path of the processed directory + station_name : str + Name of the station + + Returns + ------- + str + L0A directory path. + """ + station_dir = os.path.join(processed_dir, "L0A", station_name) + return station_dir + + +def define_l0b_station_dir(processed_dir: str, station_name: str) -> str: + """Define L0B directory. + + Parameters + ---------- + processed_dir : str + Path of the processed directory + station_name : str + Name of the station + + Returns + ------- + str + Path of the L0B directory + """ + station_dir = os.path.join(processed_dir, "L0B", station_name) + return station_dir + + +def define_l0a_filename(df, processed_dir, station_name: str) -> str: + """Define L0A file name. 
+ + Parameters + ---------- + df : pd.DataFrame + L0A DataFrame + processed_dir : str + Path of the processed directory + station_name : str + Name of the station + + Returns + ------- + str + L0A file name. + """ + from disdrodb.l0.standards import PRODUCT_VERSION + from disdrodb.utils.pandas import get_dataframe_start_end_time + + starting_time, ending_time = get_dataframe_start_end_time(df) + starting_time = pd.to_datetime(starting_time).strftime("%Y%m%d%H%M%S") + ending_time = pd.to_datetime(ending_time).strftime("%Y%m%d%H%M%S") + campaign_name = infer_campaign_name_from_path(processed_dir).replace(".", "-") + version = PRODUCT_VERSION + filename = f"L0A.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.parquet" + return filename + + +def define_l0b_filename(ds, processed_dir, station_name: str) -> str: + """Define L0B file name. + + Parameters + ---------- + ds : xr.Dataset + L0B xarray Dataset + processed_dir : str + Path of the processed directory + station_name : str + Name of the station + + Returns + ------- + str + L0B file name. + """ + from disdrodb.l0.standards import PRODUCT_VERSION + from disdrodb.utils.xarray import get_dataset_start_end_time + + starting_time, ending_time = get_dataset_start_end_time(ds) + starting_time = pd.to_datetime(starting_time).strftime("%Y%m%d%H%M%S") + ending_time = pd.to_datetime(ending_time).strftime("%Y%m%d%H%M%S") + campaign_name = infer_campaign_name_from_path(processed_dir).replace(".", "-") + version = PRODUCT_VERSION + filename = f"L0B.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc" + return filename + + +def define_l0a_filepath(df: pd.DataFrame, processed_dir: str, station_name: str) -> str: + """Define L0A file path. + + Parameters + ---------- + df : pd.DataFrame + L0A DataFrame. + processed_dir : str + Path of the processed directory. + station_name : str + Name of the station. + + Returns + ------- + str + L0A file path. 
+ """ + filename = define_l0a_filename(df=df, processed_dir=processed_dir, station_name=station_name) + station_dir = define_l0a_station_dir(processed_dir=processed_dir, station_name=station_name) + filepath = os.path.join(station_dir, filename) + return filepath + + +def define_l0b_filepath(ds: xr.Dataset, processed_dir: str, station_name: str, l0b_concat=False) -> str: + """Define L0B file path. + + Parameters + ---------- + ds : xr.Dataset + L0B xarray Dataset. + processed_dir : str + Path of the processed directory. + station_name : str + ID of the station + l0b_concat : bool + If False, the file is specified inside the station directory. + If True, the file is specified outside the station directory. + + Returns + ------- + str + L0B file path. + """ + station_dir = define_l0b_station_dir(processed_dir, station_name) + filename = define_l0b_filename(ds, processed_dir, station_name) + if l0b_concat: + product_dir = os.path.dirname(station_dir) + filepath = os.path.join(product_dir, filename) + else: + filepath = os.path.join(station_dir, filename) + return filepath + +####--------------------------------------------------------------------------. 
\ No newline at end of file diff --git a/disdrodb/data_transfer/upload_data.py b/disdrodb/data_transfer/upload_data.py index e7f444da..574e722a 100644 --- a/disdrodb/data_transfer/upload_data.py +++ b/disdrodb/data_transfer/upload_data.py @@ -22,7 +22,7 @@ import click -from disdrodb.api.io import define_metadata_filepath +from disdrodb.api.path import define_metadata_filepath from disdrodb.data_transfer.zenodo import upload_archive_to_zenodo, upload_station_to_zenodo from disdrodb.metadata import get_list_metadata from disdrodb.utils.yaml import read_yaml diff --git a/disdrodb/l0/check_readers.py b/disdrodb/l0/check_readers.py index 9dff9c3a..dd3350d8 100644 --- a/disdrodb/l0/check_readers.py +++ b/disdrodb/l0/check_readers.py @@ -24,7 +24,7 @@ import pandas as pd from disdrodb import __root_path__ -from disdrodb.api.io import get_disdrodb_path +from disdrodb.api.path import get_disdrodb_path from disdrodb.l0.l0_reader import get_station_reader_function from disdrodb.utils.directories import list_files diff --git a/disdrodb/l0/create_directories.py b/disdrodb/l0/create_directories.py index e5665938..619125e1 100644 --- a/disdrodb/l0/create_directories.py +++ b/disdrodb/l0/create_directories.py @@ -26,7 +26,7 @@ import shutil from disdrodb.api.info import infer_campaign_name_from_path, infer_data_source_from_path -from disdrodb.api.io import define_metadata_dir, define_station_dir +from disdrodb.api.path import define_metadata_dir, define_station_dir from disdrodb.utils.directories import ( check_directory_exists, copy_file, diff --git a/disdrodb/l0/io.py b/disdrodb/l0/io.py index 407a38d7..916a8a2e 100644 --- a/disdrodb/l0/io.py +++ b/disdrodb/l0/io.py @@ -22,206 +22,14 @@ from typing import Union import pandas as pd -import xarray as xr -from disdrodb.api.info import infer_campaign_name_from_path +from disdrodb.api.path import define_l0a_station_dir from disdrodb.utils.directories import list_files from disdrodb.utils.logger import log_info logger = 
logging.getLogger(__name__) -####--------------------------------------------------------------------------. -#### Directory/Filepaths L0A and L0B products - - -def _get_dataset_min_max_time(ds: xr.Dataset): - """Retrieves dataset starting and ending time. - - Parameters - ---------- - ds : xr.Dataset - Input dataset - - Returns - ------- - tuple - (starting_time, ending_time) - - """ - - starting_time = ds["time"].values[0] - ending_time = ds["time"].values[-1] - return (starting_time, ending_time) - - -def _get_dataframe_min_max_time(df: pd.DataFrame): - """Retrieves dataframe starting and ending time. - - Parameters - ---------- - df : pd.DataFrame - Input dataframe - - Returns - ------- - tuple - (starting_time, ending_time) - - """ - - starting_time = df["time"].iloc[0] - ending_time = df["time"].iloc[-1] - return (starting_time, ending_time) - - -def get_l0a_dir(processed_dir: str, station_name: str) -> str: - """Define L0A directory. - - Parameters - ---------- - processed_dir : str - Path of the processed directory - station_name : str - Name of the station - - Returns - ------- - str - L0A directory path. - """ - station_dir = os.path.join(processed_dir, "L0A", station_name) - return station_dir - - -def get_l0b_dir(processed_dir: str, station_name: str) -> str: - """Define L0B directory. - - Parameters - ---------- - processed_dir : str - Path of the processed directory - station_name : int - Name of the station - - Returns - ------- - str - Path of the L0B directory - """ - station_dir = os.path.join(processed_dir, "L0B", station_name) - return station_dir - - -def get_l0a_filename(df, processed_dir, station_name: str) -> str: - """Define L0A file name. - - Parameters - ---------- - df : pd.DataFrame - L0A DataFrame - processed_dir : str - Path of the processed directory - station_name : str - Name of the station - - Returns - ------- - str - L0A file name. 
- """ - from disdrodb.l0.standards import PRODUCT_VERSION - - starting_time, ending_time = _get_dataframe_min_max_time(df) - starting_time = pd.to_datetime(starting_time).strftime("%Y%m%d%H%M%S") - ending_time = pd.to_datetime(ending_time).strftime("%Y%m%d%H%M%S") - campaign_name = infer_campaign_name_from_path(processed_dir).replace(".", "-") - version = PRODUCT_VERSION - filename = f"L0A.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.parquet" - return filename - - -def get_l0b_filename(ds, processed_dir, station_name: str) -> str: - """Define L0B file name. - - Parameters - ---------- - ds : xr.Dataset - L0B xarray Dataset - processed_dir : str - Path of the processed directory - station_name : str - Name of the station - - Returns - ------- - str - L0B file name. - """ - from disdrodb.l0.standards import PRODUCT_VERSION - - starting_time, ending_time = _get_dataset_min_max_time(ds) - starting_time = pd.to_datetime(starting_time).strftime("%Y%m%d%H%M%S") - ending_time = pd.to_datetime(ending_time).strftime("%Y%m%d%H%M%S") - campaign_name = infer_campaign_name_from_path(processed_dir).replace(".", "-") - version = PRODUCT_VERSION - filename = f"L0B.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}.{version}.nc" - return filename - - -def get_l0a_filepath(df: pd.DataFrame, processed_dir: str, station_name: str) -> str: - """Define L0A file path. - - Parameters - ---------- - df : pd.DataFrame - L0A DataFrame. - processed_dir : str - Path of the processed directory. - station_name : str - Name of the station. - - Returns - ------- - str - L0A file path. 
- """ - filename = get_l0a_filename(df=df, processed_dir=processed_dir, station_name=station_name) - station_dir = get_l0a_dir(processed_dir=processed_dir, station_name=station_name) - filepath = os.path.join(station_dir, filename) - return filepath - - -def get_l0b_filepath(ds: xr.Dataset, processed_dir: str, station_name: str, l0b_concat=False) -> str: - """Define L0B file path. - - Parameters - ---------- - ds : xr.Dataset - L0B xarray Dataset. - processed_dir : str - Path of the processed directory. - station_name : str - ID of the station - l0b_concat : bool - If False, the file is specified inside the station directory. - If True, the file is specified outside the station directory. - - Returns - ------- - str - L0B file path. - """ - station_dir = get_l0b_dir(processed_dir, station_name) - filename = get_l0b_filename(ds, processed_dir, station_name) - if l0b_concat: - product_dir = os.path.dirname(station_dir) - filepath = os.path.join(product_dir, filename) - else: - filepath = os.path.join(station_dir, filename) - return filepath - - ####--------------------------------------------------------------------------. #### List Station Files @@ -365,7 +173,7 @@ def get_l0a_filepaths(processed_dir, station_name, debugging_mode): List of L0A file paths. """ - station_dir = get_l0a_dir(processed_dir, station_name) + station_dir = define_l0a_station_dir(processed_dir, station_name) filepaths = list_files(station_dir, glob_pattern="*.parquet", recursive=True) # Filter out filepaths if debugging_mode=True @@ -383,7 +191,6 @@ def get_l0a_filepaths(processed_dir, station_name, debugging_mode): #### DISDRODB L0A product reader -# --> TODO: in L0A processing ! def _read_l0a(filepath: str, verbose: bool = False, debugging_mode: bool = False) -> pd.DataFrame: # Log msg = f" - Reading L0 Apache Parquet file at {filepath} started." 
diff --git a/disdrodb/l0/l0_processing.py b/disdrodb/l0/l0_processing.py index 11eaec9f..b98eb7bc 100644 --- a/disdrodb/l0/l0_processing.py +++ b/disdrodb/l0/l0_processing.py @@ -29,7 +29,12 @@ from disdrodb.api.checks import check_sensor_name from disdrodb.api.info import infer_path_info_dict -from disdrodb.api.io import get_disdrodb_path +from disdrodb.api.path import ( + define_l0a_filepath, + define_l0b_filepath, + define_l0b_station_dir, + get_disdrodb_path, +) from disdrodb.configs import get_base_dir # Directory @@ -38,10 +43,7 @@ create_initial_directory_structure, ) from disdrodb.l0.io import ( - get_l0a_filepath, get_l0a_filepaths, - get_l0b_dir, - get_l0b_filepath, get_raw_filepaths, read_l0a_dataframe, ) @@ -150,7 +152,7 @@ def _generate_l0a( ##--------------------------------------------------------------------. #### - Write to Parquet - filepath = get_l0a_filepath(df=df, processed_dir=processed_dir, station_name=station_name) + filepath = define_l0a_filepath(df=df, processed_dir=processed_dir, station_name=station_name) write_l0a(df=df, filepath=filepath, force=force, verbose=verbose) ##--------------------------------------------------------------------. @@ -226,7 +228,7 @@ def _generate_l0b( # -----------------------------------------------------------------. # Write L0B netCDF4 dataset - filepath = get_l0b_filepath(ds, processed_dir, station_name) + filepath = define_l0b_filepath(ds, processed_dir, station_name) write_l0b(ds, filepath=filepath, force=force) ##--------------------------------------------------------------------. @@ -301,7 +303,7 @@ def _generate_l0b_from_nc( ) # -----------------------------------------------------------------. # Write L0B netCDF4 dataset - filepath = get_l0b_filepath(ds, processed_dir, station_name) + filepath = define_l0b_filepath(ds, processed_dir, station_name) write_l0b(ds, filepath=filepath, force=force) ##--------------------------------------------------------------------. 
@@ -804,7 +806,7 @@ def run_l0b_concat(processed_dir, station_name, remove=False, verbose=False): # -------------------------------------------------------------------------. # Retrieve L0B files - station_dir = get_l0b_dir(processed_dir, station_name) + station_dir = define_l0b_station_dir(processed_dir, station_name) filepaths = list_files(station_dir, glob_pattern="*.nc", recursive=True) filepaths = sorted(filepaths) @@ -826,7 +828,7 @@ def run_l0b_concat(processed_dir, station_name, remove=False, verbose=False): # -------------------------------------------------------------------------. # Define the filepath of the concatenated L0B netCDF - single_nc_filepath = get_l0b_filepath(ds, processed_dir, station_name, l0b_concat=True) + single_nc_filepath = define_l0b_filepath(ds, processed_dir, station_name, l0b_concat=True) force = True # TODO add as argument write_l0b(ds, filepath=single_nc_filepath, force=force) diff --git a/disdrodb/l0/routines.py b/disdrodb/l0/routines.py index 2de461ac..5fb29d2a 100644 --- a/disdrodb/l0/routines.py +++ b/disdrodb/l0/routines.py @@ -386,7 +386,7 @@ def run_disdrodb_l0_station( Base directory of DISDRODB. Format: <...>/DISDRODB If None (the default), the disdrodb config variable 'dir' is used. """ - from disdrodb.api.io import get_disdrodb_path + from disdrodb.api.path import get_disdrodb_path # ---------------------------------------------------------------------. 
t_i = time.time() diff --git a/disdrodb/metadata/io.py b/disdrodb/metadata/io.py index 4f090e13..49476d06 100644 --- a/disdrodb/metadata/io.py +++ b/disdrodb/metadata/io.py @@ -22,7 +22,7 @@ import os from disdrodb.api.info import infer_campaign_name_from_path, infer_data_source_from_path -from disdrodb.api.io import define_metadata_filepath +from disdrodb.api.path import define_metadata_filepath from disdrodb.configs import get_base_dir from disdrodb.metadata.manipulation import sort_metadata_dictionary from disdrodb.metadata.standards import get_valid_metadata_keys @@ -245,7 +245,7 @@ def _get_list_metadata_with_data(base_dir, data_sources=None, campaign_names=Non ####--------------------------------------------------------------------------. #### Default (empty) metadata -def _define_default_metadata_dict() -> dict: +def _create_default_metadata_dict() -> dict: """Get DISDRODB metadata default values. Returns @@ -275,7 +275,7 @@ def write_default_metadata(filepath: str) -> None: File path """ # Get default metadata dict - metadata = _define_default_metadata_dict() + metadata = _create_default_metadata_dict() # Try infer the data_source, campaign_name and station_name from filepath try: diff --git a/disdrodb/tests/conftest.py b/disdrodb/tests/conftest.py index 2de1b6d9..548eda1b 100644 --- a/disdrodb/tests/conftest.py +++ b/disdrodb/tests/conftest.py @@ -43,7 +43,7 @@ def create_fake_metadata_file( campaign_name="CAMPAIGN_NAME", station_name="station_name", ): - from disdrodb.api.io import define_metadata_filepath + from disdrodb.api.path import define_metadata_filepath # Define metadata filepath metadata_filepath = define_metadata_filepath( diff --git a/disdrodb/tests/test_api/test_api_checks.py b/disdrodb/tests/test_api/test_api_checks.py index ef4b7972..6b6880f4 100644 --- a/disdrodb/tests/test_api/test_api_checks.py +++ b/disdrodb/tests/test_api/test_api_checks.py @@ -23,43 +23,48 @@ import pytest -from disdrodb.api import checks +from disdrodb.api.checks 
import ( + check_base_dir, + check_path, + check_sensor_name, + check_url, +) def test_check_path(): # Test a valid path path = os.path.abspath(__file__) - assert checks.check_path(path) is None + assert check_path(path) is None # Test an invalid path path = "/path/that/does/not/exist" with pytest.raises(FileNotFoundError): - checks.check_path(path) + check_path(path) def test_check_url(): # Test with valid URLs - assert checks.check_url("https://www.example.com") is True - assert checks.check_url("http://example.com/path/to/file.html?param=value") is True - assert checks.check_url("www.example.com") is True - assert checks.check_url("example.com") is True + assert check_url("https://www.example.com") is True + assert check_url("http://example.com/path/to/file.html?param=value") is True + assert check_url("www.example.com") is True + assert check_url("example.com") is True # Test with invalid URLs - assert checks.check_url("ftp://example.com") is False - assert checks.check_url("htp://example.com") is False - assert checks.check_url("http://example.com/path with spaces") is False + assert check_url("ftp://example.com") is False + assert check_url("htp://example.com") is False + assert check_url("http://example.com/path with spaces") is False def test_check_base_dir(): from pathlib import Path - base_dir = os.path("path", "to", "DISDRODB") - assert checks.check_base_dir(base_dir) == base_dir + base_dir = os.path.join("path", "to", "DISDRODB") + assert check_base_dir(base_dir) == base_dir - assert checks.check_base_dir(Path(base_dir)) == base_dir + assert check_base_dir(Path(base_dir)) == base_dir with pytest.raises(ValueError): - checks.check_base_dir("/path/to/DISDRO") + check_base_dir("/path/to/DISDRO") def test_check_sensor_name(): @@ -67,8 +72,8 @@ def test_check_sensor_name(): # Test with an unknown device with pytest.raises(ValueError): - checks.check_sensor_name(sensor_name) + check_sensor_name(sensor_name) # Test with a woronf type with 
pytest.raises(TypeError): - checks.check_sensor_name(123) + check_sensor_name(123) diff --git a/disdrodb/tests/test_api/test_api_path.py b/disdrodb/tests/test_api/test_api_path.py new file mode 100644 index 00000000..6790fb14 --- /dev/null +++ b/disdrodb/tests/test_api/test_api_path.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 + +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. 
+"""Test DISDRODB path.""" +import datetime +import os + +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from disdrodb.api.path import ( + define_campaign_dir, + define_l0a_station_dir, + define_l0b_station_dir, + define_l0b_filepath, + define_l0a_filepath, +) + + +PROCESSED_FOLDER_WINDOWS = "\\DISDRODB\\Processed" +PROCESSED_FOLDER_LINUX = "/DISDRODB/Processed" + + +@pytest.mark.parametrize("processed_folder", [PROCESSED_FOLDER_WINDOWS, PROCESSED_FOLDER_LINUX]) +def test_define_l0a_station_dir(processed_folder): + res = ( + define_l0a_station_dir(processed_folder, "STATION_NAME") + .replace(processed_folder, "") + .replace("\\", "") + .replace("/", "") + ) + assert res == "L0ASTATION_NAME" + + +@pytest.mark.parametrize("processed_folder", [PROCESSED_FOLDER_WINDOWS, PROCESSED_FOLDER_LINUX]) +def test_define_l0b_station_dir(processed_folder): + res = ( + define_l0b_station_dir(processed_folder, "STATION_NAME") + .replace(processed_folder, "") + .replace("\\", "") + .replace("/", "") + ) + assert res == "L0BSTATION_NAME" + + +def test_define_l0a_filepath(tmp_path): + from disdrodb.l0.standards import PRODUCT_VERSION + + # Set variables + product = "L0A" + base_dir = tmp_path / "DISDRODB" + data_source = "DATA_SOURCE" + campaign_name = "CAMPAIGN_NAME" + station_name = "STATION_NAME" + start_date = datetime.datetime(2019, 3, 26, 0, 0, 0) + end_date = datetime.datetime(2021, 2, 8, 0, 0, 0) + start_date_str = start_date.strftime("%Y%m%d%H%M%S") + end_date_str = end_date.strftime("%Y%m%d%H%M%S") + + # Set paths + processed_dir = define_campaign_dir( + base_dir=base_dir, product=product, data_source=data_source, campaign_name=campaign_name + ) + + # Create dataframe + df = pd.DataFrame({"time": pd.date_range(start=start_date, end=end_date)}) + + # Test the function + res = define_l0a_filepath(df, processed_dir, station_name) + + # Define expected results + expected_name = ( + 
f"{product}.{campaign_name.upper()}.{station_name}.s{start_date_str}.e{end_date_str}.{PRODUCT_VERSION}.parquet" + ) + expected_path = os.path.join(processed_dir, product, station_name, expected_name) + assert res == expected_path + + +def test_define_l0b_filepath(tmp_path): + from disdrodb.l0.standards import PRODUCT_VERSION + + # Set variables + + product = "L0B" + base_dir = tmp_path / "DISDRODB" + data_source = "DATA_SOURCE" + campaign_name = "CAMPAIGN_NAME" + station_name = "STATION_NAME" + start_date = datetime.datetime(2019, 3, 26, 0, 0, 0) + end_date = datetime.datetime(2021, 2, 8, 0, 0, 0) + start_date_str = start_date.strftime("%Y%m%d%H%M%S") + end_date_str = end_date.strftime("%Y%m%d%H%M%S") + + # Set paths + processed_dir = define_campaign_dir( + base_dir=base_dir, product=product, data_source=data_source, campaign_name=campaign_name + ) + + # Create xarray object + timesteps = pd.date_range(start=start_date, end=end_date) + data = np.zeros(timesteps.shape) + ds = xr.DataArray( + data=data, + dims=["time"], + coords={"time": pd.date_range(start=start_date, end=end_date)}, + ) + + # Test the function + res = define_l0b_filepath(ds, processed_dir, station_name) + + # Define expected results + expected_name = ( + f"{product}.{campaign_name.upper()}.{station_name}.s{start_date_str}.e{end_date_str}.{PRODUCT_VERSION}.nc" + ) + expected_path = os.path.join(processed_dir, product, station_name, expected_name) + assert res == expected_path diff --git a/disdrodb/tests/test_l0/test_cmd_processing.py b/disdrodb/tests/test_l0/test_cmd_processing.py index 64c78fef..fe31d248 100644 --- a/disdrodb/tests/test_l0/test_cmd_processing.py +++ b/disdrodb/tests/test_l0/test_cmd_processing.py @@ -25,7 +25,7 @@ from click.testing import CliRunner from disdrodb import __root_path__ -from disdrodb.api.io import define_station_dir +from disdrodb.api.path import define_station_dir from disdrodb.utils.directories import list_files BASE_DIR = os.path.join(__root_path__, "disdrodb", 
"tests", "data", "check_readers", "DISDRODB") diff --git a/disdrodb/tests/test_l0/test_create_directories.py b/disdrodb/tests/test_l0/test_create_directories.py index 8b24e215..ebe65fbe 100644 --- a/disdrodb/tests/test_l0/test_create_directories.py +++ b/disdrodb/tests/test_l0/test_create_directories.py @@ -21,7 +21,7 @@ import pytest -from disdrodb.api.io import ( +from disdrodb.api.path import ( define_campaign_dir, define_metadata_dir, define_metadata_filepath, diff --git a/disdrodb/tests/test_l0/test_io.py b/disdrodb/tests/test_l0/test_io.py index 64031bfb..e77225b1 100644 --- a/disdrodb/tests/test_l0/test_io.py +++ b/disdrodb/tests/test_l0/test_io.py @@ -18,125 +18,15 @@ # -----------------------------------------------------------------------------. """Test DISDRODB L0 Input/Output routines.""" -import datetime import os -import numpy as np import pandas as pd import pytest -import xarray as xr -from disdrodb.api.io import define_campaign_dir +from disdrodb.api.path import define_campaign_dir from disdrodb.l0 import io from disdrodb.tests.conftest import create_fake_raw_data_file -PROCESSED_FOLDER_WINDOWS = "\\DISDRODB\\Processed" -PROCESSED_FOLDER_LINUX = "/DISDRODB/Processed" - - -def test__get_dataset_min_max_time(): - start_date = datetime.datetime(2019, 3, 26, 0, 0, 0) - end_date = datetime.datetime(2021, 2, 8, 0, 0, 0) - df = pd.DataFrame({"time": pd.date_range(start=start_date, end=end_date)}) - res = io._get_dataset_min_max_time(df) - assert all(pd.to_datetime(res, format="%Y-%m-%d") == [start_date, end_date]) - - -@pytest.mark.parametrize("processed_folder", [PROCESSED_FOLDER_WINDOWS, PROCESSED_FOLDER_LINUX]) -def test_get_l0a_dir(processed_folder): - res = ( - io.get_l0a_dir(processed_folder, "STATION_NAME") - .replace(processed_folder, "") - .replace("\\", "") - .replace("/", "") - ) - assert res == "L0ASTATION_NAME" - - -@pytest.mark.parametrize("processed_folder", [PROCESSED_FOLDER_WINDOWS, PROCESSED_FOLDER_LINUX]) -def 
test_get_l0b_dir(processed_folder): - res = ( - io.get_l0b_dir(processed_folder, "STATION_NAME") - .replace(processed_folder, "") - .replace("\\", "") - .replace("/", "") - ) - assert res == "L0BSTATION_NAME" - - -def test_get_l0a_filepath(tmp_path): - from disdrodb.l0.standards import PRODUCT_VERSION - - # Set variables - product = "L0A" - base_dir = tmp_path / "DISDRODB" - data_source = "DATA_SOURCE" - campaign_name = "CAMPAIGN_NAME" - station_name = "STATION_NAME" - start_date = datetime.datetime(2019, 3, 26, 0, 0, 0) - end_date = datetime.datetime(2021, 2, 8, 0, 0, 0) - start_date_str = start_date.strftime("%Y%m%d%H%M%S") - end_date_str = end_date.strftime("%Y%m%d%H%M%S") - - # Set paths - processed_dir = define_campaign_dir( - base_dir=base_dir, product=product, data_source=data_source, campaign_name=campaign_name - ) - - # Create dataframe - df = pd.DataFrame({"time": pd.date_range(start=start_date, end=end_date)}) - - # Test the function - res = io.get_l0a_filepath(df, processed_dir, station_name) - - # Define expected results - expected_name = ( - f"{product}.{campaign_name.upper()}.{station_name}.s{start_date_str}.e{end_date_str}.{PRODUCT_VERSION}.parquet" - ) - expected_path = os.path.join(processed_dir, product, station_name, expected_name) - assert res == expected_path - - -def test_get_l0b_filepath(tmp_path): - from disdrodb.l0.standards import PRODUCT_VERSION - - # Set variables - - product = "L0B" - base_dir = tmp_path / "DISDRODB" - data_source = "DATA_SOURCE" - campaign_name = "CAMPAIGN_NAME" - station_name = "STATION_NAME" - start_date = datetime.datetime(2019, 3, 26, 0, 0, 0) - end_date = datetime.datetime(2021, 2, 8, 0, 0, 0) - start_date_str = start_date.strftime("%Y%m%d%H%M%S") - end_date_str = end_date.strftime("%Y%m%d%H%M%S") - - # Set paths - processed_dir = define_campaign_dir( - base_dir=base_dir, product=product, data_source=data_source, campaign_name=campaign_name - ) - - # Create xarray object - timesteps = 
pd.date_range(start=start_date, end=end_date) - data = np.zeros(timesteps.shape) - ds = xr.DataArray( - data=data, - dims=["time"], - coords={"time": pd.date_range(start=start_date, end=end_date)}, - ) - - # Test the function - res = io.get_l0b_filepath(ds, processed_dir, station_name) - - # Define expected results - expected_name = ( - f"{product}.{campaign_name.upper()}.{station_name}.s{start_date_str}.e{end_date_str}.{PRODUCT_VERSION}.nc" - ) - expected_path = os.path.join(processed_dir, product, station_name, expected_name) - assert res == expected_path - - ####--------------------------------------------------------------------------. diff --git a/disdrodb/tests/test_l0/test_l0b_concat.py b/disdrodb/tests/test_l0/test_l0b_concat.py index a252df1b..ee5bbfac 100644 --- a/disdrodb/tests/test_l0/test_l0b_concat.py +++ b/disdrodb/tests/test_l0/test_l0b_concat.py @@ -24,7 +24,7 @@ import pandas as pd import xarray as xr -from disdrodb.api.io import define_campaign_dir +from disdrodb.api.path import define_campaign_dir from disdrodb.l0.l0_processing import run_l0b_concat from disdrodb.l0.routines import run_disdrodb_l0b_concat from disdrodb.tests.conftest import create_fake_metadata_file, create_fake_station_dir diff --git a/disdrodb/tests/test_metadata/test_metadata_io.py b/disdrodb/tests/test_metadata/test_metadata_io.py index 585d541f..82579992 100644 --- a/disdrodb/tests/test_metadata/test_metadata_io.py +++ b/disdrodb/tests/test_metadata/test_metadata_io.py @@ -22,13 +22,12 @@ import pytest -from disdrodb.api.io import define_metadata_filepath +from disdrodb.api.path import define_metadata_filepath from disdrodb.metadata.io import ( - _define_default_metadata_dict, + _create_default_metadata_dict, _get_list_all_metadata, _get_list_metadata_with_data, create_station_metadata, - # _define_default_metadata_dict, get_list_metadata, read_station_metadata, write_default_metadata, @@ -203,7 +202,7 @@ def test_get_list_metadata_file(tmp_path): def 
test_get_default_metadata(): - assert isinstance(_define_default_metadata_dict(), dict) + assert isinstance(_create_default_metadata_dict(), dict) def test_write_default_metadata(tmp_path): @@ -233,7 +232,7 @@ def test_write_default_metadata(tmp_path): dictionary = read_yaml(str(metadata_filepath)) # Check is the expected dictionary - expected_dict = _define_default_metadata_dict() + expected_dict = _create_default_metadata_dict() expected_dict["data_source"] = data_source expected_dict["campaign_name"] = campaign_name expected_dict["station_name"] = station_name diff --git a/disdrodb/tests/test_utils/test_utils_pandas.py b/disdrodb/tests/test_utils/test_utils_pandas.py new file mode 100644 index 00000000..71343e53 --- /dev/null +++ b/disdrodb/tests/test_utils/test_utils_pandas.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 + +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. 
+"""Test Pandas utility.""" + +import datetime +import pandas as pd + + +from disdrodb.utils.pandas import get_dataframe_start_end_time + + +def test_get_dataframe_start_end_time(): + start_date = datetime.datetime(2019, 3, 26, 0, 0, 0) + end_date = datetime.datetime(2021, 2, 8, 0, 0, 0) + df = pd.DataFrame({"time": pd.date_range(start=start_date, end=end_date)}) + res = get_dataframe_start_end_time(df) + assert all(pd.to_datetime(res, format="%Y-%m-%d") == [start_date, end_date]) \ No newline at end of file diff --git a/disdrodb/tests/test_utils/test_utils_xarray.py b/disdrodb/tests/test_utils/test_utils_xarray.py new file mode 100644 index 00000000..2215305b --- /dev/null +++ b/disdrodb/tests/test_utils/test_utils_xarray.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. 
+"""Test Xarray utility.""" + +import pytest +import xarray as xr +import numpy as np +import pandas as pd +from disdrodb.utils.xarray import get_dataset_start_end_time + + +def create_test_dataset(): + """Create a mock xarray.Dataset for testing.""" + times = pd.date_range('2023-01-01', periods=10, freq='D') + data = np.random.rand(10, 2, 2) # Random data for the sake of example + ds = xr.Dataset({'my_data': (('time', 'x', 'y'), data)}, + coords={'time': times}) + return ds + + +def test_get_dataset_start_end_time(): + ds = create_test_dataset() + expected_start_time = ds["time"].values[0] + expected_end_time = ds["time"].values[-1] + + start_time, end_time = get_dataset_start_end_time(ds) + + assert start_time == expected_start_time + assert end_time == expected_end_time + + # Test raise if empty dataset + empty_ds = xr.Dataset() + with pytest.raises(KeyError): + get_dataset_start_end_time(empty_ds) diff --git a/disdrodb/utils/compression.py b/disdrodb/utils/compression.py index e03e4405..9c49a9c0 100644 --- a/disdrodb/utils/compression.py +++ b/disdrodb/utils/compression.py @@ -27,7 +27,7 @@ from typing import Optional from disdrodb.api.checks import check_base_dir -from disdrodb.api.io import define_station_dir +from disdrodb.api.path import define_station_dir from disdrodb.utils.directories import list_files diff --git a/disdrodb/utils/pandas.py b/disdrodb/utils/pandas.py new file mode 100644 index 00000000..adeeb431 --- /dev/null +++ b/disdrodb/utils/pandas.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. +"""Pandas utility.""" + +import pandas as pd + + +def get_dataframe_start_end_time(df: pd.DataFrame): + """Retrieves dataframe starting and ending time. + + Parameters + ---------- + df : pd.DataFrame + Input dataframe + + Returns + ------- + tuple + (starting_time, ending_time) + + """ + + starting_time = df["time"].iloc[0] + ending_time = df["time"].iloc[-1] + return (starting_time, ending_time) diff --git a/disdrodb/utils/xarray.py b/disdrodb/utils/xarray.py new file mode 100644 index 00000000..9c63c674 --- /dev/null +++ b/disdrodb/utils/xarray.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 + +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. 
+"""Xarray utility.""" + +import xarray as xr + + +def get_dataset_start_end_time(ds: xr.Dataset): + """Retrieves dataset starting and ending time. + + Parameters + ---------- + ds : xr.Dataset + Input dataset + + Returns + ------- + tuple + (starting_time, ending_time) + + """ + starting_time = ds["time"].values[0] + ending_time = ds["time"].values[-1] + return (starting_time, ending_time) \ No newline at end of file diff --git a/disdrodb/utils/yaml.py b/disdrodb/utils/yaml.py index 5e3fd96d..d4fbf350 100644 --- a/disdrodb/utils/yaml.py +++ b/disdrodb/utils/yaml.py @@ -1,9 +1,23 @@ #!/usr/bin/env python3 -""" -Created on Thu Nov 2 15:42:45 2023 -@author: ghiggi -""" +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. 
+"""YAML utility.""" + import yaml diff --git a/tutorials/reader_preparation.ipynb b/tutorials/reader_preparation.ipynb index cbe97bd4..e283ae2b 100644 --- a/tutorials/reader_preparation.ipynb +++ b/tutorials/reader_preparation.ipynb @@ -152,7 +152,7 @@ "from disdrodb.metadata import read_station_metadata\n", "\n", "# Standards\n", - "from disdrodb.api.io import define_campaign_directory\n", + "from disdrodb.api.path import define_campaign_directory\n", "from disdrodb.api.info import infer_path_info_dict\n", "from disdrodb.api.checks import check_sensor_name\n", "from disdrodb.l0.check_standards import check_l0a_column_names\n"