diff --git a/.gitignore b/.gitignore index af0636f2..1f3f0b7f 100644 --- a/.gitignore +++ b/.gitignore @@ -161,6 +161,6 @@ docs\source\reader_preparation.ipynb # Ignore specific folders /disdrodb/tests/temp/ -/disdrodb/tests/data/test_folders_files_creation/* -!disdrodb/tests/data/test_folders_files_creation/.gitkeep +/disdrodb/tests/data/test_dir_creation/* +!disdrodb/tests/data/test_dir_creation/.gitkeep disdrodb-dev diff --git a/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_1.yml b/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_1.yml index 8bd8bb11..afb01de8 100644 --- a/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_1.yml +++ b/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_1.yml @@ -6,6 +6,7 @@ reader: EPFL/LOCARNO_2018 raw_data_format: raw raw_data_type: raw platform_type: fixed +disdrodb_data_url: '' crs: WGS84 proj4_string: +proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs EPSG: 4326 diff --git a/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_2.yml b/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_2.yml index 90972b16..f476d679 100644 --- a/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_2.yml +++ b/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_2.yml @@ -6,6 +6,7 @@ reader: EPFL/LOCARNO_2018 raw_data_format: raw raw_data_type: raw platform_type: fixed +disdrodb_data_url: '' crs: WGS84 proj4_string: +proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs EPSG: 4326 diff --git a/disdrodb/__init__.py b/disdrodb/__init__.py index 7fc432bc..4c0a1b2a 100644 --- a/disdrodb/__init__.py +++ b/disdrodb/__init__.py @@ -8,9 +8,9 @@ available_data_sources, available_stations, ) -from disdrodb.api.metadata import read_station_metadata from disdrodb.configs import define_disdrodb_configs as define_configs from disdrodb.docs import open_documentation, open_sensor_documentation +from disdrodb.metadata import read_station_metadata 
__root_path__ = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) diff --git a/disdrodb/api/checks.py b/disdrodb/api/checks.py index 91fbea30..fae59dae 100644 --- a/disdrodb/api/checks.py +++ b/disdrodb/api/checks.py @@ -95,14 +95,3 @@ def check_sensor_name(sensor_name: str, product_level: str = "l0") -> None: msg = f"{sensor_name} not valid {sensor_name}. Valid values are {sensor_names}." logger.error(msg) raise ValueError(msg) - - -def check_product_level(product_level): - """Check DISDRODB product level validity.""" - if not isinstance(product_level, str): - raise TypeError("'product_level' must be a string.") - product_level = product_level.lower() - valid_product_levels = ["l0"] - if product_level not in valid_product_levels: - raise ValueError(f"{product_level} is an invalid 'product_level'. Valid values are: {valid_product_levels}") - return product_level diff --git a/disdrodb/api/configs.py b/disdrodb/api/configs.py index 06343108..3ed10fab 100644 --- a/disdrodb/api/configs.py +++ b/disdrodb/api/configs.py @@ -21,12 +21,26 @@ import logging import os -from disdrodb.api.checks import check_product_level, check_sensor_name +from disdrodb.api.checks import check_sensor_name from disdrodb.utils.yaml import read_yaml logger = logging.getLogger(__name__) +def _check_product_level(product_level): + """Check DISDRODB product level validity.""" + # Note: in disdrodb.api.io there is another _check_product_level function ! + if not isinstance(product_level, str): + raise TypeError("'product_level' must be a string.") + product_level = product_level.lower() + valid_product_levels = ["l0", "l0a", "l0b"] + if product_level not in valid_product_levels: + raise ValueError(f"{product_level} is an invalid 'product_level'. 
Valid values are: {valid_product_levels}") + if product_level in ["l0a", "l0b"]: + product_level = "l0" + return product_level + + def _get_config_dir(product_level): """Define the config directory path of a given DISDRODB product level.""" from disdrodb import __root_path__ @@ -57,7 +71,7 @@ def get_sensor_configs_dir(sensor_name: str, product_level: str) -> str: Error if the config directory does not exist. """ check_sensor_name(sensor_name, product_level=product_level) - product_level = check_product_level(product_level) + product_level = _check_product_level(product_level) config_dir_path = _get_config_dir(product_level=product_level) config_sensor_dir_path = os.path.join(config_dir_path, sensor_name) if not os.path.exists(config_sensor_dir_path): @@ -88,7 +102,7 @@ def read_config_file(sensor_name: str, product_level: str, filename: str) -> dic Error if file does not exist. """ check_sensor_name(sensor_name, product_level=product_level) - product_level = check_product_level(product_level) + product_level = _check_product_level(product_level) config_sensor_dir_path = get_sensor_configs_dir(sensor_name, product_level=product_level) config_fpath = os.path.join(config_sensor_dir_path, filename) # Check yaml file exists @@ -112,6 +126,6 @@ def available_sensor_names(product_level: str = "L0") -> sorted: DISDRODB product level. By default, it returns the sensors available for DISDRODB L0 products. 
""" - product_level = check_product_level(product_level) + product_level = _check_product_level(product_level) config_dir_path = _get_config_dir(product_level=product_level) return sorted(os.listdir(config_dir_path)) diff --git a/disdrodb/api/io.py b/disdrodb/api/io.py index 163b74f7..8ab57866 100644 --- a/disdrodb/api/io.py +++ b/disdrodb/api/io.py @@ -22,7 +22,6 @@ import os import numpy as np -import yaml from disdrodb.configs import get_base_dir @@ -85,6 +84,10 @@ def _get_list_stations_dirs(product_level, campaign_dir): data_path = os.path.join(campaign_dir, "data") else: data_path = os.path.join(campaign_dir, product_level) + # Check if the data directory exists + # - For a fresh disdrodb-data cloned repo, no "data" directories + if not os.path.exists(data_path): + return [] # Get list of directories (stations) list_stations = os.listdir(data_path) list_stations_dir = [os.path.join(data_path, station_name) for station_name in list_stations] @@ -215,26 +218,6 @@ def _get_stations(base_dir, product_level): return list_available_stations -def _get_metadata_fpath(base_dir, product_level, data_source, campaign_name, station_name): - """Get metadata file path a given station.""" - campaign_dir = get_disdrodb_path( - base_dir=base_dir, - product_level=product_level, - data_source=data_source, - campaign_name=campaign_name, - ) - metadata_fpath = os.path.join(campaign_dir, "metadata", station_name + ".yml") - return metadata_fpath - - -def get_metadata_dict(base_dir, product_level, data_source, campaign_name, station_name): - """Get metadata of a given station.""" - metadata_fpath = _get_metadata_fpath(base_dir, product_level, data_source, campaign_name, station_name) - with open(metadata_fpath) as f: - metadata_dict = yaml.safe_load(f) - return metadata_dict - - ####---------------------------------------------------------------------------. 
#### I/O CHECKS def check_product_level(product_level): diff --git a/disdrodb/configs.py b/disdrodb/configs.py index a142df79..52e6fe89 100644 --- a/disdrodb/configs.py +++ b/disdrodb/configs.py @@ -87,4 +87,5 @@ def get_base_dir(base_dir=None): if base_dir is None: base_dir = disdrodb.config["dir"] + base_dir = str(base_dir) # convert Path to str return base_dir diff --git a/disdrodb/data_transfer/download_data.py b/disdrodb/data_transfer/download_data.py index ddff431f..83a7d3e7 100644 --- a/disdrodb/data_transfer/download_data.py +++ b/disdrodb/data_transfer/download_data.py @@ -26,9 +26,9 @@ import pooch import tqdm -from disdrodb.api.metadata import get_list_metadata from disdrodb.configs import get_base_dir from disdrodb.l0.io import _infer_disdrodb_tree_path +from disdrodb.metadata import get_list_metadata from disdrodb.utils.compression import _unzip_file from disdrodb.utils.yaml import read_yaml @@ -159,7 +159,7 @@ def _download_station_data(metadata_fpath: str, force: bool = False) -> None: """ disdrodb_data_url, station_dir_path = _get_station_url_and_dir_path(metadata_fpath) - if disdrodb_data_url is not None: + if isinstance(disdrodb_data_url, str) and disdrodb_data_url != "": # Download file zip_fpath, to_unzip = _download_file_from_url(disdrodb_data_url, dst_dir_path=station_dir_path, force=force) # Extract the stations files from the downloaded station.zip file diff --git a/disdrodb/data_transfer/upload_data.py b/disdrodb/data_transfer/upload_data.py index 8b554125..254e1633 100644 --- a/disdrodb/data_transfer/upload_data.py +++ b/disdrodb/data_transfer/upload_data.py @@ -23,7 +23,7 @@ import click -from disdrodb.api.metadata import get_list_metadata +from disdrodb.metadata import get_list_metadata from disdrodb.utils.compression import _zip_dir from disdrodb.utils.yaml import read_yaml, write_yaml from disdrodb.utils.zenodo import _create_zenodo_deposition, _upload_file_to_zenodo @@ -95,19 +95,17 @@ def _filter_already_uploaded(metadata_fpaths: 
List[str]) -> List[str]: """Filter metadata files that already have a remote url specified.""" filtered = [] - for metadata_fpath in metadata_fpaths: metadata_dict = read_yaml(metadata_fpath) if metadata_dict.get("disdrodb_data_url"): print(f"{metadata_fpath} already has a remote url specified. Skipping.") continue filtered.append(metadata_fpath) - return filtered def _upload_data_to_zenodo(metadata_fpaths: List[str], sandbox: bool = False) -> None: - """Upload data to Zenodo. + """Upload data to Zenodo Sandbox. Parameters ---------- diff --git a/disdrodb/l0/__init__.py b/disdrodb/l0/__init__.py index 4e83de66..4183e825 100644 --- a/disdrodb/l0/__init__.py +++ b/disdrodb/l0/__init__.py @@ -1,7 +1,3 @@ -from disdrodb.l0.check_metadata import ( - check_archive_metadata_compliance, - check_archive_metadata_geolocation, -) from disdrodb.l0.l0_processing import ( run_disdrodb_l0, run_disdrodb_l0_station, @@ -9,6 +5,10 @@ run_l0b_from_nc, ) from disdrodb.l0.l0_reader import available_readers +from disdrodb.metadata.check_metadata import ( + check_archive_metadata_compliance, + check_archive_metadata_geolocation, +) __all__ = [ "run_l0a", diff --git a/disdrodb/l0/io.py b/disdrodb/l0/io.py index 204ac41b..6d592860 100644 --- a/disdrodb/l0/io.py +++ b/disdrodb/l0/io.py @@ -639,7 +639,8 @@ def _check_raw_dir_data_subfolders(raw_dir): def _check_raw_dir_metadata(raw_dir, verbose=True): """Check metadata in the raw_dir directory.""" - from disdrodb.l0.metadata import check_metadata_compliance, write_default_metadata + from disdrodb.l0.metadata import write_default_metadata + from disdrodb.metadata.check_metadata import check_metadata_compliance # Get list of stations raw_data_dir = os.path.join(raw_dir, "data") diff --git a/disdrodb/l0/l0_reader.py b/disdrodb/l0/l0_reader.py index 84a89880..763a6708 100644 --- a/disdrodb/l0/l0_reader.py +++ b/disdrodb/l0/l0_reader.py @@ -320,10 +320,10 @@ def _get_reader_from_metadata(metadata): def get_station_reader_function(data_source, 
campaign_name, station_name, base_dir=None): """Retrieve the reader function from the station metadata.""" - from disdrodb.api.io import get_metadata_dict + from disdrodb.metadata import read_station_metadata # Get metadata - metadata = get_metadata_dict( + metadata = read_station_metadata( base_dir=base_dir, product_level="RAW", data_source=data_source, diff --git a/disdrodb/l0/metadata.py b/disdrodb/l0/metadata.py index 54f236fe..55a3654c 100644 --- a/disdrodb/l0/metadata.py +++ b/disdrodb/l0/metadata.py @@ -20,101 +20,16 @@ import os -import numpy as np - from disdrodb.configs import get_base_dir from disdrodb.l0.io import _infer_campaign_name_from_path, _infer_data_source_from_path +from disdrodb.metadata.manipulation import sort_metadata_dictionary +from disdrodb.metadata.standards import get_valid_metadata_keys from disdrodb.utils.yaml import read_yaml, write_yaml - -####--------------------------------------------------------------------------. -#### Define valid metadata keys -def get_valid_metadata_keys() -> list: - """Get DISDRODB valid metadata list. 
- - Returns - ------- - list - List of valid metadata keys - """ - list_attrs = [ - ## Mandatory fields - "data_source", - "campaign_name", - "station_name", - "sensor_name", - "reader", - "raw_data_format", # 'txt', 'netcdf' - "platform_type", # 'fixed', 'mobile' - ## Source - "source", - "source_convention", - "source_processing_date", - ## Description - "title", - "description", - "project_name", - "keywords", - "summary", - "history", - "comment", - "station_id", - "location", - "country", - "continent", - ## Deployment Info - "latitude", # in degrees North - "longitude", # in degrees East - "altitude", # in meter above sea level - "deployment_status", # 'ended', 'ongoing' - "deployment mode", # 'land', 'ship', 'truck', 'cable' - "platform_protection", # 'shielded', 'unshielded' - "platform_orientation", # [0-360] from N (clockwise) - ## Sensor info - "sensor_long_name", - "sensor_manufacturer", - "sensor_wavelength", - "sensor_serial_number", - "firmware_iop", - "firmware_dsp", - "firmware_version", - "sensor_beam_length", - "sensor_beam_width", - "sensor_nominal_width", # ? - ## effective_measurement_area ? # 0.54 m^2 - "measurement_interval", # sampling_interval ? [in seconds] - "calibration_sensitivity", - "calibration_certification_date", - "calibration_certification_url", - ## Attribution - "contributors", - "authors", - "authors_url", - "contact", - "contact_information", - "acknowledgement", # acknowledgements? - "references", - "documentation", - "website", - "institution", - "source_repository", - "license", - "doi", - ] - return list_attrs - - ####--------------------------------------------------------------------------. 
#### Metadata reader & writers -def sort_metadata_dictionary(metadata): - """Sort the keys of the metadata dictionary by valid_metadata_keys list order.""" - list_metadata_keys = get_valid_metadata_keys() - metadata = {k: metadata[k] for k in list_metadata_keys} - return metadata - - def read_metadata(campaign_dir: str, station_name: str) -> dict: """Read YAML metadata file. @@ -149,7 +64,7 @@ def write_metadata(metadata, fpath): ####--------------------------------------------------------------------------. #### Default (empty) metadata -def get_default_metadata_dict() -> dict: +def _get_default_metadata_dict() -> dict: """Get DISDRODB metadata default values. Returns @@ -179,7 +94,7 @@ def write_default_metadata(fpath: str) -> None: File path """ # Get default metadata dict - metadata = get_default_metadata_dict() + metadata = _get_default_metadata_dict() # Try infer the data_source, campaign_name and station_name from fpath try: campaign_name = _infer_campaign_name_from_path(fpath) @@ -213,145 +128,3 @@ def create_campaign_default_metadata( write_default_metadata(fpath=metadata_fpath) print(f"The default metadata were created for stations {station_names}.") return None - - -####--------------------------------------------------------------------------. 
-#### Check metadata file - - -def get_metadata_missing_keys(metadata): - """Return the DISDRODB metadata keys which are missing.""" - keys = list(metadata.keys()) - valid_keys = get_valid_metadata_keys() - # Identify missing keys - idx_missing_keys = np.where(np.isin(valid_keys, keys, invert=True))[0] - missing_keys = np.array(valid_keys)[idx_missing_keys].tolist() - return missing_keys - - -def get_metadata_invalid_keys(metadata): - """Return the DISDRODB metadata keys which are not valid.""" - keys = list(metadata.keys()) - valid_keys = get_valid_metadata_keys() - # Identify invalid keys - idx_invalid_keys = np.where(np.isin(keys, valid_keys, invert=True))[0] - invalid_keys = np.array(keys)[idx_invalid_keys].tolist() - return invalid_keys - - -def _check_metadata_keys(metadata): - """Check validity of metadata keys.""" - # Check all keys are valid - invalid_keys = get_metadata_invalid_keys(metadata) - if len(invalid_keys) > 0: - raise ValueError(f"Invalid metadata keys: {invalid_keys}") - # Check no keys are missing - missing_keys = get_metadata_missing_keys(metadata) - if len(missing_keys) > 0: - raise ValueError(f"Missing metadata keys: {missing_keys}") - return None - - -def _check_metadata_values(metadata): - """Check validity of metadata values - - If null is specified in the YAML files (or None in the dict) raise error. - """ - for key, value in metadata.items(): - if isinstance(value, type(None)): - raise ValueError(f"The metadata key {key} has None or null value. 
Use '' instead.") - return None - - -def _check_metadata_campaign_name(metadata, expected_name): - """Check metadata campaign_name.""" - if "campaign_name" not in metadata: - raise ValueError("The metadata file does not contain the 'campaign_name' key.") - campaign_name = metadata["campaign_name"] - if campaign_name == "": - raise ValueError("The 'campaign_name' key in the metadata is empty.") - if campaign_name != expected_name: - raise ValueError( - f"The campaign_name in the metadata is '{campaign_name}' but the campaign directory is '{expected_name}'" - ) - return None - - -def _check_metadata_data_source(metadata, expected_name): - """Check metadata data_source.""" - if "data_source" not in metadata: - raise ValueError("The metadata file does not contain the 'data_source' key.") - data_source = metadata["data_source"] - if data_source == "": - raise ValueError("The 'data_source' key in the metadata is empty.") - if data_source != expected_name: - raise ValueError( - f"The data_source in the metadata is '{data_source}' but the data_source directory is '{expected_name}'" - ) - return None - - -def _check_metadata_station_name(metadata, expected_name): - """Check metadata station name. 
- - This function does not check that data are available for the station!""" - if "station_name" not in metadata: - raise ValueError("The metadata file does not contain the 'station_name' key.") - station_name = metadata["station_name"] - if not isinstance(station_name, str): - raise ValueError("The 'station_name' key in the metadata is not defined as a string!") - if station_name == "": - raise ValueError("The 'station_name' key in the metadata is empty.") - if station_name != expected_name: - raise ValueError( - f"The station_name in the metadata is '{station_name}' but the metadata file is named '{expected_name}.yml'" - ) - return None - - -def _check_metadata_sensor_name(metadata): - from disdrodb.api.checks import check_sensor_name - - sensor_name = metadata["sensor_name"] - check_sensor_name(sensor_name=sensor_name) - return None - - -def check_metadata_compliance(data_source, campaign_name, station_name, base_dir=None): - """Check DISDRODB metadata compliance.""" - from disdrodb.api.metadata import read_station_metadata - from disdrodb.l0.l0_reader import _check_metadata_reader - - metadata = read_station_metadata( - base_dir=base_dir, - product_level="RAW", - data_source=data_source, - campaign_name=campaign_name, - station_name=station_name, - ) - _check_metadata_keys(metadata) - _check_metadata_values(metadata) - _check_metadata_campaign_name(metadata, expected_name=campaign_name) - _check_metadata_data_source(metadata, expected_name=data_source) - _check_metadata_station_name(metadata, expected_name=station_name) - _check_metadata_sensor_name(metadata) - _check_metadata_reader(metadata) - return None - - -####--------------------------------------------------------------------------. 
-#### Metadata manipulation tools -def remove_invalid_metadata_keys(metadata): - """Remove invalid keys from the metadata dictionary.""" - invalid_keys = get_metadata_invalid_keys(metadata) - for k in invalid_keys: - _ = metadata.pop(k) - return metadata - - -def add_missing_metadata_keys(metadata): - """Add missing keys to the metadata dictionary.""" - missing_keys = get_metadata_missing_keys(metadata) - for k in missing_keys: - metadata[k] = "" - return metadata diff --git a/disdrodb/metadata/__init__.py b/disdrodb/metadata/__init__.py new file mode 100644 index 00000000..2a950c38 --- /dev/null +++ b/disdrodb/metadata/__init__.py @@ -0,0 +1,4 @@ +from disdrodb.metadata.info import get_archive_metadata_key_value +from disdrodb.metadata.io import get_list_metadata, read_station_metadata + +__all__ = [read_station_metadata, get_list_metadata, get_archive_metadata_key_value] diff --git a/disdrodb/l0/check_metadata.py b/disdrodb/metadata/check_metadata.py similarity index 76% rename from disdrodb/l0/check_metadata.py rename to disdrodb/metadata/check_metadata.py index a6a42286..f920fb46 100644 --- a/disdrodb/l0/check_metadata.py +++ b/disdrodb/metadata/check_metadata.py @@ -21,23 +21,141 @@ import os from typing import Union -from disdrodb.api.metadata import get_list_metadata, read_station_metadata +import numpy as np + from disdrodb.configs import get_base_dir from disdrodb.l0.io import ( _infer_campaign_name_from_path, _infer_data_source_from_path, ) from disdrodb.l0.l0_reader import _check_metadata_reader -from disdrodb.l0.metadata import ( - _check_metadata_campaign_name, - _check_metadata_data_source, - _check_metadata_keys, - _check_metadata_sensor_name, - _check_metadata_station_name, - check_metadata_compliance, -) +from disdrodb.metadata.io import get_list_metadata, read_station_metadata +from disdrodb.metadata.standards import get_valid_metadata_keys from disdrodb.utils.yaml import read_yaml +#### 
--------------------------------------------------------------------------. +#### Check Station Metadata + + +def get_metadata_missing_keys(metadata): + """Return the DISDRODB metadata keys which are missing.""" + keys = list(metadata.keys()) + valid_keys = get_valid_metadata_keys() + # Identify missing keys + idx_missing_keys = np.where(np.isin(valid_keys, keys, invert=True))[0] + missing_keys = np.array(valid_keys)[idx_missing_keys].tolist() + return missing_keys + + +def get_metadata_invalid_keys(metadata): + """Return the DISDRODB metadata keys which are not valid.""" + keys = list(metadata.keys()) + valid_keys = get_valid_metadata_keys() + # Identify invalid keys + idx_invalid_keys = np.where(np.isin(keys, valid_keys, invert=True))[0] + invalid_keys = np.array(keys)[idx_invalid_keys].tolist() + return invalid_keys + + +def _check_metadata_keys(metadata): + """Check validity of metadata keys.""" + # Check all keys are valid + invalid_keys = get_metadata_invalid_keys(metadata) + if len(invalid_keys) > 0: + raise ValueError(f"Invalid metadata keys: {invalid_keys}") + # Check no keys are missing + missing_keys = get_metadata_missing_keys(metadata) + if len(missing_keys) > 0: + raise ValueError(f"Missing metadata keys: {missing_keys}") + return None + + +def _check_metadata_values(metadata): + """Check validity of metadata values + + If null is specified in the YAML files (or None in the dict) raise error. + """ + for key, value in metadata.items(): + if isinstance(value, type(None)): + raise ValueError(f"The metadata key {key} has None or null value. 
Use '' instead.") + return None + + +def _check_metadata_campaign_name(metadata, expected_name): + """Check metadata campaign_name.""" + if "campaign_name" not in metadata: + raise ValueError("The metadata file does not contain the 'campaign_name' key.") + campaign_name = metadata["campaign_name"] + if campaign_name == "": + raise ValueError("The 'campaign_name' key in the metadata is empty.") + if campaign_name != expected_name: + raise ValueError( + f"The campaign_name in the metadata is '{campaign_name}' but the campaign directory is '{expected_name}'" + ) + return None + + +def _check_metadata_data_source(metadata, expected_name): + """Check metadata data_source.""" + if "data_source" not in metadata: + raise ValueError("The metadata file does not contain the 'data_source' key.") + data_source = metadata["data_source"] + if data_source == "": + raise ValueError("The 'data_source' key in the metadata is empty.") + if data_source != expected_name: + raise ValueError( + f"The data_source in the metadata is '{data_source}' but the data_source directory is '{expected_name}'" + ) + return None + + +def _check_metadata_station_name(metadata, expected_name): + """Check metadata station name. 
+ + This function does not check that data are available for the station!""" + if "station_name" not in metadata: + raise ValueError("The metadata file does not contain the 'station_name' key.") + station_name = metadata["station_name"] + if not isinstance(station_name, str): + raise ValueError("The 'station_name' key in the metadata is not defined as a string!") + if station_name == "": + raise ValueError("The 'station_name' key in the metadata is empty.") + if station_name != expected_name: + raise ValueError( + f"The station_name in the metadata is '{station_name}' but the metadata file is named '{expected_name}.yml'" + ) + return None + + +def _check_metadata_sensor_name(metadata): + from disdrodb.api.checks import check_sensor_name + + sensor_name = metadata["sensor_name"] + check_sensor_name(sensor_name=sensor_name) + return None + + +def check_metadata_compliance(data_source, campaign_name, station_name, base_dir=None): + """Check DISDRODB metadata compliance.""" + from disdrodb.l0.l0_reader import _check_metadata_reader + + metadata = read_station_metadata( + base_dir=base_dir, + product_level="RAW", + data_source=data_source, + campaign_name=campaign_name, + station_name=station_name, + ) + _check_metadata_keys(metadata) + _check_metadata_values(metadata) + _check_metadata_campaign_name(metadata, expected_name=campaign_name) + _check_metadata_data_source(metadata, expected_name=data_source) + _check_metadata_station_name(metadata, expected_name=station_name) + _check_metadata_sensor_name(metadata) + _check_metadata_reader(metadata) + return None + + #### --------------------------------------------------------------------------. #### Metadata Archive Missing Information @@ -125,54 +243,8 @@ def identify_empty_metadata_keys(metadata_fpaths: list, keys: Union[str, list]) return None -def get_archive_metadata_key_value(key: str, return_tuple: bool = True, base_dir: str = None): - """Return the values of a metadata key for all the archive. 
- - Parameters - ---------- - base_dir : str - Path to the disdrodb directory. - key : str - Metadata key. - return_tuple : bool, optional - If True, returns a tuple of values with station, campaign and data source name. - If False, returns a list of values without station, campaign and data source name. - The default is True. - base_dir : str (optional) - Base directory of DISDRODB. Format: <...>/DISDRODB - If None (the default), the disdrodb config variable 'dir' is used. - - Returns - ------- - list or tuple - List or tuple of values of the metadata key. - """ - base_dir = get_base_dir(base_dir) - list_metadata_paths = get_list_metadata( - base_dir=base_dir, data_sources=None, campaign_names=None, station_names=None, with_stations_data=False - ) - list_info = [] - for fpath in list_metadata_paths: - data_source = _infer_data_source_from_path(fpath) - campaign_name = _infer_campaign_name_from_path(fpath) - station_name = os.path.basename(fpath).replace(".yml", "") - metadata = read_station_metadata( - base_dir=base_dir, - product_level="RAW", - data_source=data_source, - campaign_name=campaign_name, - station_name=station_name, - ) - value = metadata[key] - info = (data_source, campaign_name, station_name, value) - list_info.append(info) - if not return_tuple: - list_info = [info[3] for info in list_info] - return list_info - - #### --------------------------------------------------------------------------. -#### Metadata Archive Checks +#### Check Metadata Archive def check_archive_metadata_keys(base_dir: str = None) -> bool: diff --git a/disdrodb/metadata/info.py b/disdrodb/metadata/info.py new file mode 100644 index 00000000..3967326d --- /dev/null +++ b/disdrodb/metadata/info.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +# -----------------------------------------------------------------------------. 
+# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. +"""Test Metadata Info Extraction.""" +import os + +from disdrodb.configs import get_base_dir +from disdrodb.l0.io import ( + _infer_campaign_name_from_path, + _infer_data_source_from_path, +) +from disdrodb.metadata.io import get_list_metadata, read_station_metadata + + +def get_archive_metadata_key_value(key: str, return_tuple: bool = True, base_dir: str = None): + """Return the values of a metadata key for all the archive. + + Parameters + ---------- + base_dir : str + Path to the disdrodb directory. + key : str + Metadata key. + return_tuple : bool, optional + If True, returns a tuple of values with station, campaign and data source name. + If False, returns a list of values without station, campaign and data source name. + The default is True. + base_dir : str (optional) + Base directory of DISDRODB. Format: <...>/DISDRODB + If None (the default), the disdrodb config variable 'dir' is used. + + Returns + ------- + list or tuple + List or tuple of values of the metadata key. 
+ """ + base_dir = get_base_dir(base_dir) + list_metadata_paths = get_list_metadata( + base_dir=base_dir, data_sources=None, campaign_names=None, station_names=None, with_stations_data=False + ) + list_info = [] + for fpath in list_metadata_paths: + data_source = _infer_data_source_from_path(fpath) + campaign_name = _infer_campaign_name_from_path(fpath) + station_name = os.path.basename(fpath).replace(".yml", "") + metadata = read_station_metadata( + base_dir=base_dir, + product_level="RAW", + data_source=data_source, + campaign_name=campaign_name, + station_name=station_name, + ) + value = metadata[key] + info = (data_source, campaign_name, station_name, value) + list_info.append(info) + if not return_tuple: + list_info = [info[3] for info in list_info] + return list_info diff --git a/disdrodb/api/metadata.py b/disdrodb/metadata/io.py similarity index 96% rename from disdrodb/api/metadata.py rename to disdrodb/metadata/io.py index d8a0995f..78120043 100644 --- a/disdrodb/api/metadata.py +++ b/disdrodb/metadata/io.py @@ -21,10 +21,9 @@ import glob import os -import yaml - from disdrodb.api.io import get_disdrodb_path from disdrodb.configs import get_base_dir +from disdrodb.utils.yaml import read_yaml def read_station_metadata(product_level, data_source, campaign_name, station_name, base_dir=None): @@ -60,16 +59,14 @@ def read_station_metadata(product_level, data_source, campaign_name, station_nam check_exist=True, ) # Define metadata filepath - fpath = os.path.join(campaign_dir, "metadata", f"{station_name}.yml") + metadata_fpath = os.path.join(campaign_dir, "metadata", f"{station_name}.yml") # Check the file exists - if not os.path.exists(fpath): - raise ValueError(f"The metadata file for {station_name} at {fpath} does not exists.") + if not os.path.exists(metadata_fpath): + raise ValueError(f"The metadata file for {station_name} at {metadata_fpath} does not exists.") - # Read the metadata file - with open(fpath) as f: - dictionary = yaml.safe_load(f) - return 
dictionary + metadata_dict = read_yaml(metadata_fpath) + return metadata_dict def get_list_metadata( diff --git a/disdrodb/metadata/manipulation.py b/disdrodb/metadata/manipulation.py new file mode 100644 index 00000000..c5b42d7e --- /dev/null +++ b/disdrodb/metadata/manipulation.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 + +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. 
+"""Metadata Manipulation Tools.""" + +from disdrodb.metadata.check_metadata import get_metadata_invalid_keys, get_metadata_missing_keys +from disdrodb.metadata.standards import get_valid_metadata_keys + + +def remove_invalid_metadata_keys(metadata): + """Remove invalid keys from the metadata dictionary.""" + invalid_keys = get_metadata_invalid_keys(metadata) + for k in invalid_keys: + _ = metadata.pop(k) + return metadata + + +def add_missing_metadata_keys(metadata): + """Add missing keys to the metadata dictionary.""" + missing_keys = get_metadata_missing_keys(metadata) + for k in missing_keys: + metadata[k] = "" + return metadata + + +def sort_metadata_dictionary(metadata): + """Sort the keys of the metadata dictionary by valid_metadata_keys list order.""" + list_metadata_keys = get_valid_metadata_keys() + metadata = {k: metadata[k] for k in list_metadata_keys} + return metadata diff --git a/disdrodb/metadata/scripts/disdrodb_check_metadata_archive.py b/disdrodb/metadata/scripts/disdrodb_check_metadata_archive.py new file mode 100644 index 00000000..e852be1b --- /dev/null +++ b/disdrodb/metadata/scripts/disdrodb_check_metadata_archive.py @@ -0,0 +1,26 @@ +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# -----------------------------------------------------------------------------. +"""Wrapper to check DISDRODB Metadata Archive Compliance from terminal.""" +import click + + +@click.command() +@click.option("--base_dir", type=str, show_default=True, default=None, help="DISDRODB root directory") +def disdrodb_check_metadata_archive(base_dir=None): + from disdrodb.metadata.check_metadata import check_archive_metadata_compliance + + check_archive_metadata_compliance(base_dir=base_dir) diff --git a/disdrodb/metadata/standards.py b/disdrodb/metadata/standards.py new file mode 100644 index 00000000..f0995b0c --- /dev/null +++ b/disdrodb/metadata/standards.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 + +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. +"""Define DISDRODB Metadata Standards.""" + + +def get_valid_metadata_keys() -> list: + """Get DISDRODB valid metadata list. 
+ + Returns + ------- + list + List of valid metadata keys + """ + # NOTE: When updating one of these keys, one need to update the yaml in/at: + # - the disdrodb-data repository + # - disdrodb/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/*.yml + # - disdrodb/tests/data/check_readers/DISDRODB/Raw/*/*/metadata/10.yml + # - disdrodb/tests/data/test_dir_creation/metadata/123.yml + # - disdrodb/tests/data/test_dir_creation/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml + # - /disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml + list_attrs = [ + ## Mandatory fields + "data_source", + "campaign_name", + "station_name", + "sensor_name", + "reader", + "raw_data_format", # 'txt', 'netcdf' + "platform_type", # 'fixed', 'mobile' + ## DISDRODB keys + "disdrodb_data_url", + ## Source + "source", + "source_convention", + "source_processing_date", + ## Description + "title", + "description", + "project_name", + "keywords", + "summary", + "history", + "comment", + "station_id", + "location", + "country", + "continent", + ## Deployment Info + "latitude", # in degrees North + "longitude", # in degrees East + "altitude", # in meter above sea level + "deployment_status", # 'ended', 'ongoing' + "deployment mode", # 'land', 'ship', 'truck', 'cable' + "platform_protection", # 'shielded', 'unshielded' + "platform_orientation", # [0-360] from N (clockwise) + ## Sensor info + "sensor_long_name", + "sensor_manufacturer", + "sensor_wavelength", + "sensor_serial_number", + "firmware_iop", + "firmware_dsp", + "firmware_version", + "sensor_beam_length", + "sensor_beam_width", + "sensor_nominal_width", # ? + ## effective_measurement_area ? # 0.54 m^2 + "measurement_interval", # sampling_interval ? 
[in seconds] + "calibration_sensitivity", + "calibration_certification_date", + "calibration_certification_url", + ## Attribution + "contributors", + "authors", + "authors_url", + "contact", + "contact_information", + "acknowledgement", # acknowledgements? + "references", + "documentation", + "website", + "institution", + "source_repository", + "license", + "doi", + ] + return list_attrs diff --git a/disdrodb/tests/conftest.py b/disdrodb/tests/conftest.py index 46de3101..d8e92ddd 100644 --- a/disdrodb/tests/conftest.py +++ b/disdrodb/tests/conftest.py @@ -8,9 +8,9 @@ import shutil import pytest -import yaml from disdrodb import __root_path__ +from disdrodb.utils.yaml import write_yaml @pytest.fixture @@ -41,8 +41,7 @@ def create_test_config_files(request): os.makedirs(test_folder) test_file_path = os.path.join(test_folder, file_name) - with open(test_file_path, "w") as f: - yaml.dump(dictionary, f) + write_yaml(dictionary, test_file_path) yield os.remove(test_file_path) diff --git a/disdrodb/tests/data/check_readers/DISDRODB/Raw/EPFL/PARSIVEL_2007/metadata/10.yml b/disdrodb/tests/data/check_readers/DISDRODB/Raw/EPFL/PARSIVEL_2007/metadata/10.yml index c3a2c57c..ccb3bea0 100644 --- a/disdrodb/tests/data/check_readers/DISDRODB/Raw/EPFL/PARSIVEL_2007/metadata/10.yml +++ b/disdrodb/tests/data/check_readers/DISDRODB/Raw/EPFL/PARSIVEL_2007/metadata/10.yml @@ -5,6 +5,7 @@ sensor_name: OTT_Parsivel reader: EPFL/PARSIVEL_2007 raw_data_format: txt platform_type: fixed +disdrodb_data_url: '' source: '' source_convention: '' source_processing_date: '' diff --git a/disdrodb/tests/data/test_folders_files_creation/.gitkeep b/disdrodb/tests/data/test_dir_creation/.gitkeep similarity index 100% rename from disdrodb/tests/data/test_folders_files_creation/.gitkeep rename to disdrodb/tests/data/test_dir_creation/.gitkeep diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0A/STATION_NAME/CAMPAIGN_NAME_sSTATIONID_SUFFIX.parquet 
b/disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0A/STATION_NAME/CAMPAIGN_NAME_sSTATIONID_SUFFIX.parquet similarity index 100% rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0A/STATION_NAME/CAMPAIGN_NAME_sSTATIONID_SUFFIX.parquet rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0A/STATION_NAME/CAMPAIGN_NAME_sSTATIONID_SUFFIX.parquet diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0B/STATION_NAME/DISDRODB.L0B.Raw.CAMPAIGN_NAME.STATIONID.sensor-name.s20190326000000.e20210208000000.dev.nc b/disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0B/STATION_NAME/DISDRODB.L0B.Raw.CAMPAIGN_NAME.STATIONID.sensor-name.s20190326000000.e20210208000000.dev.nc similarity index 100% rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0B/STATION_NAME/DISDRODB.L0B.Raw.CAMPAIGN_NAME.STATIONID.sensor-name.s20190326000000.e20210208000000.dev.nc rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0B/STATION_NAME/DISDRODB.L0B.Raw.CAMPAIGN_NAME.STATIONID.sensor-name.s20190326000000.e20210208000000.dev.nc diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml b/disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml similarity index 100% rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/data/STATION_NAME/data.csv 
b/disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/data/STATION_NAME/data.csv similarity index 100% rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/data/STATION_NAME/data.csv rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/data/STATION_NAME/data.csv diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/issue/STATION_NAME.yml b/disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/issue/STATION_NAME.yml similarity index 100% rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/issue/STATION_NAME.yml rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/issue/STATION_NAME.yml diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml b/disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml similarity index 97% rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml index 382e7f2f..5193b4e8 100644 --- a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml +++ b/disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml @@ -5,6 +5,7 @@ sensor_name: 'OTT_Parsivel' reader: 'EPFL/EPFL_2009' raw_data_format: txt platform_type: fixed +disdrodb_data_url: '' source: '' source_convention: '' source_processing_date: '' diff --git a/disdrodb/tests/test_data_transfer/test_download_data.py b/disdrodb/tests/test_data_transfer/test_download_data.py index c8b1d259..ab0deec2 100644 --- 
a/disdrodb/tests/test_data_transfer/test_download_data.py +++ b/disdrodb/tests/test_data_transfer/test_download_data.py @@ -1,56 +1,32 @@ -#!/usr/bin/env python3 - -# -----------------------------------------------------------------------------. -# Copyright (c) 2021-2023 DISDRODB developers -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# -----------------------------------------------------------------------------. -"""Test DISDRODB download utility.""" +# #!/usr/bin/env python3 + +# # -----------------------------------------------------------------------------. +# # Copyright (c) 2021-2023 DISDRODB developers +# # +# # This program is free software: you can redistribute it and/or modify +# # it under the terms of the GNU General Public License as published by +# # the Free Software Foundation, either version 3 of the License, or +# # (at your option) any later version. +# # +# # This program is distributed in the hope that it will be useful, +# # but WITHOUT ANY WARRANTY; without even the implied warranty of +# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# # GNU General Public License for more details. +# # +# # You should have received a copy of the GNU General Public License +# # along with this program. If not, see . +# # -----------------------------------------------------------------------------. 
+# """Test DISDRODB download utility.""" import os import pytest -import yaml from disdrodb.data_transfer.download_data import ( _download_file_from_url, - _download_station_data, _is_empty_directory, ) - - -def test_download_file_from_url(tmp_path): - # Test download case when empty directory - url = "https://raw.githubusercontent.com/ltelab/disdrodb/main/README.md" - _download_file_from_url(url, tmp_path, force=False) - filename = os.path.basename(url) # README.md - filepath = os.path.join(tmp_path, filename) - assert os.path.isfile(filepath) is True - - # Test download case when directory is not empty and force=False --> avoid download - url = "https://raw.githubusercontent.com/ltelab/disdrodb/main/CODE_OF_CONDUCT.md" - _download_file_from_url(url, tmp_path, force=False) - filename = os.path.basename(url) # README.md - filepath = os.path.join(tmp_path, filename) - assert not os.path.isfile(filepath) - - # Test download case when directory is not empty and force=True --> it download - url = "https://raw.githubusercontent.com/ltelab/disdrodb/main/CODE_OF_CONDUCT.md" - _download_file_from_url(url, tmp_path, force=True) - filename = os.path.basename(url) # README.md - filepath = os.path.join(tmp_path, filename) - assert os.path.isfile(filepath) +from disdrodb.utils.yaml import write_yaml class TestIsEmptyDirectory: @@ -67,13 +43,40 @@ def test_non_directory_path(self, tmp_path): def test_empty_directory(self, tmp_path): # `tmp_path` is a pytest fixture that provides a temporary directory unique to the test invocation - assert _is_empty_directory(tmp_path) is True + assert _is_empty_directory(tmp_path) def test_non_empty_directory(self, tmp_path): # Create a temporary file inside the temporary directory file_path = tmp_path / "test_file.txt" file_path.write_text("This is a test file.") - assert _is_empty_directory(tmp_path) is False + assert not _is_empty_directory(tmp_path) + + +def test_download_file_from_url(tmp_path): + # DUBUG + # tmp_path = "/tmp/empty_2" + # 
os.makedirs(tmp_path) + + # Test download case when empty directory + url = "https://httpbin.org/stream-bytes/1024" + _download_file_from_url(url, tmp_path, force=False) + filename = os.path.basename(url) # README.md + filepath = os.path.join(tmp_path, filename) + assert os.path.isfile(filepath) + + # Test download case when directory is not empty and force=False --> avoid download + url = "https://httpbin.org/stream-bytes/1025" + _download_file_from_url(url, tmp_path, force=False) + filename = os.path.basename(url) # README.md + filepath = os.path.join(tmp_path, filename) + assert not os.path.isfile(filepath) + + # Test download case when directory is not empty and force=True --> it download + url = "https://httpbin.org/stream-bytes/1026" + _download_file_from_url(url, tmp_path, force=True) + filename = os.path.basename(url) # README.md + filepath = os.path.join(tmp_path, filename) + assert os.path.isfile(filepath) def create_fake_metadata_file( @@ -86,31 +89,36 @@ def create_fake_metadata_file( metadata_dir_path = tmp_path / "DISDRODB" / "Raw" / data_source / campaign_name / "metadata" metadata_dir_path.mkdir(parents=True) metadata_fpath = os.path.join(metadata_dir_path, f"{station_name}.yml") - # create a fake yaml file in temp folder - with open(metadata_fpath, "w") as f: - yaml_dict = {} - yaml_dict["station_name"] = station_name - if with_url: - disdro_repo_path = "https://raw.githubusercontent.com/ltelab/disdrodb/main/" - test_data_path = "disdrodb/tests/data/test_data_download/station_files.zip" - disdrodb_data_url = disdro_repo_path + test_data_path - yaml_dict["disdrodb_data_url"] = disdrodb_data_url - - yaml.dump(yaml_dict, f) + # Define fake metadata dictionary + yaml_dict = {} + yaml_dict["station_name"] = station_name + if with_url: + raw_github_path = "https://raw.githubusercontent.com" + disdro_repo_path = f"{raw_github_path}/ltelab/disdrodb/main" + test_data_path = "disdrodb/tests/data/test_data_download/station_files.zip" + disdrodb_data_url = 
f"{disdro_repo_path}/{test_data_path}" + yaml_dict["disdrodb_data_url"] = disdrodb_data_url + # Write fake yaml file in temp folder + write_yaml(yaml_dict, metadata_fpath) assert os.path.exists(metadata_fpath) return metadata_fpath -def test_download_station_data(tmp_path): - station_name = "station_name" - metadata_fpath = create_fake_metadata_file(tmp_path, station_name=station_name, with_url=True) - station_dir_path = metadata_fpath.replace("metadata", "data").replace(".yml", "") - _download_station_data(metadata_fpath=metadata_fpath, force=True) - # Assert files in the zip file have been unzipped - assert os.path.isfile(os.path.join(station_dir_path, "station_file1.txt")) - # Assert inner zipped files are not unzipped ! - assert os.path.isfile(os.path.join(station_dir_path, "station_file2.zip")) - # Assert inner directories are there - assert os.path.isdir(os.path.join(station_dir_path, "2020")) - # Assert zip file has been removed - assert not os.path.exists(os.path.join(station_dir_path, "station_files.zip")) +# def test_download_station_data(tmp_path): +# # DUBUG +# # from pathlib import Path +# # tmp_path = Path("/tmp/empty_3") +# # os.makedirs(tmp_path) + +# station_name = "station_name" +# metadata_fpath = create_fake_metadata_file(tmp_path, station_name=station_name, with_url=True) +# station_dir_path = metadata_fpath.replace("metadata", "data").replace(".yml", "") +# _download_station_data(metadata_fpath=metadata_fpath, force=True) +# # Assert files in the zip file have been unzipped +# assert os.path.isfile(os.path.join(station_dir_path, "station_file1.txt")) +# # Assert inner zipped files are not unzipped ! 
+# assert os.path.isfile(os.path.join(station_dir_path, "station_file2.zip")) +# # Assert inner directories are there +# assert os.path.isdir(os.path.join(station_dir_path, "2020")) +# # Assert zip file has been removed +# assert not os.path.exists(os.path.join(station_dir_path, "station_files.zip")) diff --git a/disdrodb/tests/test_data_transfer/test_upload_data.py b/disdrodb/tests/test_data_transfer/test_upload_data.py index ee70e2bf..552bd51d 100644 --- a/disdrodb/tests/test_data_transfer/test_upload_data.py +++ b/disdrodb/tests/test_data_transfer/test_upload_data.py @@ -26,7 +26,8 @@ import pytest from disdrodb.data_transfer.upload_data import upload_disdrodb_archives -from disdrodb.utils.yaml import read_yaml, write_yaml +from disdrodb.metadata import read_station_metadata +from disdrodb.utils.yaml import write_yaml from disdrodb.utils.zenodo import _create_zenodo_deposition @@ -53,11 +54,6 @@ def create_fake_data_dir(base_dir, data_source, campaign_name, station_name): return data_dir -def get_metadata_dict(base_dir, data_source, campaign_name, station_name): - metadata_fpath = base_dir / "Raw" / data_source / campaign_name / "metadata" / f"{station_name}.yml" - return read_yaml(metadata_fpath) - - def mock_zenodo_api(requests_mock): """Mock Zenodo API.""" @@ -98,11 +94,23 @@ def test_upload_to_zenodo(tmp_path, requests_mock): upload_disdrodb_archives(platform="sandbox.zenodo", base_dir=str(base_dir)) # Check metadata files (1st one should not have changed) - metadata_dict1 = get_metadata_dict(base_dir, data_source, campaign_name, station_name1) + metadata_dict1 = read_station_metadata( + base_dir=base_dir, + product_level="RAW", + data_source=data_source, + campaign_name=campaign_name, + station_name=station_name1, + ) new_station_url1 = metadata_dict1["disdrodb_data_url"] assert new_station_url1 == station_url1 - metadata_dict2 = get_metadata_dict(base_dir, data_source, campaign_name, station_name2) + metadata_dict2 = read_station_metadata( + 
base_dir=base_dir, + product_level="RAW", + data_source=data_source, + campaign_name=campaign_name, + station_name=station_name2, + ) new_station_url2 = metadata_dict2["disdrodb_data_url"] list_new_station_url2 = new_station_url2.split(os.path.sep) diff --git a/disdrodb/tests/test_l0/test_io.py b/disdrodb/tests/test_l0/test_io.py index 68f90dee..42c54cba 100644 --- a/disdrodb/tests/test_l0/test_io.py +++ b/disdrodb/tests/test_l0/test_io.py @@ -26,10 +26,10 @@ import pandas as pd import pytest import xarray as xr -import yaml from disdrodb import __root_path__ from disdrodb.l0 import io +from disdrodb.utils.yaml import write_yaml TEST_DATA_DIR = os.path.join(__root_path__, "disdrodb", "tests", "data") @@ -42,9 +42,7 @@ def create_fake_metadata_file( subfolder_path.mkdir(parents=True) file_path = os.path.join(subfolder_path, yaml_file_name) # create a fake yaml file in temp folder - with open(file_path, "w") as f: - yaml.dump(yaml_dict, f) - + write_yaml(yaml_dict, file_path) assert os.path.exists(file_path) return file_path @@ -90,7 +88,7 @@ def test_create_initial_directory_structure(tmp_path, mocker): processed_dir = os.path.join(tmp_path, "DISDRODB", "Processed", campaign_name) subfolder_path = tmp_path / "DISDRODB" / "Processed" / campaign_name subfolder_path.mkdir(parents=True) - mocker.patch("disdrodb.l0.metadata.check_metadata_compliance", return_value=None) + mocker.patch("disdrodb.metadata.check_metadata.check_metadata_compliance", return_value=None) io.create_initial_directory_structure( raw_dir=raw_dir, processed_dir=processed_dir, station_name=station_name, force=force @@ -290,7 +288,7 @@ def test_get_l0b_dir(path_process_dir): def test_get_l0a_fpath(): """ Test the naming and the path of the L0A file - Note that this test needs "/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/ + Note that this test needs "/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/ metadata/STATION_NAME.yml" """ from 
disdrodb.l0.standards import PRODUCT_VERSION @@ -307,7 +305,7 @@ def test_get_l0a_fpath(): # Set paths path_campaign_name = os.path.join( TEST_DATA_DIR, - "test_folders_files_structure", + "test_dir_structure", "DISDRODB", "Processed", data_source, @@ -331,7 +329,7 @@ def test_get_l0a_fpath(): def test_get_l0b_fpath(): """ Test the naming and the path of the L0B file - Note that this test needs "/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/ + Note that this test needs "/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/ metadata/STATION_NAME.yml" """ from disdrodb.l0.standards import PRODUCT_VERSION @@ -348,7 +346,7 @@ def test_get_l0b_fpath(): # Set paths path_campaign_name = os.path.join( TEST_DATA_DIR, - "test_folders_files_structure", + "test_dir_structure", "DISDRODB", "Processed", data_source, @@ -418,7 +416,7 @@ def test_get_raw_file_list(): ####--------------------------------------------------------------------------. folder_name = "folder_creation_deletion_test" -path_file_temp = os.path.join(TEST_DATA_DIR, "test_folders_files_creation", folder_name) +path_file_temp = os.path.join(TEST_DATA_DIR, "test_dir_creation", folder_name) def test_create_directory(tmp_path): @@ -468,7 +466,7 @@ def test_check_raw_dir(): # Set paths raw_dir = os.path.join( TEST_DATA_DIR, - "test_folders_files_structure", + "test_dir_structure", "DISDRODB", "Raw", data_source, @@ -483,7 +481,7 @@ def test_check_campaign_name(): data_source = "DATA_SOURCE" path_raw = os.path.join( TEST_DATA_DIR, - "test_folders_files_structure", + "test_dir_structure", "DISDRODB", "Raw", data_source, @@ -491,7 +489,7 @@ def test_check_campaign_name(): ) path_process = os.path.join( TEST_DATA_DIR, - "test_folders_files_creation", + "test_dir_creation", "DISDRODB", "Processed", data_source, @@ -507,7 +505,7 @@ def test_copy_station_metadata(): station_name = "STATION_NAME" raw_dir = os.path.join( TEST_DATA_DIR, - "test_folders_files_structure", + 
"test_dir_structure", "DISDRODB", "Raw", data_source, @@ -515,7 +513,7 @@ def test_copy_station_metadata(): ) processed_dir = os.path.join( TEST_DATA_DIR, - "test_folders_files_creation", + "test_dir_creation", "DISDRODB", "Processed", data_source, @@ -553,7 +551,7 @@ def test_copy_station_metadata(): # raw_dir = os.path.join( # TEST_DATA_DIR, -# "test_folders_files_structure", +# "test_dir_structure", # "DISDRODB", # "Raw", # data_source, @@ -561,7 +559,7 @@ def test_copy_station_metadata(): # ) # processed_dir = os.path.join( # TEST_DATA_DIR, -# "test_folders_files_creation", +# "test_dir_creation", # "DISDRODB", # "Processed", # data_source, @@ -602,7 +600,7 @@ def test_copy_station_metadata(): # processed_dir = os.path.join( # TEST_DATA_DIR, -# "test_folders_files_creation", +# "test_dir_creation", # "DISDRODB", # "Processed", # data_source, @@ -639,7 +637,7 @@ def test__read_l0a(): # save dataframe to parquet file path_parquet_file = os.path.join( TEST_DATA_DIR, - "test_folders_files_creation", + "test_dir_creation", "fake_data_sample.parquet", ) df.to_parquet(path_parquet_file, compression="gzip") @@ -662,7 +660,7 @@ def test_read_l0a_dataframe(): # save dataframe to parquet file path_parquet_file = os.path.join( TEST_DATA_DIR, - "test_folders_files_creation", + "test_dir_creation", f"fake_data_sample_{i}.parquet", ) df.to_parquet(path_parquet_file, compression="gzip") diff --git a/disdrodb/tests/test_l0/test_l0_reader.py b/disdrodb/tests/test_l0/test_l0_reader.py index 9170b526..68d889cb 100644 --- a/disdrodb/tests/test_l0/test_l0_reader.py +++ b/disdrodb/tests/test_l0/test_l0_reader.py @@ -22,7 +22,6 @@ import os import pytest -import yaml from disdrodb.l0 import l0_reader from disdrodb.l0.l0_reader import ( @@ -34,6 +33,7 @@ get_reader_from_metadata_reader_key, get_station_reader_function, ) +from disdrodb.utils.yaml import write_yaml # Some test are based on the following reader: DATA_SOURCE = "EPFL" @@ -48,11 +48,8 @@ def create_fake_metadata_file( 
os.makedirs(subfolder_path, exist_ok=True) file_path = os.path.join(subfolder_path, yaml_file_name) # create a fake yaml file in temp folder - with open(file_path, "w") as f: - yaml.dump(yaml_dict, f) - + write_yaml(yaml_dict, file_path) assert os.path.exists(file_path) - return file_path diff --git a/disdrodb/tests/test_l0/test_l0a_processing.py b/disdrodb/tests/test_l0/test_l0a_processing.py index d23b2c36..4c9a9405 100644 --- a/disdrodb/tests/test_l0/test_l0a_processing.py +++ b/disdrodb/tests/test_l0/test_l0a_processing.py @@ -536,7 +536,7 @@ def test_write_l0a(): # Write parquet file path_parquet_file = os.path.join( TEST_DATA_DIR, - "test_folders_files_creation", + "test_dir_creation", "fake_data_sample.parquet", ) l0a_processing.write_l0a(df, path_parquet_file, True, False) diff --git a/disdrodb/tests/test_l0/test_l0b_concat.py b/disdrodb/tests/test_l0/test_l0b_concat.py index 3bc52425..0c474bd0 100644 --- a/disdrodb/tests/test_l0/test_l0b_concat.py +++ b/disdrodb/tests/test_l0/test_l0b_concat.py @@ -24,9 +24,9 @@ import netCDF4 as nc import numpy as np import xarray as xr -import yaml from disdrodb.l0.l0b_nc_concat import _concatenate_netcdf_files, run_disdrodb_l0b_concat +from disdrodb.utils.yaml import write_yaml def create_dummy_netcdf_file(filename: str, data: tuple): @@ -72,8 +72,7 @@ def create_fake_data_file(tmp_path, data_source, campaign_name, station_name="", assert os.path.exists(metedata_folder_path) file_path = os.path.join(metedata_folder_path, f"{station_name}.yml") - with open(file_path, "w") as f: - yaml.dump({"station_name": station_name}, f) + write_yaml({"station_name": station_name}, file_path) return subfolder_path diff --git a/disdrodb/tests/test_l0/test_metadata.py b/disdrodb/tests/test_l0/test_metadata.py index 59139038..e24cf8e0 100644 --- a/disdrodb/tests/test_l0/test_metadata.py +++ b/disdrodb/tests/test_l0/test_metadata.py @@ -20,15 +20,14 @@ import os -import yaml - from disdrodb import __root_path__ from disdrodb.l0.metadata 
import ( + _get_default_metadata_dict, create_campaign_default_metadata, - get_default_metadata_dict, read_metadata, write_default_metadata, ) +from disdrodb.utils.yaml import read_yaml TEST_DATA_DIR = os.path.join(__root_path__, "disdrodb", "tests", "data") @@ -70,7 +69,7 @@ def test_create_campaign_default_metadata(tmp_path): def test_get_default_metadata(): - assert isinstance(get_default_metadata_dict(), dict) + assert isinstance(_get_default_metadata_dict(), dict) def create_fake_metadata_folder(tmp_path, data_source="data_source", campaign_name="campaign_name"): @@ -97,11 +96,10 @@ def test_write_default_metadata(tmp_path): assert os.path.exists(fpath) # open it - with open(str(fpath)) as f: - dictionary = yaml.safe_load(f) + dictionary = read_yaml(str(fpath)) # check is the expected dictionary - expected_dict = get_default_metadata_dict() + expected_dict = _get_default_metadata_dict() expected_dict["data_source"] = data_source expected_dict["campaign_name"] = campaign_name expected_dict["station_name"] = station_name @@ -113,7 +111,7 @@ def test_write_default_metadata(tmp_path): def test_read_metadata(): - raw_dir = os.path.join(TEST_DATA_DIR, "test_folders_files_creation") + raw_dir = os.path.join(TEST_DATA_DIR, "test_dir_creation") station_name = "123" metadata_folder_path = os.path.join(raw_dir, "metadata") @@ -127,7 +125,7 @@ def test_read_metadata(): os.remove(metadata_path) # create data - data = get_default_metadata_dict() + data = _get_default_metadata_dict() # create metadata file write_default_metadata(str(metadata_path)) @@ -136,9 +134,3 @@ def test_read_metadata(): function_return = read_metadata(raw_dir, station_name) assert function_return == data - - -def test_check_metadata_compliance(): - # function_return = metadata.check_metadata_compliance() - # function not implemented - assert 1 == 1 diff --git a/disdrodb/tests/test_l0/test_check_metadata.py b/disdrodb/tests/test_metadata/test_check_metadata.py similarity index 82% rename from 
disdrodb/tests/test_l0/test_check_metadata.py rename to disdrodb/tests/test_metadata/test_check_metadata.py index 355d948f..41dde581 100644 --- a/disdrodb/tests/test_l0/test_check_metadata.py +++ b/disdrodb/tests/test_metadata/test_check_metadata.py @@ -25,8 +25,8 @@ from disdrodb import __root_path__ from disdrodb.api.configs import available_sensor_names -from disdrodb.l0 import metadata -from disdrodb.l0.check_metadata import ( +from disdrodb.l0.l0_reader import available_readers +from disdrodb.metadata.check_metadata import ( check_archive_metadata_campaign_name, check_archive_metadata_compliance, check_archive_metadata_data_source, @@ -36,12 +36,11 @@ check_archive_metadata_sensor_name, check_archive_metadata_station_name, check_metadata_geolocation, - get_archive_metadata_key_value, identify_empty_metadata_keys, identify_missing_metadata_coords, ) -from disdrodb.l0.l0_reader import available_readers -from disdrodb.utils.yaml import read_yaml +from disdrodb.metadata.standards import get_valid_metadata_keys +from disdrodb.utils.yaml import read_yaml, write_yaml TEST_DATA_DIR = os.path.join(__root_path__, "disdrodb", "tests", "data") @@ -100,11 +99,8 @@ def create_fake_metadata_file( subfolder_path.mkdir(parents=True) file_path = os.path.join(subfolder_path, yaml_file_name) # create a fake yaml file in temp folder - with open(file_path, "w") as f: - yaml.dump(yaml_dict, f) - + write_yaml(yaml_dict, file_path) assert os.path.exists(file_path) - return file_path @@ -127,58 +123,11 @@ def test_identify_missing_metadata_keys(tmp_path, capsys): assert not captured.out -def test_get_archive_metadata_key_value(tmp_path): - expected_result = [] - - base_dir = os.path.join(tmp_path, "DISDRODB") - # Test 1 : one config file - yaml_file_name = "station_1.yml" - expected_key = "key1" - expected_value = "value1" - data_source = "data_source" - campaign_name = "campaign_name" - - yaml_dict = {expected_key: expected_value} - create_fake_metadata_file(tmp_path, yaml_file_name, 
yaml_dict, data_source, campaign_name) - result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir) - expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value)) - - assert sorted(result) == sorted(expected_result) - - # Test 2 : two config files - yaml_file_name = "station_2.yml" - expected_key = "key1" - expected_value = "value1" - data_source = "data_source" - campaign_name = "campaign_name" - - yaml_dict = {expected_key: expected_value} - create_fake_metadata_file(tmp_path, yaml_file_name, yaml_dict, data_source, campaign_name) - result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir) - expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value)) - - assert sorted(result) == sorted(expected_result) - - # Test 3: test tuple - yaml_file_name = "station_3.yml" - expected_key = "key1" - expected_value = "value1" - data_source = "data_source" - campaign_name = "campaign_name" - yaml_dict = {expected_key: expected_value} - create_fake_metadata_file(tmp_path, yaml_file_name, yaml_dict, data_source, campaign_name) - result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir, return_tuple=False) - expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value)) - expected_result = [item[3] for item in expected_result] - - assert sorted(result) == sorted(expected_result) - - def test_check_archive_metadata_keys(tmp_path): base_dir = os.path.join(tmp_path, "DISDRODB") # Test 1 : create a correct metadata file # Get the list of valid metadata keys - list_of_valid_metadata_keys = metadata.get_valid_metadata_keys() + list_of_valid_metadata_keys = get_valid_metadata_keys() yaml_file_name = "station_1.yml" yaml_dict = {i: "value1" for i in list_of_valid_metadata_keys} data_source = "data_source" diff --git a/disdrodb/tests/test_metadata/test_metadata_info.py 
b/disdrodb/tests/test_metadata/test_metadata_info.py new file mode 100644 index 00000000..bd58186e --- /dev/null +++ b/disdrodb/tests/test_metadata/test_metadata_info.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2023 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# -----------------------------------------------------------------------------.
+"""Test Metadata Info Extraction.""" + +import os + +from disdrodb.metadata.info import get_archive_metadata_key_value +from disdrodb.utils.yaml import write_yaml + + +def create_fake_metadata_file( + tmp_path, yaml_file_name, yaml_dict, data_source="data_source", campaign_name="campaign_name" +): + subfolder_path = tmp_path / "DISDRODB" / "Raw" / data_source / campaign_name / "metadata" + if not os.path.exists(subfolder_path): + subfolder_path.mkdir(parents=True) + file_path = os.path.join(subfolder_path, yaml_file_name) + # create a fake yaml file in temp folder + write_yaml(yaml_dict, file_path) + assert os.path.exists(file_path) + return file_path + + +def test_get_archive_metadata_key_value(tmp_path): + expected_result = [] + + base_dir = os.path.join(tmp_path, "DISDRODB") + # Test 1 : one config file + yaml_file_name = "station_1.yml" + expected_key = "key1" + expected_value = "value1" + data_source = "data_source" + campaign_name = "campaign_name" + + yaml_dict = {expected_key: expected_value} + create_fake_metadata_file(tmp_path, yaml_file_name, yaml_dict, data_source, campaign_name) + result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir) + expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value)) + + assert sorted(result) == sorted(expected_result) + + # Test 2 : two config files + yaml_file_name = "station_2.yml" + expected_key = "key1" + expected_value = "value1" + data_source = "data_source" + campaign_name = "campaign_name" + + yaml_dict = {expected_key: expected_value} + create_fake_metadata_file(tmp_path, yaml_file_name, yaml_dict, data_source, campaign_name) + result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir) + expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value)) + + assert sorted(result) == sorted(expected_result) + + # Test 3: test tuple + yaml_file_name = "station_3.yml" + expected_key = "key1" + 
expected_value = "value1" + data_source = "data_source" + campaign_name = "campaign_name" + yaml_dict = {expected_key: expected_value} + create_fake_metadata_file(tmp_path, yaml_file_name, yaml_dict, data_source, campaign_name) + result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir, return_tuple=False) + expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value)) + expected_result = [item[3] for item in expected_result] + + assert sorted(result) == sorted(expected_result) diff --git a/disdrodb/tests/test_api/test_api_metadata.py b/disdrodb/tests/test_metadata/test_metadata_io.py similarity index 94% rename from disdrodb/tests/test_api/test_api_metadata.py rename to disdrodb/tests/test_metadata/test_metadata_io.py index 2d4101c8..55f6010c 100644 --- a/disdrodb/tests/test_api/test_api_metadata.py +++ b/disdrodb/tests/test_metadata/test_metadata_io.py @@ -20,9 +20,8 @@ import os -import yaml - -from disdrodb.api.metadata import _get_list_all_metadata, _get_list_metadata_with_data +from disdrodb.metadata.io import _get_list_all_metadata, _get_list_metadata_with_data +from disdrodb.utils.yaml import write_yaml def create_fake_metadata_file( @@ -33,11 +32,8 @@ def create_fake_metadata_file( subfolder_path.mkdir(parents=True) file_path = os.path.join(subfolder_path, yaml_file_name) # create a fake yaml file in temp folder - with open(file_path, "w") as f: - yaml.dump(yaml_dict, f) - + write_yaml(yaml_dict, file_path) assert os.path.exists(file_path) - return file_path diff --git a/docs/source/metadata_archive.rst b/docs/source/metadata_archive.rst index cb96e2e1..0834fc56 100644 --- a/docs/source/metadata_archive.rst +++ b/docs/source/metadata_archive.rst @@ -53,7 +53,7 @@ To update the DISDRODB Metadata Archive follow these steps: .. code:: bash export DISDRODB_BASE_DIR="/disdrodb-data/DISDRODB" - disdrodb_check_metadata_compliance + disdrodb_check_metadata_archive ..
note:: The ``DISDRODB_BASE_DIR`` environment variable has to be specified only if the DISDRODB root directory had not been specified before. diff --git a/pyproject.toml b/pyproject.toml index 602397d4..ba927c49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ ] description = "This package provides tools to homogenize, process, and analyze global disdrometer data." readme = "README.md" -keywords = ["python", "disdrometer"] +keywords = ["python", "disdrometer","parsivel", "drop size distribution"] classifiers = [ "Development Status :: 1 - Planning", "Intended Audience :: Developers", @@ -48,7 +48,7 @@ dev = [ "pydantic", "pytest-cov", "pytest-mock", - "requests-mock" + "requests-mock", ] [tool.setuptools_scm] @@ -73,6 +73,8 @@ run_disdrodb_l0="disdrodb.l0.scripts.run_disdrodb_l0:run_disdrodb_l0" # Data transfer download_disdrodb_archive="disdrodb.data_transfer.scripts.download_disdrodb_archive:download_disdrodb_archive" upload_disdrodb_archive="disdrodb.data_transfer.scripts.upload_disdrodb_archive:upload_disdrodb_archive" +# Metadata archive +disdrodb_check_metadata_archive="disdrodb.metadata.scripts.disdrodb_check_metadata_archive:disdrodb_check_metadata_archive" [tool.ruff] select = ["F", @@ -100,5 +102,6 @@ preview = true [tool.coverage.run] omit = [ - "disdrodb/l0/readers/*" + "disdrodb/l0/readers/*", + "disdrodb/l0/manuals/*", ]