diff --git a/.gitignore b/.gitignore
index af0636f2..1f3f0b7f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -161,6 +161,6 @@ docs\source\reader_preparation.ipynb
# Ignore specific folders
/disdrodb/tests/temp/
-/disdrodb/tests/data/test_folders_files_creation/*
-!disdrodb/tests/data/test_folders_files_creation/.gitkeep
+/disdrodb/tests/data/test_dir_creation/*
+!disdrodb/tests/data/test_dir_creation/.gitkeep
disdrodb-dev
diff --git a/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_1.yml b/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_1.yml
index 8bd8bb11..afb01de8 100644
--- a/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_1.yml
+++ b/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_1.yml
@@ -6,6 +6,7 @@ reader: EPFL/LOCARNO_2018
raw_data_format: raw
raw_data_type: raw
platform_type: fixed
+disdrodb_data_url: ''
crs: WGS84
proj4_string: +proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs
EPSG: 4326
diff --git a/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_2.yml b/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_2.yml
index 90972b16..f476d679 100644
--- a/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_2.yml
+++ b/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/station_name_2.yml
@@ -6,6 +6,7 @@ reader: EPFL/LOCARNO_2018
raw_data_format: raw
raw_data_type: raw
platform_type: fixed
+disdrodb_data_url: ''
crs: WGS84
proj4_string: +proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs
EPSG: 4326
diff --git a/disdrodb/__init__.py b/disdrodb/__init__.py
index 7fc432bc..4c0a1b2a 100644
--- a/disdrodb/__init__.py
+++ b/disdrodb/__init__.py
@@ -8,9 +8,9 @@
available_data_sources,
available_stations,
)
-from disdrodb.api.metadata import read_station_metadata
from disdrodb.configs import define_disdrodb_configs as define_configs
from disdrodb.docs import open_documentation, open_sensor_documentation
+from disdrodb.metadata import read_station_metadata
__root_path__ = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
diff --git a/disdrodb/api/checks.py b/disdrodb/api/checks.py
index 91fbea30..fae59dae 100644
--- a/disdrodb/api/checks.py
+++ b/disdrodb/api/checks.py
@@ -95,14 +95,3 @@ def check_sensor_name(sensor_name: str, product_level: str = "l0") -> None:
msg = f"{sensor_name} not valid {sensor_name}. Valid values are {sensor_names}."
logger.error(msg)
raise ValueError(msg)
-
-
-def check_product_level(product_level):
- """Check DISDRODB product level validity."""
- if not isinstance(product_level, str):
- raise TypeError("'product_level' must be a string.")
- product_level = product_level.lower()
- valid_product_levels = ["l0"]
- if product_level not in valid_product_levels:
- raise ValueError(f"{product_level} is an invalid 'product_level'. Valid values are: {valid_product_levels}")
- return product_level
diff --git a/disdrodb/api/configs.py b/disdrodb/api/configs.py
index 06343108..3ed10fab 100644
--- a/disdrodb/api/configs.py
+++ b/disdrodb/api/configs.py
@@ -21,12 +21,26 @@
import logging
import os
-from disdrodb.api.checks import check_product_level, check_sensor_name
+from disdrodb.api.checks import check_sensor_name
from disdrodb.utils.yaml import read_yaml
logger = logging.getLogger(__name__)
+def _check_product_level(product_level):
+ """Check DISDRODB product level validity."""
+    # Note: disdrodb.api.io defines a separate check_product_level function!
+ if not isinstance(product_level, str):
+ raise TypeError("'product_level' must be a string.")
+ product_level = product_level.lower()
+ valid_product_levels = ["l0", "l0a", "l0b"]
+ if product_level not in valid_product_levels:
+ raise ValueError(f"{product_level} is an invalid 'product_level'. Valid values are: {valid_product_levels}")
+ if product_level in ["l0a", "l0b"]:
+ product_level = "l0"
+ return product_level
+
+
def _get_config_dir(product_level):
"""Define the config directory path of a given DISDRODB product level."""
from disdrodb import __root_path__
@@ -57,7 +71,7 @@ def get_sensor_configs_dir(sensor_name: str, product_level: str) -> str:
Error if the config directory does not exist.
"""
check_sensor_name(sensor_name, product_level=product_level)
- product_level = check_product_level(product_level)
+ product_level = _check_product_level(product_level)
config_dir_path = _get_config_dir(product_level=product_level)
config_sensor_dir_path = os.path.join(config_dir_path, sensor_name)
if not os.path.exists(config_sensor_dir_path):
@@ -88,7 +102,7 @@ def read_config_file(sensor_name: str, product_level: str, filename: str) -> dic
Error if file does not exist.
"""
check_sensor_name(sensor_name, product_level=product_level)
- product_level = check_product_level(product_level)
+ product_level = _check_product_level(product_level)
config_sensor_dir_path = get_sensor_configs_dir(sensor_name, product_level=product_level)
config_fpath = os.path.join(config_sensor_dir_path, filename)
# Check yaml file exists
@@ -112,6 +126,6 @@ def available_sensor_names(product_level: str = "L0") -> sorted:
DISDRODB product level.
By default, it returns the sensors available for DISDRODB L0 products.
"""
- product_level = check_product_level(product_level)
+ product_level = _check_product_level(product_level)
config_dir_path = _get_config_dir(product_level=product_level)
return sorted(os.listdir(config_dir_path))
diff --git a/disdrodb/api/io.py b/disdrodb/api/io.py
index 163b74f7..8ab57866 100644
--- a/disdrodb/api/io.py
+++ b/disdrodb/api/io.py
@@ -22,7 +22,6 @@
import os
import numpy as np
-import yaml
from disdrodb.configs import get_base_dir
@@ -85,6 +84,10 @@ def _get_list_stations_dirs(product_level, campaign_dir):
data_path = os.path.join(campaign_dir, "data")
else:
data_path = os.path.join(campaign_dir, product_level)
+ # Check if the data directory exists
+ # - For a fresh disdrodb-data cloned repo, no "data" directories
+ if not os.path.exists(data_path):
+ return []
# Get list of directories (stations)
list_stations = os.listdir(data_path)
list_stations_dir = [os.path.join(data_path, station_name) for station_name in list_stations]
@@ -215,26 +218,6 @@ def _get_stations(base_dir, product_level):
return list_available_stations
-def _get_metadata_fpath(base_dir, product_level, data_source, campaign_name, station_name):
- """Get metadata file path a given station."""
- campaign_dir = get_disdrodb_path(
- base_dir=base_dir,
- product_level=product_level,
- data_source=data_source,
- campaign_name=campaign_name,
- )
- metadata_fpath = os.path.join(campaign_dir, "metadata", station_name + ".yml")
- return metadata_fpath
-
-
-def get_metadata_dict(base_dir, product_level, data_source, campaign_name, station_name):
- """Get metadata of a given station."""
- metadata_fpath = _get_metadata_fpath(base_dir, product_level, data_source, campaign_name, station_name)
- with open(metadata_fpath) as f:
- metadata_dict = yaml.safe_load(f)
- return metadata_dict
-
-
####---------------------------------------------------------------------------.
#### I/O CHECKS
def check_product_level(product_level):
diff --git a/disdrodb/configs.py b/disdrodb/configs.py
index a142df79..52e6fe89 100644
--- a/disdrodb/configs.py
+++ b/disdrodb/configs.py
@@ -87,4 +87,5 @@ def get_base_dir(base_dir=None):
if base_dir is None:
base_dir = disdrodb.config["dir"]
+ base_dir = str(base_dir) # convert Path to str
return base_dir
diff --git a/disdrodb/data_transfer/download_data.py b/disdrodb/data_transfer/download_data.py
index ddff431f..83a7d3e7 100644
--- a/disdrodb/data_transfer/download_data.py
+++ b/disdrodb/data_transfer/download_data.py
@@ -26,9 +26,9 @@
import pooch
import tqdm
-from disdrodb.api.metadata import get_list_metadata
from disdrodb.configs import get_base_dir
from disdrodb.l0.io import _infer_disdrodb_tree_path
+from disdrodb.metadata import get_list_metadata
from disdrodb.utils.compression import _unzip_file
from disdrodb.utils.yaml import read_yaml
@@ -159,7 +159,7 @@ def _download_station_data(metadata_fpath: str, force: bool = False) -> None:
"""
disdrodb_data_url, station_dir_path = _get_station_url_and_dir_path(metadata_fpath)
- if disdrodb_data_url is not None:
+ if isinstance(disdrodb_data_url, str) and disdrodb_data_url != "":
# Download file
zip_fpath, to_unzip = _download_file_from_url(disdrodb_data_url, dst_dir_path=station_dir_path, force=force)
# Extract the stations files from the downloaded station.zip file
diff --git a/disdrodb/data_transfer/upload_data.py b/disdrodb/data_transfer/upload_data.py
index 8b554125..254e1633 100644
--- a/disdrodb/data_transfer/upload_data.py
+++ b/disdrodb/data_transfer/upload_data.py
@@ -23,7 +23,7 @@
import click
-from disdrodb.api.metadata import get_list_metadata
+from disdrodb.metadata import get_list_metadata
from disdrodb.utils.compression import _zip_dir
from disdrodb.utils.yaml import read_yaml, write_yaml
from disdrodb.utils.zenodo import _create_zenodo_deposition, _upload_file_to_zenodo
@@ -95,19 +95,17 @@ def _filter_already_uploaded(metadata_fpaths: List[str]) -> List[str]:
"""Filter metadata files that already have a remote url specified."""
filtered = []
-
for metadata_fpath in metadata_fpaths:
metadata_dict = read_yaml(metadata_fpath)
if metadata_dict.get("disdrodb_data_url"):
print(f"{metadata_fpath} already has a remote url specified. Skipping.")
continue
filtered.append(metadata_fpath)
-
return filtered
def _upload_data_to_zenodo(metadata_fpaths: List[str], sandbox: bool = False) -> None:
- """Upload data to Zenodo.
+ """Upload data to Zenodo Sandbox.
Parameters
----------
diff --git a/disdrodb/l0/__init__.py b/disdrodb/l0/__init__.py
index 4e83de66..4183e825 100644
--- a/disdrodb/l0/__init__.py
+++ b/disdrodb/l0/__init__.py
@@ -1,7 +1,3 @@
-from disdrodb.l0.check_metadata import (
- check_archive_metadata_compliance,
- check_archive_metadata_geolocation,
-)
from disdrodb.l0.l0_processing import (
run_disdrodb_l0,
run_disdrodb_l0_station,
@@ -9,6 +5,10 @@
run_l0b_from_nc,
)
from disdrodb.l0.l0_reader import available_readers
+from disdrodb.metadata.check_metadata import (
+ check_archive_metadata_compliance,
+ check_archive_metadata_geolocation,
+)
__all__ = [
"run_l0a",
diff --git a/disdrodb/l0/io.py b/disdrodb/l0/io.py
index 204ac41b..6d592860 100644
--- a/disdrodb/l0/io.py
+++ b/disdrodb/l0/io.py
@@ -639,7 +639,8 @@ def _check_raw_dir_data_subfolders(raw_dir):
def _check_raw_dir_metadata(raw_dir, verbose=True):
"""Check metadata in the raw_dir directory."""
- from disdrodb.l0.metadata import check_metadata_compliance, write_default_metadata
+ from disdrodb.l0.metadata import write_default_metadata
+ from disdrodb.metadata.check_metadata import check_metadata_compliance
# Get list of stations
raw_data_dir = os.path.join(raw_dir, "data")
diff --git a/disdrodb/l0/l0_reader.py b/disdrodb/l0/l0_reader.py
index 84a89880..763a6708 100644
--- a/disdrodb/l0/l0_reader.py
+++ b/disdrodb/l0/l0_reader.py
@@ -320,10 +320,10 @@ def _get_reader_from_metadata(metadata):
def get_station_reader_function(data_source, campaign_name, station_name, base_dir=None):
"""Retrieve the reader function from the station metadata."""
- from disdrodb.api.io import get_metadata_dict
+ from disdrodb.metadata import read_station_metadata
# Get metadata
- metadata = get_metadata_dict(
+ metadata = read_station_metadata(
base_dir=base_dir,
product_level="RAW",
data_source=data_source,
diff --git a/disdrodb/l0/metadata.py b/disdrodb/l0/metadata.py
index 54f236fe..55a3654c 100644
--- a/disdrodb/l0/metadata.py
+++ b/disdrodb/l0/metadata.py
@@ -20,101 +20,16 @@
import os
-import numpy as np
-
from disdrodb.configs import get_base_dir
from disdrodb.l0.io import _infer_campaign_name_from_path, _infer_data_source_from_path
+from disdrodb.metadata.manipulation import sort_metadata_dictionary
+from disdrodb.metadata.standards import get_valid_metadata_keys
from disdrodb.utils.yaml import read_yaml, write_yaml
-
-####--------------------------------------------------------------------------.
-#### Define valid metadata keys
-def get_valid_metadata_keys() -> list:
- """Get DISDRODB valid metadata list.
-
- Returns
- -------
- list
- List of valid metadata keys
- """
- list_attrs = [
- ## Mandatory fields
- "data_source",
- "campaign_name",
- "station_name",
- "sensor_name",
- "reader",
- "raw_data_format", # 'txt', 'netcdf'
- "platform_type", # 'fixed', 'mobile'
- ## Source
- "source",
- "source_convention",
- "source_processing_date",
- ## Description
- "title",
- "description",
- "project_name",
- "keywords",
- "summary",
- "history",
- "comment",
- "station_id",
- "location",
- "country",
- "continent",
- ## Deployment Info
- "latitude", # in degrees North
- "longitude", # in degrees East
- "altitude", # in meter above sea level
- "deployment_status", # 'ended', 'ongoing'
- "deployment mode", # 'land', 'ship', 'truck', 'cable'
- "platform_protection", # 'shielded', 'unshielded'
- "platform_orientation", # [0-360] from N (clockwise)
- ## Sensor info
- "sensor_long_name",
- "sensor_manufacturer",
- "sensor_wavelength",
- "sensor_serial_number",
- "firmware_iop",
- "firmware_dsp",
- "firmware_version",
- "sensor_beam_length",
- "sensor_beam_width",
- "sensor_nominal_width", # ?
- ## effective_measurement_area ? # 0.54 m^2
- "measurement_interval", # sampling_interval ? [in seconds]
- "calibration_sensitivity",
- "calibration_certification_date",
- "calibration_certification_url",
- ## Attribution
- "contributors",
- "authors",
- "authors_url",
- "contact",
- "contact_information",
- "acknowledgement", # acknowledgements?
- "references",
- "documentation",
- "website",
- "institution",
- "source_repository",
- "license",
- "doi",
- ]
- return list_attrs
-
-
####--------------------------------------------------------------------------.
#### Metadata reader & writers
-def sort_metadata_dictionary(metadata):
- """Sort the keys of the metadata dictionary by valid_metadata_keys list order."""
- list_metadata_keys = get_valid_metadata_keys()
- metadata = {k: metadata[k] for k in list_metadata_keys}
- return metadata
-
-
def read_metadata(campaign_dir: str, station_name: str) -> dict:
"""Read YAML metadata file.
@@ -149,7 +64,7 @@ def write_metadata(metadata, fpath):
####--------------------------------------------------------------------------.
#### Default (empty) metadata
-def get_default_metadata_dict() -> dict:
+def _get_default_metadata_dict() -> dict:
"""Get DISDRODB metadata default values.
Returns
@@ -179,7 +94,7 @@ def write_default_metadata(fpath: str) -> None:
File path
"""
# Get default metadata dict
- metadata = get_default_metadata_dict()
+ metadata = _get_default_metadata_dict()
# Try infer the data_source, campaign_name and station_name from fpath
try:
campaign_name = _infer_campaign_name_from_path(fpath)
@@ -213,145 +128,3 @@ def create_campaign_default_metadata(
write_default_metadata(fpath=metadata_fpath)
print(f"The default metadata were created for stations {station_names}.")
return None
-
-
-####--------------------------------------------------------------------------.
-#### Check metadata file
-
-
-def get_metadata_missing_keys(metadata):
- """Return the DISDRODB metadata keys which are missing."""
- keys = list(metadata.keys())
- valid_keys = get_valid_metadata_keys()
- # Identify missing keys
- idx_missing_keys = np.where(np.isin(valid_keys, keys, invert=True))[0]
- missing_keys = np.array(valid_keys)[idx_missing_keys].tolist()
- return missing_keys
-
-
-def get_metadata_invalid_keys(metadata):
- """Return the DISDRODB metadata keys which are not valid."""
- keys = list(metadata.keys())
- valid_keys = get_valid_metadata_keys()
- # Identify invalid keys
- idx_invalid_keys = np.where(np.isin(keys, valid_keys, invert=True))[0]
- invalid_keys = np.array(keys)[idx_invalid_keys].tolist()
- return invalid_keys
-
-
-def _check_metadata_keys(metadata):
- """Check validity of metadata keys."""
- # Check all keys are valid
- invalid_keys = get_metadata_invalid_keys(metadata)
- if len(invalid_keys) > 0:
- raise ValueError(f"Invalid metadata keys: {invalid_keys}")
- # Check no keys are missing
- missing_keys = get_metadata_missing_keys(metadata)
- if len(missing_keys) > 0:
- raise ValueError(f"Missing metadata keys: {missing_keys}")
- return None
-
-
-def _check_metadata_values(metadata):
- """Check validity of metadata values
-
- If null is specified in the YAML files (or None in the dict) raise error.
- """
- for key, value in metadata.items():
- if isinstance(value, type(None)):
- raise ValueError(f"The metadata key {key} has None or null value. Use '' instead.")
- return None
-
-
-def _check_metadata_campaign_name(metadata, expected_name):
- """Check metadata campaign_name."""
- if "campaign_name" not in metadata:
- raise ValueError("The metadata file does not contain the 'campaign_name' key.")
- campaign_name = metadata["campaign_name"]
- if campaign_name == "":
- raise ValueError("The 'campaign_name' key in the metadata is empty.")
- if campaign_name != expected_name:
- raise ValueError(
- f"The campaign_name in the metadata is '{campaign_name}' but the campaign directory is '{expected_name}'"
- )
- return None
-
-
-def _check_metadata_data_source(metadata, expected_name):
- """Check metadata data_source."""
- if "data_source" not in metadata:
- raise ValueError("The metadata file does not contain the 'data_source' key.")
- data_source = metadata["data_source"]
- if data_source == "":
- raise ValueError("The 'data_source' key in the metadata is empty.")
- if data_source != expected_name:
- raise ValueError(
- f"The data_source in the metadata is '{data_source}' but the data_source directory is '{expected_name}'"
- )
- return None
-
-
-def _check_metadata_station_name(metadata, expected_name):
- """Check metadata station name.
-
- This function does not check that data are available for the station!"""
- if "station_name" not in metadata:
- raise ValueError("The metadata file does not contain the 'station_name' key.")
- station_name = metadata["station_name"]
- if not isinstance(station_name, str):
- raise ValueError("The 'station_name' key in the metadata is not defined as a string!")
- if station_name == "":
- raise ValueError("The 'station_name' key in the metadata is empty.")
- if station_name != expected_name:
- raise ValueError(
- f"The station_name in the metadata is '{station_name}' but the metadata file is named '{expected_name}.yml'"
- )
- return None
-
-
-def _check_metadata_sensor_name(metadata):
- from disdrodb.api.checks import check_sensor_name
-
- sensor_name = metadata["sensor_name"]
- check_sensor_name(sensor_name=sensor_name)
- return None
-
-
-def check_metadata_compliance(data_source, campaign_name, station_name, base_dir=None):
- """Check DISDRODB metadata compliance."""
- from disdrodb.api.metadata import read_station_metadata
- from disdrodb.l0.l0_reader import _check_metadata_reader
-
- metadata = read_station_metadata(
- base_dir=base_dir,
- product_level="RAW",
- data_source=data_source,
- campaign_name=campaign_name,
- station_name=station_name,
- )
- _check_metadata_keys(metadata)
- _check_metadata_values(metadata)
- _check_metadata_campaign_name(metadata, expected_name=campaign_name)
- _check_metadata_data_source(metadata, expected_name=data_source)
- _check_metadata_station_name(metadata, expected_name=station_name)
- _check_metadata_sensor_name(metadata)
- _check_metadata_reader(metadata)
- return None
-
-
-####--------------------------------------------------------------------------.
-#### Metadata manipulation tools
-def remove_invalid_metadata_keys(metadata):
- """Remove invalid keys from the metadata dictionary."""
- invalid_keys = get_metadata_invalid_keys(metadata)
- for k in invalid_keys:
- _ = metadata.pop(k)
- return metadata
-
-
-def add_missing_metadata_keys(metadata):
- """Add missing keys to the metadata dictionary."""
- missing_keys = get_metadata_missing_keys(metadata)
- for k in missing_keys:
- metadata[k] = ""
- return metadata
diff --git a/disdrodb/metadata/__init__.py b/disdrodb/metadata/__init__.py
new file mode 100644
index 00000000..2a950c38
--- /dev/null
+++ b/disdrodb/metadata/__init__.py
@@ -0,0 +1,4 @@
+from disdrodb.metadata.info import get_archive_metadata_key_value
+from disdrodb.metadata.io import get_list_metadata, read_station_metadata
+
+__all__ = ["read_station_metadata", "get_list_metadata", "get_archive_metadata_key_value"]
diff --git a/disdrodb/l0/check_metadata.py b/disdrodb/metadata/check_metadata.py
similarity index 76%
rename from disdrodb/l0/check_metadata.py
rename to disdrodb/metadata/check_metadata.py
index a6a42286..f920fb46 100644
--- a/disdrodb/l0/check_metadata.py
+++ b/disdrodb/metadata/check_metadata.py
@@ -21,23 +21,141 @@
import os
from typing import Union
-from disdrodb.api.metadata import get_list_metadata, read_station_metadata
+import numpy as np
+
from disdrodb.configs import get_base_dir
from disdrodb.l0.io import (
_infer_campaign_name_from_path,
_infer_data_source_from_path,
)
from disdrodb.l0.l0_reader import _check_metadata_reader
-from disdrodb.l0.metadata import (
- _check_metadata_campaign_name,
- _check_metadata_data_source,
- _check_metadata_keys,
- _check_metadata_sensor_name,
- _check_metadata_station_name,
- check_metadata_compliance,
-)
+from disdrodb.metadata.io import get_list_metadata, read_station_metadata
+from disdrodb.metadata.standards import get_valid_metadata_keys
from disdrodb.utils.yaml import read_yaml
+#### --------------------------------------------------------------------------.
+#### Check Station Metadata
+
+
+def get_metadata_missing_keys(metadata):
+ """Return the DISDRODB metadata keys which are missing."""
+ keys = list(metadata.keys())
+ valid_keys = get_valid_metadata_keys()
+ # Identify missing keys
+ idx_missing_keys = np.where(np.isin(valid_keys, keys, invert=True))[0]
+ missing_keys = np.array(valid_keys)[idx_missing_keys].tolist()
+ return missing_keys
+
+
+def get_metadata_invalid_keys(metadata):
+ """Return the DISDRODB metadata keys which are not valid."""
+ keys = list(metadata.keys())
+ valid_keys = get_valid_metadata_keys()
+ # Identify invalid keys
+ idx_invalid_keys = np.where(np.isin(keys, valid_keys, invert=True))[0]
+ invalid_keys = np.array(keys)[idx_invalid_keys].tolist()
+ return invalid_keys
+
+
+def _check_metadata_keys(metadata):
+ """Check validity of metadata keys."""
+ # Check all keys are valid
+ invalid_keys = get_metadata_invalid_keys(metadata)
+ if len(invalid_keys) > 0:
+ raise ValueError(f"Invalid metadata keys: {invalid_keys}")
+ # Check no keys are missing
+ missing_keys = get_metadata_missing_keys(metadata)
+ if len(missing_keys) > 0:
+ raise ValueError(f"Missing metadata keys: {missing_keys}")
+ return None
+
+
+def _check_metadata_values(metadata):
+ """Check validity of metadata values
+
+ If null is specified in the YAML files (or None in the dict) raise error.
+ """
+ for key, value in metadata.items():
+ if isinstance(value, type(None)):
+ raise ValueError(f"The metadata key {key} has None or null value. Use '' instead.")
+ return None
+
+
+def _check_metadata_campaign_name(metadata, expected_name):
+ """Check metadata campaign_name."""
+ if "campaign_name" not in metadata:
+ raise ValueError("The metadata file does not contain the 'campaign_name' key.")
+ campaign_name = metadata["campaign_name"]
+ if campaign_name == "":
+ raise ValueError("The 'campaign_name' key in the metadata is empty.")
+ if campaign_name != expected_name:
+ raise ValueError(
+ f"The campaign_name in the metadata is '{campaign_name}' but the campaign directory is '{expected_name}'"
+ )
+ return None
+
+
+def _check_metadata_data_source(metadata, expected_name):
+ """Check metadata data_source."""
+ if "data_source" not in metadata:
+ raise ValueError("The metadata file does not contain the 'data_source' key.")
+ data_source = metadata["data_source"]
+ if data_source == "":
+ raise ValueError("The 'data_source' key in the metadata is empty.")
+ if data_source != expected_name:
+ raise ValueError(
+ f"The data_source in the metadata is '{data_source}' but the data_source directory is '{expected_name}'"
+ )
+ return None
+
+
+def _check_metadata_station_name(metadata, expected_name):
+ """Check metadata station name.
+
+ This function does not check that data are available for the station!"""
+ if "station_name" not in metadata:
+ raise ValueError("The metadata file does not contain the 'station_name' key.")
+ station_name = metadata["station_name"]
+ if not isinstance(station_name, str):
+ raise ValueError("The 'station_name' key in the metadata is not defined as a string!")
+ if station_name == "":
+ raise ValueError("The 'station_name' key in the metadata is empty.")
+ if station_name != expected_name:
+ raise ValueError(
+ f"The station_name in the metadata is '{station_name}' but the metadata file is named '{expected_name}.yml'"
+ )
+ return None
+
+
+def _check_metadata_sensor_name(metadata):
+ from disdrodb.api.checks import check_sensor_name
+
+ sensor_name = metadata["sensor_name"]
+ check_sensor_name(sensor_name=sensor_name)
+ return None
+
+
+def check_metadata_compliance(data_source, campaign_name, station_name, base_dir=None):
+ """Check DISDRODB metadata compliance."""
+ from disdrodb.l0.l0_reader import _check_metadata_reader
+
+ metadata = read_station_metadata(
+ base_dir=base_dir,
+ product_level="RAW",
+ data_source=data_source,
+ campaign_name=campaign_name,
+ station_name=station_name,
+ )
+ _check_metadata_keys(metadata)
+ _check_metadata_values(metadata)
+ _check_metadata_campaign_name(metadata, expected_name=campaign_name)
+ _check_metadata_data_source(metadata, expected_name=data_source)
+ _check_metadata_station_name(metadata, expected_name=station_name)
+ _check_metadata_sensor_name(metadata)
+ _check_metadata_reader(metadata)
+ return None
+
+
#### --------------------------------------------------------------------------.
#### Metadata Archive Missing Information
@@ -125,54 +243,8 @@ def identify_empty_metadata_keys(metadata_fpaths: list, keys: Union[str, list])
return None
-def get_archive_metadata_key_value(key: str, return_tuple: bool = True, base_dir: str = None):
- """Return the values of a metadata key for all the archive.
-
- Parameters
- ----------
- base_dir : str
- Path to the disdrodb directory.
- key : str
- Metadata key.
- return_tuple : bool, optional
- If True, returns a tuple of values with station, campaign and data source name.
- If False, returns a list of values without station, campaign and data source name.
- The default is True.
- base_dir : str (optional)
- Base directory of DISDRODB. Format: <...>/DISDRODB
- If None (the default), the disdrodb config variable 'dir' is used.
-
- Returns
- -------
- list or tuple
- List or tuple of values of the metadata key.
- """
- base_dir = get_base_dir(base_dir)
- list_metadata_paths = get_list_metadata(
- base_dir=base_dir, data_sources=None, campaign_names=None, station_names=None, with_stations_data=False
- )
- list_info = []
- for fpath in list_metadata_paths:
- data_source = _infer_data_source_from_path(fpath)
- campaign_name = _infer_campaign_name_from_path(fpath)
- station_name = os.path.basename(fpath).replace(".yml", "")
- metadata = read_station_metadata(
- base_dir=base_dir,
- product_level="RAW",
- data_source=data_source,
- campaign_name=campaign_name,
- station_name=station_name,
- )
- value = metadata[key]
- info = (data_source, campaign_name, station_name, value)
- list_info.append(info)
- if not return_tuple:
- list_info = [info[3] for info in list_info]
- return list_info
-
-
#### --------------------------------------------------------------------------.
-#### Metadata Archive Checks
+#### Check Metadata Archive
def check_archive_metadata_keys(base_dir: str = None) -> bool:
diff --git a/disdrodb/metadata/info.py b/disdrodb/metadata/info.py
new file mode 100644
index 00000000..3967326d
--- /dev/null
+++ b/disdrodb/metadata/info.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Metadata Info Extraction."""
+import os
+
+from disdrodb.configs import get_base_dir
+from disdrodb.l0.io import (
+ _infer_campaign_name_from_path,
+ _infer_data_source_from_path,
+)
+from disdrodb.metadata.io import get_list_metadata, read_station_metadata
+
+
+def get_archive_metadata_key_value(key: str, return_tuple: bool = True, base_dir: str = None):
+ """Return the values of a metadata key for all the archive.
+
+ Parameters
+ ----------
+ base_dir : str
+ Path to the disdrodb directory.
+ key : str
+ Metadata key.
+ return_tuple : bool, optional
+ If True, returns a tuple of values with station, campaign and data source name.
+ If False, returns a list of values without station, campaign and data source name.
+ The default is True.
+ base_dir : str (optional)
+ Base directory of DISDRODB. Format: <...>/DISDRODB
+ If None (the default), the disdrodb config variable 'dir' is used.
+
+ Returns
+ -------
+ list or tuple
+ List or tuple of values of the metadata key.
+ """
+ base_dir = get_base_dir(base_dir)
+ list_metadata_paths = get_list_metadata(
+ base_dir=base_dir, data_sources=None, campaign_names=None, station_names=None, with_stations_data=False
+ )
+ list_info = []
+ for fpath in list_metadata_paths:
+ data_source = _infer_data_source_from_path(fpath)
+ campaign_name = _infer_campaign_name_from_path(fpath)
+ station_name = os.path.basename(fpath).replace(".yml", "")
+ metadata = read_station_metadata(
+ base_dir=base_dir,
+ product_level="RAW",
+ data_source=data_source,
+ campaign_name=campaign_name,
+ station_name=station_name,
+ )
+ value = metadata[key]
+ info = (data_source, campaign_name, station_name, value)
+ list_info.append(info)
+ if not return_tuple:
+ list_info = [info[3] for info in list_info]
+ return list_info
diff --git a/disdrodb/api/metadata.py b/disdrodb/metadata/io.py
similarity index 96%
rename from disdrodb/api/metadata.py
rename to disdrodb/metadata/io.py
index d8a0995f..78120043 100644
--- a/disdrodb/api/metadata.py
+++ b/disdrodb/metadata/io.py
@@ -21,10 +21,9 @@
import glob
import os
-import yaml
-
from disdrodb.api.io import get_disdrodb_path
from disdrodb.configs import get_base_dir
+from disdrodb.utils.yaml import read_yaml
def read_station_metadata(product_level, data_source, campaign_name, station_name, base_dir=None):
@@ -60,16 +59,14 @@ def read_station_metadata(product_level, data_source, campaign_name, station_nam
check_exist=True,
)
# Define metadata filepath
- fpath = os.path.join(campaign_dir, "metadata", f"{station_name}.yml")
+ metadata_fpath = os.path.join(campaign_dir, "metadata", f"{station_name}.yml")
# Check the file exists
- if not os.path.exists(fpath):
- raise ValueError(f"The metadata file for {station_name} at {fpath} does not exists.")
+ if not os.path.exists(metadata_fpath):
+ raise ValueError(f"The metadata file for {station_name} at {metadata_fpath} does not exists.")
- # Read the metadata file
- with open(fpath) as f:
- dictionary = yaml.safe_load(f)
- return dictionary
+ metadata_dict = read_yaml(metadata_fpath)
+ return metadata_dict
def get_list_metadata(
diff --git a/disdrodb/metadata/manipulation.py b/disdrodb/metadata/manipulation.py
new file mode 100644
index 00000000..c5b42d7e
--- /dev/null
+++ b/disdrodb/metadata/manipulation.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Metadata Manipulation Tools."""
+
+from disdrodb.metadata.check_metadata import get_metadata_invalid_keys, get_metadata_missing_keys
+from disdrodb.metadata.standards import get_valid_metadata_keys
+
+
+def remove_invalid_metadata_keys(metadata):
+    """Delete, in place, the keys returned by ``get_metadata_invalid_keys``; return the same dictionary."""
+    keys_to_drop = get_metadata_invalid_keys(metadata)
+    for key in keys_to_drop:
+        del metadata[key]
+    return metadata
+
+
+def add_missing_metadata_keys(metadata):
+    """Set, in place, the keys returned by ``get_metadata_missing_keys`` to ""; return the same dictionary."""
+    absent_keys = get_metadata_missing_keys(metadata)
+    # Every missing key receives an empty string as placeholder value.
+    metadata.update(dict.fromkeys(absent_keys, ""))
+    return metadata
+
+
+def sort_metadata_dictionary(metadata):
+    """Return a new dictionary with the keys ordered as in ``get_valid_metadata_keys``."""
+    # Every valid key must be present (KeyError otherwise); any other key is left out.
+    ordered_items = [(key, metadata[key]) for key in get_valid_metadata_keys()]
+    return dict(ordered_items)
diff --git a/disdrodb/metadata/scripts/disdrodb_check_metadata_archive.py b/disdrodb/metadata/scripts/disdrodb_check_metadata_archive.py
new file mode 100644
index 00000000..e852be1b
--- /dev/null
+++ b/disdrodb/metadata/scripts/disdrodb_check_metadata_archive.py
@@ -0,0 +1,26 @@
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Wrapper to check DISDRODB Metadata Archive Compliance from terminal."""
+import click
+
+
+@click.command()
+@click.option("--base_dir", type=str, show_default=True, default=None, help="DISDRODB root directory")
+def disdrodb_check_metadata_archive(base_dir=None):
+    # Imported inside the command body; presumably to load disdrodb only when the command runs.
+    from disdrodb.metadata.check_metadata import check_archive_metadata_compliance
+    check_archive_metadata_compliance(base_dir=base_dir)
diff --git a/disdrodb/metadata/standards.py b/disdrodb/metadata/standards.py
new file mode 100644
index 00000000..f0995b0c
--- /dev/null
+++ b/disdrodb/metadata/standards.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Define DISDRODB Metadata Standards."""
+
+
+def get_valid_metadata_keys() -> list:
+    """Get the list of valid DISDRODB metadata keys.
+
+    Returns
+    -------
+    list
+        List of valid metadata keys.
+    """
+    # NOTE: When updating one of these keys, one needs to update the YAML files in/at:
+    # - the disdrodb-data repository
+    # - disdrodb/data/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/*.yml
+    # - disdrodb/tests/data/check_readers/DISDRODB/Raw/*/*/metadata/10.yml
+    # - disdrodb/tests/data/test_dir_creation/metadata/123.yml
+    # - disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml
+    # - disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml
+    list_attrs = [
+        ## Mandatory fields
+        "data_source",
+        "campaign_name",
+        "station_name",
+        "sensor_name",
+        "reader",
+        "raw_data_format",  # 'txt', 'netcdf'
+        "platform_type",  # 'fixed', 'mobile'
+        ## DISDRODB keys
+        "disdrodb_data_url",
+        ## Source
+        "source",
+        "source_convention",
+        "source_processing_date",
+        ## Description
+        "title",
+        "description",
+        "project_name",
+        "keywords",
+        "summary",
+        "history",
+        "comment",
+        "station_id",
+        "location",
+        "country",
+        "continent",
+        ## Deployment Info
+        "latitude",  # in degrees North
+        "longitude",  # in degrees East
+        "altitude",  # in meter above sea level
+        "deployment_status",  # 'ended', 'ongoing'
+        "deployment mode",  # 'land', 'ship', 'truck', 'cable'  NOTE(review): only key containing a space - confirm
+        "platform_protection",  # 'shielded', 'unshielded'
+        "platform_orientation",  # [0-360] from N (clockwise)
+        ## Sensor info
+        "sensor_long_name",
+        "sensor_manufacturer",
+        "sensor_wavelength",
+        "sensor_serial_number",
+        "firmware_iop",
+        "firmware_dsp",
+        "firmware_version",
+        "sensor_beam_length",
+        "sensor_beam_width",
+        "sensor_nominal_width",  # ?
+        ## effective_measurement_area ?  # 0.54 m^2
+        "measurement_interval",  # sampling_interval ? [in seconds]
+        "calibration_sensitivity",
+        "calibration_certification_date",
+        "calibration_certification_url",
+        ## Attribution
+        "contributors",
+        "authors",
+        "authors_url",
+        "contact",
+        "contact_information",
+        "acknowledgement",  # acknowledgements?
+        "references",
+        "documentation",
+        "website",
+        "institution",
+        "source_repository",
+        "license",
+        "doi",
+    ]
+    return list_attrs
diff --git a/disdrodb/tests/conftest.py b/disdrodb/tests/conftest.py
index 46de3101..d8e92ddd 100644
--- a/disdrodb/tests/conftest.py
+++ b/disdrodb/tests/conftest.py
@@ -8,9 +8,9 @@
import shutil
import pytest
-import yaml
from disdrodb import __root_path__
+from disdrodb.utils.yaml import write_yaml
@pytest.fixture
@@ -41,8 +41,7 @@ def create_test_config_files(request):
os.makedirs(test_folder)
test_file_path = os.path.join(test_folder, file_name)
- with open(test_file_path, "w") as f:
- yaml.dump(dictionary, f)
+ write_yaml(dictionary, test_file_path)
yield
os.remove(test_file_path)
diff --git a/disdrodb/tests/data/check_readers/DISDRODB/Raw/EPFL/PARSIVEL_2007/metadata/10.yml b/disdrodb/tests/data/check_readers/DISDRODB/Raw/EPFL/PARSIVEL_2007/metadata/10.yml
index c3a2c57c..ccb3bea0 100644
--- a/disdrodb/tests/data/check_readers/DISDRODB/Raw/EPFL/PARSIVEL_2007/metadata/10.yml
+++ b/disdrodb/tests/data/check_readers/DISDRODB/Raw/EPFL/PARSIVEL_2007/metadata/10.yml
@@ -5,6 +5,7 @@ sensor_name: OTT_Parsivel
reader: EPFL/PARSIVEL_2007
raw_data_format: txt
platform_type: fixed
+disdrodb_data_url: ''
source: ''
source_convention: ''
source_processing_date: ''
diff --git a/disdrodb/tests/data/test_folders_files_creation/.gitkeep b/disdrodb/tests/data/test_dir_creation/.gitkeep
similarity index 100%
rename from disdrodb/tests/data/test_folders_files_creation/.gitkeep
rename to disdrodb/tests/data/test_dir_creation/.gitkeep
diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0A/STATION_NAME/CAMPAIGN_NAME_sSTATIONID_SUFFIX.parquet b/disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0A/STATION_NAME/CAMPAIGN_NAME_sSTATIONID_SUFFIX.parquet
similarity index 100%
rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0A/STATION_NAME/CAMPAIGN_NAME_sSTATIONID_SUFFIX.parquet
rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0A/STATION_NAME/CAMPAIGN_NAME_sSTATIONID_SUFFIX.parquet
diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0B/STATION_NAME/DISDRODB.L0B.Raw.CAMPAIGN_NAME.STATIONID.sensor-name.s20190326000000.e20210208000000.dev.nc b/disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0B/STATION_NAME/DISDRODB.L0B.Raw.CAMPAIGN_NAME.STATIONID.sensor-name.s20190326000000.e20210208000000.dev.nc
similarity index 100%
rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0B/STATION_NAME/DISDRODB.L0B.Raw.CAMPAIGN_NAME.STATIONID.sensor-name.s20190326000000.e20210208000000.dev.nc
rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/L0B/STATION_NAME/DISDRODB.L0B.Raw.CAMPAIGN_NAME.STATIONID.sensor-name.s20190326000000.e20210208000000.dev.nc
diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml b/disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml
similarity index 100%
rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml
rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml
diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/data/STATION_NAME/data.csv b/disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/data/STATION_NAME/data.csv
similarity index 100%
rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/data/STATION_NAME/data.csv
rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/data/STATION_NAME/data.csv
diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/issue/STATION_NAME.yml b/disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/issue/STATION_NAME.yml
similarity index 100%
rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/issue/STATION_NAME.yml
rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/issue/STATION_NAME.yml
diff --git a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml b/disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml
similarity index 97%
rename from disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml
rename to disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml
index 382e7f2f..5193b4e8 100644
--- a/disdrodb/tests/data/test_folders_files_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml
+++ b/disdrodb/tests/data/test_dir_structure/DISDRODB/Raw/DATA_SOURCE/CAMPAIGN_NAME/metadata/STATION_NAME.yml
@@ -5,6 +5,7 @@ sensor_name: 'OTT_Parsivel'
reader: 'EPFL/EPFL_2009'
raw_data_format: txt
platform_type: fixed
+disdrodb_data_url: ''
source: ''
source_convention: ''
source_processing_date: ''
diff --git a/disdrodb/tests/test_data_transfer/test_download_data.py b/disdrodb/tests/test_data_transfer/test_download_data.py
index c8b1d259..ab0deec2 100644
--- a/disdrodb/tests/test_data_transfer/test_download_data.py
+++ b/disdrodb/tests/test_data_transfer/test_download_data.py
@@ -1,56 +1,32 @@
-#!/usr/bin/env python3
-
-# -----------------------------------------------------------------------------.
-# Copyright (c) 2021-2023 DISDRODB developers
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-# -----------------------------------------------------------------------------.
-"""Test DISDRODB download utility."""
+# #!/usr/bin/env python3
+
+# # -----------------------------------------------------------------------------.
+# # Copyright (c) 2021-2023 DISDRODB developers
+# #
+# # This program is free software: you can redistribute it and/or modify
+# # it under the terms of the GNU General Public License as published by
+# # the Free Software Foundation, either version 3 of the License, or
+# # (at your option) any later version.
+# #
+# # This program is distributed in the hope that it will be useful,
+# # but WITHOUT ANY WARRANTY; without even the implied warranty of
+# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# # GNU General Public License for more details.
+# #
+# # You should have received a copy of the GNU General Public License
+# # along with this program. If not, see <http://www.gnu.org/licenses/>.
+# # -----------------------------------------------------------------------------.
+# """Test DISDRODB download utility."""
import os
import pytest
-import yaml
from disdrodb.data_transfer.download_data import (
_download_file_from_url,
- _download_station_data,
_is_empty_directory,
)
-
-
-def test_download_file_from_url(tmp_path):
- # Test download case when empty directory
- url = "https://raw.githubusercontent.com/ltelab/disdrodb/main/README.md"
- _download_file_from_url(url, tmp_path, force=False)
- filename = os.path.basename(url) # README.md
- filepath = os.path.join(tmp_path, filename)
- assert os.path.isfile(filepath) is True
-
- # Test download case when directory is not empty and force=False --> avoid download
- url = "https://raw.githubusercontent.com/ltelab/disdrodb/main/CODE_OF_CONDUCT.md"
- _download_file_from_url(url, tmp_path, force=False)
- filename = os.path.basename(url) # README.md
- filepath = os.path.join(tmp_path, filename)
- assert not os.path.isfile(filepath)
-
- # Test download case when directory is not empty and force=True --> it download
- url = "https://raw.githubusercontent.com/ltelab/disdrodb/main/CODE_OF_CONDUCT.md"
- _download_file_from_url(url, tmp_path, force=True)
- filename = os.path.basename(url) # README.md
- filepath = os.path.join(tmp_path, filename)
- assert os.path.isfile(filepath)
+from disdrodb.utils.yaml import write_yaml
class TestIsEmptyDirectory:
@@ -67,13 +43,40 @@ def test_non_directory_path(self, tmp_path):
def test_empty_directory(self, tmp_path):
# `tmp_path` is a pytest fixture that provides a temporary directory unique to the test invocation
- assert _is_empty_directory(tmp_path) is True
+ assert _is_empty_directory(tmp_path)
def test_non_empty_directory(self, tmp_path):
# Create a temporary file inside the temporary directory
file_path = tmp_path / "test_file.txt"
file_path.write_text("This is a test file.")
- assert _is_empty_directory(tmp_path) is False
+ assert not _is_empty_directory(tmp_path)
+
+
+def test_download_file_from_url(tmp_path):
+    # DEBUG
+    # tmp_path = "/tmp/empty_2"
+    # os.makedirs(tmp_path)
+
+    # Test download case when empty directory
+    url = "https://httpbin.org/stream-bytes/1024"
+    _download_file_from_url(url, tmp_path, force=False)
+    filename = os.path.basename(url)  # 1024
+    filepath = os.path.join(tmp_path, filename)
+    assert os.path.isfile(filepath)
+
+    # Test download case when directory is not empty and force=False --> avoid download
+    url = "https://httpbin.org/stream-bytes/1025"
+    _download_file_from_url(url, tmp_path, force=False)
+    filename = os.path.basename(url)  # 1025
+    filepath = os.path.join(tmp_path, filename)
+    assert not os.path.isfile(filepath)
+
+    # Test download case when directory is not empty and force=True --> it downloads
+    url = "https://httpbin.org/stream-bytes/1026"
+    _download_file_from_url(url, tmp_path, force=True)
+    filename = os.path.basename(url)  # 1026
+    filepath = os.path.join(tmp_path, filename)
+    assert os.path.isfile(filepath)
def create_fake_metadata_file(
@@ -86,31 +89,36 @@ def create_fake_metadata_file(
metadata_dir_path = tmp_path / "DISDRODB" / "Raw" / data_source / campaign_name / "metadata"
metadata_dir_path.mkdir(parents=True)
metadata_fpath = os.path.join(metadata_dir_path, f"{station_name}.yml")
- # create a fake yaml file in temp folder
- with open(metadata_fpath, "w") as f:
- yaml_dict = {}
- yaml_dict["station_name"] = station_name
- if with_url:
- disdro_repo_path = "https://raw.githubusercontent.com/ltelab/disdrodb/main/"
- test_data_path = "disdrodb/tests/data/test_data_download/station_files.zip"
- disdrodb_data_url = disdro_repo_path + test_data_path
- yaml_dict["disdrodb_data_url"] = disdrodb_data_url
-
- yaml.dump(yaml_dict, f)
+ # Define fake metadata dictionary
+ yaml_dict = {}
+ yaml_dict["station_name"] = station_name
+ if with_url:
+ raw_github_path = "https://raw.githubusercontent.com"
+ disdro_repo_path = f"{raw_github_path}/ltelab/disdrodb/main"
+ test_data_path = "disdrodb/tests/data/test_data_download/station_files.zip"
+ disdrodb_data_url = f"{disdro_repo_path}/{test_data_path}"
+ yaml_dict["disdrodb_data_url"] = disdrodb_data_url
+ # Write fake yaml file in temp folder
+ write_yaml(yaml_dict, metadata_fpath)
assert os.path.exists(metadata_fpath)
return metadata_fpath
-def test_download_station_data(tmp_path):
- station_name = "station_name"
- metadata_fpath = create_fake_metadata_file(tmp_path, station_name=station_name, with_url=True)
- station_dir_path = metadata_fpath.replace("metadata", "data").replace(".yml", "")
- _download_station_data(metadata_fpath=metadata_fpath, force=True)
- # Assert files in the zip file have been unzipped
- assert os.path.isfile(os.path.join(station_dir_path, "station_file1.txt"))
- # Assert inner zipped files are not unzipped !
- assert os.path.isfile(os.path.join(station_dir_path, "station_file2.zip"))
- # Assert inner directories are there
- assert os.path.isdir(os.path.join(station_dir_path, "2020"))
- # Assert zip file has been removed
- assert not os.path.exists(os.path.join(station_dir_path, "station_files.zip"))
+# def test_download_station_data(tmp_path):
+# # DEBUG
+# # from pathlib import Path
+# # tmp_path = Path("/tmp/empty_3")
+# # os.makedirs(tmp_path)
+
+# station_name = "station_name"
+# metadata_fpath = create_fake_metadata_file(tmp_path, station_name=station_name, with_url=True)
+# station_dir_path = metadata_fpath.replace("metadata", "data").replace(".yml", "")
+# _download_station_data(metadata_fpath=metadata_fpath, force=True)
+# # Assert files in the zip file have been unzipped
+# assert os.path.isfile(os.path.join(station_dir_path, "station_file1.txt"))
+# # Assert inner zipped files are not unzipped !
+# assert os.path.isfile(os.path.join(station_dir_path, "station_file2.zip"))
+# # Assert inner directories are there
+# assert os.path.isdir(os.path.join(station_dir_path, "2020"))
+# # Assert zip file has been removed
+# assert not os.path.exists(os.path.join(station_dir_path, "station_files.zip"))
diff --git a/disdrodb/tests/test_data_transfer/test_upload_data.py b/disdrodb/tests/test_data_transfer/test_upload_data.py
index ee70e2bf..552bd51d 100644
--- a/disdrodb/tests/test_data_transfer/test_upload_data.py
+++ b/disdrodb/tests/test_data_transfer/test_upload_data.py
@@ -26,7 +26,8 @@
import pytest
from disdrodb.data_transfer.upload_data import upload_disdrodb_archives
-from disdrodb.utils.yaml import read_yaml, write_yaml
+from disdrodb.metadata import read_station_metadata
+from disdrodb.utils.yaml import write_yaml
from disdrodb.utils.zenodo import _create_zenodo_deposition
@@ -53,11 +54,6 @@ def create_fake_data_dir(base_dir, data_source, campaign_name, station_name):
return data_dir
-def get_metadata_dict(base_dir, data_source, campaign_name, station_name):
- metadata_fpath = base_dir / "Raw" / data_source / campaign_name / "metadata" / f"{station_name}.yml"
- return read_yaml(metadata_fpath)
-
-
def mock_zenodo_api(requests_mock):
"""Mock Zenodo API."""
@@ -98,11 +94,23 @@ def test_upload_to_zenodo(tmp_path, requests_mock):
upload_disdrodb_archives(platform="sandbox.zenodo", base_dir=str(base_dir))
# Check metadata files (1st one should not have changed)
- metadata_dict1 = get_metadata_dict(base_dir, data_source, campaign_name, station_name1)
+ metadata_dict1 = read_station_metadata(
+ base_dir=base_dir,
+ product_level="RAW",
+ data_source=data_source,
+ campaign_name=campaign_name,
+ station_name=station_name1,
+ )
new_station_url1 = metadata_dict1["disdrodb_data_url"]
assert new_station_url1 == station_url1
- metadata_dict2 = get_metadata_dict(base_dir, data_source, campaign_name, station_name2)
+ metadata_dict2 = read_station_metadata(
+ base_dir=base_dir,
+ product_level="RAW",
+ data_source=data_source,
+ campaign_name=campaign_name,
+ station_name=station_name2,
+ )
new_station_url2 = metadata_dict2["disdrodb_data_url"]
list_new_station_url2 = new_station_url2.split(os.path.sep)
diff --git a/disdrodb/tests/test_l0/test_io.py b/disdrodb/tests/test_l0/test_io.py
index 68f90dee..42c54cba 100644
--- a/disdrodb/tests/test_l0/test_io.py
+++ b/disdrodb/tests/test_l0/test_io.py
@@ -26,10 +26,10 @@
import pandas as pd
import pytest
import xarray as xr
-import yaml
from disdrodb import __root_path__
from disdrodb.l0 import io
+from disdrodb.utils.yaml import write_yaml
TEST_DATA_DIR = os.path.join(__root_path__, "disdrodb", "tests", "data")
@@ -42,9 +42,7 @@ def create_fake_metadata_file(
subfolder_path.mkdir(parents=True)
file_path = os.path.join(subfolder_path, yaml_file_name)
# create a fake yaml file in temp folder
- with open(file_path, "w") as f:
- yaml.dump(yaml_dict, f)
-
+ write_yaml(yaml_dict, file_path)
assert os.path.exists(file_path)
return file_path
@@ -90,7 +88,7 @@ def test_create_initial_directory_structure(tmp_path, mocker):
processed_dir = os.path.join(tmp_path, "DISDRODB", "Processed", campaign_name)
subfolder_path = tmp_path / "DISDRODB" / "Processed" / campaign_name
subfolder_path.mkdir(parents=True)
- mocker.patch("disdrodb.l0.metadata.check_metadata_compliance", return_value=None)
+ mocker.patch("disdrodb.metadata.check_metadata.check_metadata_compliance", return_value=None)
io.create_initial_directory_structure(
raw_dir=raw_dir, processed_dir=processed_dir, station_name=station_name, force=force
@@ -290,7 +288,7 @@ def test_get_l0b_dir(path_process_dir):
def test_get_l0a_fpath():
"""
Test the naming and the path of the L0A file
- Note that this test needs "/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/
+ Note that this test needs "/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/
metadata/STATION_NAME.yml"
"""
from disdrodb.l0.standards import PRODUCT_VERSION
@@ -307,7 +305,7 @@ def test_get_l0a_fpath():
# Set paths
path_campaign_name = os.path.join(
TEST_DATA_DIR,
- "test_folders_files_structure",
+ "test_dir_structure",
"DISDRODB",
"Processed",
data_source,
@@ -331,7 +329,7 @@ def test_get_l0a_fpath():
def test_get_l0b_fpath():
"""
Test the naming and the path of the L0B file
- Note that this test needs "/data/test_folders_files_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/
+ Note that this test needs "/data/test_dir_structure/DISDRODB/Processed/DATA_SOURCE/CAMPAIGN_NAME/
metadata/STATION_NAME.yml"
"""
from disdrodb.l0.standards import PRODUCT_VERSION
@@ -348,7 +346,7 @@ def test_get_l0b_fpath():
# Set paths
path_campaign_name = os.path.join(
TEST_DATA_DIR,
- "test_folders_files_structure",
+ "test_dir_structure",
"DISDRODB",
"Processed",
data_source,
@@ -418,7 +416,7 @@ def test_get_raw_file_list():
####--------------------------------------------------------------------------.
folder_name = "folder_creation_deletion_test"
-path_file_temp = os.path.join(TEST_DATA_DIR, "test_folders_files_creation", folder_name)
+path_file_temp = os.path.join(TEST_DATA_DIR, "test_dir_creation", folder_name)
def test_create_directory(tmp_path):
@@ -468,7 +466,7 @@ def test_check_raw_dir():
# Set paths
raw_dir = os.path.join(
TEST_DATA_DIR,
- "test_folders_files_structure",
+ "test_dir_structure",
"DISDRODB",
"Raw",
data_source,
@@ -483,7 +481,7 @@ def test_check_campaign_name():
data_source = "DATA_SOURCE"
path_raw = os.path.join(
TEST_DATA_DIR,
- "test_folders_files_structure",
+ "test_dir_structure",
"DISDRODB",
"Raw",
data_source,
@@ -491,7 +489,7 @@ def test_check_campaign_name():
)
path_process = os.path.join(
TEST_DATA_DIR,
- "test_folders_files_creation",
+ "test_dir_creation",
"DISDRODB",
"Processed",
data_source,
@@ -507,7 +505,7 @@ def test_copy_station_metadata():
station_name = "STATION_NAME"
raw_dir = os.path.join(
TEST_DATA_DIR,
- "test_folders_files_structure",
+ "test_dir_structure",
"DISDRODB",
"Raw",
data_source,
@@ -515,7 +513,7 @@ def test_copy_station_metadata():
)
processed_dir = os.path.join(
TEST_DATA_DIR,
- "test_folders_files_creation",
+ "test_dir_creation",
"DISDRODB",
"Processed",
data_source,
@@ -553,7 +551,7 @@ def test_copy_station_metadata():
# raw_dir = os.path.join(
# TEST_DATA_DIR,
-# "test_folders_files_structure",
+# "test_dir_structure",
# "DISDRODB",
# "Raw",
# data_source,
@@ -561,7 +559,7 @@ def test_copy_station_metadata():
# )
# processed_dir = os.path.join(
# TEST_DATA_DIR,
-# "test_folders_files_creation",
+# "test_dir_creation",
# "DISDRODB",
# "Processed",
# data_source,
@@ -602,7 +600,7 @@ def test_copy_station_metadata():
# processed_dir = os.path.join(
# TEST_DATA_DIR,
-# "test_folders_files_creation",
+# "test_dir_creation",
# "DISDRODB",
# "Processed",
# data_source,
@@ -639,7 +637,7 @@ def test__read_l0a():
# save dataframe to parquet file
path_parquet_file = os.path.join(
TEST_DATA_DIR,
- "test_folders_files_creation",
+ "test_dir_creation",
"fake_data_sample.parquet",
)
df.to_parquet(path_parquet_file, compression="gzip")
@@ -662,7 +660,7 @@ def test_read_l0a_dataframe():
# save dataframe to parquet file
path_parquet_file = os.path.join(
TEST_DATA_DIR,
- "test_folders_files_creation",
+ "test_dir_creation",
f"fake_data_sample_{i}.parquet",
)
df.to_parquet(path_parquet_file, compression="gzip")
diff --git a/disdrodb/tests/test_l0/test_l0_reader.py b/disdrodb/tests/test_l0/test_l0_reader.py
index 9170b526..68d889cb 100644
--- a/disdrodb/tests/test_l0/test_l0_reader.py
+++ b/disdrodb/tests/test_l0/test_l0_reader.py
@@ -22,7 +22,6 @@
import os
import pytest
-import yaml
from disdrodb.l0 import l0_reader
from disdrodb.l0.l0_reader import (
@@ -34,6 +33,7 @@
get_reader_from_metadata_reader_key,
get_station_reader_function,
)
+from disdrodb.utils.yaml import write_yaml
# Some test are based on the following reader:
DATA_SOURCE = "EPFL"
@@ -48,11 +48,8 @@ def create_fake_metadata_file(
os.makedirs(subfolder_path, exist_ok=True)
file_path = os.path.join(subfolder_path, yaml_file_name)
# create a fake yaml file in temp folder
- with open(file_path, "w") as f:
- yaml.dump(yaml_dict, f)
-
+ write_yaml(yaml_dict, file_path)
assert os.path.exists(file_path)
-
return file_path
diff --git a/disdrodb/tests/test_l0/test_l0a_processing.py b/disdrodb/tests/test_l0/test_l0a_processing.py
index d23b2c36..4c9a9405 100644
--- a/disdrodb/tests/test_l0/test_l0a_processing.py
+++ b/disdrodb/tests/test_l0/test_l0a_processing.py
@@ -536,7 +536,7 @@ def test_write_l0a():
# Write parquet file
path_parquet_file = os.path.join(
TEST_DATA_DIR,
- "test_folders_files_creation",
+ "test_dir_creation",
"fake_data_sample.parquet",
)
l0a_processing.write_l0a(df, path_parquet_file, True, False)
diff --git a/disdrodb/tests/test_l0/test_l0b_concat.py b/disdrodb/tests/test_l0/test_l0b_concat.py
index 3bc52425..0c474bd0 100644
--- a/disdrodb/tests/test_l0/test_l0b_concat.py
+++ b/disdrodb/tests/test_l0/test_l0b_concat.py
@@ -24,9 +24,9 @@
import netCDF4 as nc
import numpy as np
import xarray as xr
-import yaml
from disdrodb.l0.l0b_nc_concat import _concatenate_netcdf_files, run_disdrodb_l0b_concat
+from disdrodb.utils.yaml import write_yaml
def create_dummy_netcdf_file(filename: str, data: tuple):
@@ -72,8 +72,7 @@ def create_fake_data_file(tmp_path, data_source, campaign_name, station_name="",
assert os.path.exists(metedata_folder_path)
file_path = os.path.join(metedata_folder_path, f"{station_name}.yml")
- with open(file_path, "w") as f:
- yaml.dump({"station_name": station_name}, f)
+ write_yaml({"station_name": station_name}, file_path)
return subfolder_path
diff --git a/disdrodb/tests/test_l0/test_metadata.py b/disdrodb/tests/test_l0/test_metadata.py
index 59139038..e24cf8e0 100644
--- a/disdrodb/tests/test_l0/test_metadata.py
+++ b/disdrodb/tests/test_l0/test_metadata.py
@@ -20,15 +20,14 @@
import os
-import yaml
-
from disdrodb import __root_path__
from disdrodb.l0.metadata import (
+ _get_default_metadata_dict,
create_campaign_default_metadata,
- get_default_metadata_dict,
read_metadata,
write_default_metadata,
)
+from disdrodb.utils.yaml import read_yaml
TEST_DATA_DIR = os.path.join(__root_path__, "disdrodb", "tests", "data")
@@ -70,7 +69,7 @@ def test_create_campaign_default_metadata(tmp_path):
def test_get_default_metadata():
- assert isinstance(get_default_metadata_dict(), dict)
+ assert isinstance(_get_default_metadata_dict(), dict)
def create_fake_metadata_folder(tmp_path, data_source="data_source", campaign_name="campaign_name"):
@@ -97,11 +96,10 @@ def test_write_default_metadata(tmp_path):
assert os.path.exists(fpath)
# open it
- with open(str(fpath)) as f:
- dictionary = yaml.safe_load(f)
+ dictionary = read_yaml(str(fpath))
# check is the expected dictionary
- expected_dict = get_default_metadata_dict()
+ expected_dict = _get_default_metadata_dict()
expected_dict["data_source"] = data_source
expected_dict["campaign_name"] = campaign_name
expected_dict["station_name"] = station_name
@@ -113,7 +111,7 @@ def test_write_default_metadata(tmp_path):
def test_read_metadata():
- raw_dir = os.path.join(TEST_DATA_DIR, "test_folders_files_creation")
+ raw_dir = os.path.join(TEST_DATA_DIR, "test_dir_creation")
station_name = "123"
metadata_folder_path = os.path.join(raw_dir, "metadata")
@@ -127,7 +125,7 @@ def test_read_metadata():
os.remove(metadata_path)
# create data
- data = get_default_metadata_dict()
+ data = _get_default_metadata_dict()
# create metadata file
write_default_metadata(str(metadata_path))
@@ -136,9 +134,3 @@ def test_read_metadata():
function_return = read_metadata(raw_dir, station_name)
assert function_return == data
-
-
-def test_check_metadata_compliance():
- # function_return = metadata.check_metadata_compliance()
- # function not implemented
- assert 1 == 1
diff --git a/disdrodb/tests/test_l0/test_check_metadata.py b/disdrodb/tests/test_metadata/test_check_metadata.py
similarity index 82%
rename from disdrodb/tests/test_l0/test_check_metadata.py
rename to disdrodb/tests/test_metadata/test_check_metadata.py
index 355d948f..41dde581 100644
--- a/disdrodb/tests/test_l0/test_check_metadata.py
+++ b/disdrodb/tests/test_metadata/test_check_metadata.py
@@ -25,8 +25,8 @@
from disdrodb import __root_path__
from disdrodb.api.configs import available_sensor_names
-from disdrodb.l0 import metadata
-from disdrodb.l0.check_metadata import (
+from disdrodb.l0.l0_reader import available_readers
+from disdrodb.metadata.check_metadata import (
check_archive_metadata_campaign_name,
check_archive_metadata_compliance,
check_archive_metadata_data_source,
@@ -36,12 +36,11 @@
check_archive_metadata_sensor_name,
check_archive_metadata_station_name,
check_metadata_geolocation,
- get_archive_metadata_key_value,
identify_empty_metadata_keys,
identify_missing_metadata_coords,
)
-from disdrodb.l0.l0_reader import available_readers
-from disdrodb.utils.yaml import read_yaml
+from disdrodb.metadata.standards import get_valid_metadata_keys
+from disdrodb.utils.yaml import read_yaml, write_yaml
TEST_DATA_DIR = os.path.join(__root_path__, "disdrodb", "tests", "data")
@@ -100,11 +99,8 @@ def create_fake_metadata_file(
subfolder_path.mkdir(parents=True)
file_path = os.path.join(subfolder_path, yaml_file_name)
# create a fake yaml file in temp folder
- with open(file_path, "w") as f:
- yaml.dump(yaml_dict, f)
-
+ write_yaml(yaml_dict, file_path)
assert os.path.exists(file_path)
-
return file_path
@@ -127,58 +123,11 @@ def test_identify_missing_metadata_keys(tmp_path, capsys):
assert not captured.out
-def test_get_archive_metadata_key_value(tmp_path):
- expected_result = []
-
- base_dir = os.path.join(tmp_path, "DISDRODB")
- # Test 1 : one config file
- yaml_file_name = "station_1.yml"
- expected_key = "key1"
- expected_value = "value1"
- data_source = "data_source"
- campaign_name = "campaign_name"
-
- yaml_dict = {expected_key: expected_value}
- create_fake_metadata_file(tmp_path, yaml_file_name, yaml_dict, data_source, campaign_name)
- result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir)
- expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value))
-
- assert sorted(result) == sorted(expected_result)
-
- # Test 2 : two config files
- yaml_file_name = "station_2.yml"
- expected_key = "key1"
- expected_value = "value1"
- data_source = "data_source"
- campaign_name = "campaign_name"
-
- yaml_dict = {expected_key: expected_value}
- create_fake_metadata_file(tmp_path, yaml_file_name, yaml_dict, data_source, campaign_name)
- result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir)
- expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value))
-
- assert sorted(result) == sorted(expected_result)
-
- # Test 3: test tuple
- yaml_file_name = "station_3.yml"
- expected_key = "key1"
- expected_value = "value1"
- data_source = "data_source"
- campaign_name = "campaign_name"
- yaml_dict = {expected_key: expected_value}
- create_fake_metadata_file(tmp_path, yaml_file_name, yaml_dict, data_source, campaign_name)
- result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir, return_tuple=False)
- expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value))
- expected_result = [item[3] for item in expected_result]
-
- assert sorted(result) == sorted(expected_result)
-
-
def test_check_archive_metadata_keys(tmp_path):
base_dir = os.path.join(tmp_path, "DISDRODB")
# Test 1 : create a correct metadata file
# Get the list of valid metadata keys
- list_of_valid_metadata_keys = metadata.get_valid_metadata_keys()
+ list_of_valid_metadata_keys = get_valid_metadata_keys()
yaml_file_name = "station_1.yml"
yaml_dict = {i: "value1" for i in list_of_valid_metadata_keys}
data_source = "data_source"
diff --git a/disdrodb/tests/test_metadata/test_metadata_info.py b/disdrodb/tests/test_metadata/test_metadata_info.py
new file mode 100644
index 00000000..bd58186e
--- /dev/null
+++ b/disdrodb/tests/test_metadata/test_metadata_info.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Test Metadata Info Extraction."""
+
+import os
+
+from disdrodb.metadata.info import get_archive_metadata_key_value
+from disdrodb.utils.yaml import write_yaml
+
+
+def create_fake_metadata_file(
+ tmp_path, yaml_file_name, yaml_dict, data_source="data_source", campaign_name="campaign_name"
+):
+ subfolder_path = tmp_path / "DISDRODB" / "Raw" / data_source / campaign_name / "metadata"
+ if not os.path.exists(subfolder_path):
+ subfolder_path.mkdir(parents=True)
+ file_path = os.path.join(subfolder_path, yaml_file_name)
+ # create a fake yaml file in temp folder
+ write_yaml(yaml_dict, file_path)
+ assert os.path.exists(file_path)
+ return file_path
+
+
+def test_get_archive_metadata_key_value(tmp_path):
+ expected_result = []
+
+ base_dir = os.path.join(tmp_path, "DISDRODB")
+ # Test 1 : one config file
+ yaml_file_name = "station_1.yml"
+ expected_key = "key1"
+ expected_value = "value1"
+ data_source = "data_source"
+ campaign_name = "campaign_name"
+
+ yaml_dict = {expected_key: expected_value}
+ create_fake_metadata_file(tmp_path, yaml_file_name, yaml_dict, data_source, campaign_name)
+ result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir)
+ expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value))
+
+ assert sorted(result) == sorted(expected_result)
+
+ # Test 2 : two config files
+ yaml_file_name = "station_2.yml"
+ expected_key = "key1"
+ expected_value = "value1"
+ data_source = "data_source"
+ campaign_name = "campaign_name"
+
+ yaml_dict = {expected_key: expected_value}
+ create_fake_metadata_file(tmp_path, yaml_file_name, yaml_dict, data_source, campaign_name)
+ result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir)
+ expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value))
+
+ assert sorted(result) == sorted(expected_result)
+
+ # Test 3 : three config files, return_tuple=False (only the key values are returned)
+ yaml_file_name = "station_3.yml"
+ expected_key = "key1"
+ expected_value = "value1"
+ data_source = "data_source"
+ campaign_name = "campaign_name"
+ yaml_dict = {expected_key: expected_value}
+ create_fake_metadata_file(tmp_path, yaml_file_name, yaml_dict, data_source, campaign_name)
+ result = get_archive_metadata_key_value(key=expected_key, base_dir=base_dir, return_tuple=False)
+ expected_result.append((data_source, campaign_name, os.path.splitext(yaml_file_name)[0], expected_value))
+ expected_result = [item[3] for item in expected_result]
+
+ assert sorted(result) == sorted(expected_result)
diff --git a/disdrodb/tests/test_api/test_api_metadata.py b/disdrodb/tests/test_metadata/test_metadata_io.py
similarity index 94%
rename from disdrodb/tests/test_api/test_api_metadata.py
rename to disdrodb/tests/test_metadata/test_metadata_io.py
index 2d4101c8..55f6010c 100644
--- a/disdrodb/tests/test_api/test_api_metadata.py
+++ b/disdrodb/tests/test_metadata/test_metadata_io.py
@@ -20,9 +20,8 @@
import os
-import yaml
-
-from disdrodb.api.metadata import _get_list_all_metadata, _get_list_metadata_with_data
+from disdrodb.metadata.io import _get_list_all_metadata, _get_list_metadata_with_data
+from disdrodb.utils.yaml import write_yaml
def create_fake_metadata_file(
@@ -33,11 +32,8 @@ def create_fake_metadata_file(
subfolder_path.mkdir(parents=True)
file_path = os.path.join(subfolder_path, yaml_file_name)
# create a fake yaml file in temp folder
- with open(file_path, "w") as f:
- yaml.dump(yaml_dict, f)
-
+ write_yaml(yaml_dict, file_path)
assert os.path.exists(file_path)
-
return file_path
diff --git a/docs/source/metadata_archive.rst b/docs/source/metadata_archive.rst
index cb96e2e1..0834fc56 100644
--- a/docs/source/metadata_archive.rst
+++ b/docs/source/metadata_archive.rst
@@ -53,7 +53,7 @@ To update the DISDRODB Metadata Archive follow these steps:
.. code:: bash
export DISDRODB_BASE_DIR="/disdrodb-data/DISDRODB"
- disdrodb_check_metadata_compliance
+ disdrodb_check_metadata_archive
.. note::
The ``DISDRODB_BASE_DIR`` environment variable has to be specified only if the DISDRODB root directory had not been specified before.
diff --git a/pyproject.toml b/pyproject.toml
index 602397d4..ba927c49 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ authors = [
]
description = "This package provides tools to homogenize, process, and analyze global disdrometer data."
readme = "README.md"
-keywords = ["python", "disdrometer"]
+keywords = ["python", "disdrometer","parsivel", "drop size distribution"]
classifiers = [
"Development Status :: 1 - Planning",
"Intended Audience :: Developers",
@@ -48,7 +48,7 @@ dev = [
"pydantic",
"pytest-cov",
"pytest-mock",
- "requests-mock"
+ "requests-mock",
]
[tool.setuptools_scm]
@@ -73,6 +73,8 @@ run_disdrodb_l0="disdrodb.l0.scripts.run_disdrodb_l0:run_disdrodb_l0"
# Data transfer
download_disdrodb_archive="disdrodb.data_transfer.scripts.download_disdrodb_archive:download_disdrodb_archive"
upload_disdrodb_archive="disdrodb.data_transfer.scripts.upload_disdrodb_archive:upload_disdrodb_archive"
+# Metadata archive
+disdrodb_check_metadata_archive="disdrodb.metadata.scripts.disdrodb_check_metadata_archive:disdrodb_check_metadata_archive"
[tool.ruff]
select = ["F",
@@ -100,5 +102,6 @@ preview = true
[tool.coverage.run]
omit = [
- "disdrodb/l0/readers/*"
+ "disdrodb/l0/readers/*",
+ "disdrodb/l0/manuals/*",
]