Skip to content

Commit

Permalink
Refactor metadata internals
Browse files Browse the repository at this point in the history
  • Loading branch information
ghiggi committed Nov 3, 2023
1 parent 1e7d554 commit 8911365
Show file tree
Hide file tree
Showing 42 changed files with 657 additions and 541 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,6 @@ docs\source\reader_preparation.ipynb

# Ignore specific folders
/disdrodb/tests/temp/
/disdrodb/tests/data/test_folders_files_creation/*
!disdrodb/tests/data/test_folders_files_creation/.gitkeep
/disdrodb/tests/data/test_dir_creation/*
!disdrodb/tests/data/test_dir_creation/.gitkeep
disdrodb-dev
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ reader: EPFL/LOCARNO_2018
raw_data_format: raw
raw_data_type: raw
platform_type: fixed
disdrodb_data_url: ''
crs: WGS84
proj4_string: +proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs
EPSG: 4326
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ reader: EPFL/LOCARNO_2018
raw_data_format: raw
raw_data_type: raw
platform_type: fixed
disdrodb_data_url: ''
crs: WGS84
proj4_string: +proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs
EPSG: 4326
Expand Down
2 changes: 1 addition & 1 deletion disdrodb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
available_data_sources,
available_stations,
)
from disdrodb.api.metadata import read_station_metadata
from disdrodb.configs import define_disdrodb_configs as define_configs
from disdrodb.docs import open_documentation, open_sensor_documentation
from disdrodb.metadata import read_station_metadata

__root_path__ = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

Expand Down
11 changes: 0 additions & 11 deletions disdrodb/api/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,3 @@ def check_sensor_name(sensor_name: str, product_level: str = "l0") -> None:
msg = f"{sensor_name} not valid {sensor_name}. Valid values are {sensor_names}."
logger.error(msg)
raise ValueError(msg)


def check_product_level(product_level):
    """Validate and normalise a DISDRODB product level.

    Parameters
    ----------
    product_level : str
        Product level name. Matching is case-insensitive.

    Returns
    -------
    str
        The lower-cased product level.

    Raises
    ------
    TypeError
        If ``product_level`` is not a string.
    ValueError
        If the lower-cased value is not one of the accepted product levels.
    """
    accepted_levels = ["l0"]
    if isinstance(product_level, str):
        normalised = product_level.lower()
        if normalised in accepted_levels:
            return normalised
        # The message reports the lower-cased value, not the raw input.
        raise ValueError(f"{normalised} is an invalid 'product_level'. Valid values are: {accepted_levels}")
    raise TypeError("'product_level' must be a string.")
22 changes: 18 additions & 4 deletions disdrodb/api/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,26 @@
import logging
import os

from disdrodb.api.checks import check_product_level, check_sensor_name
from disdrodb.api.checks import check_sensor_name
from disdrodb.utils.yaml import read_yaml

logger = logging.getLogger(__name__)


def _check_product_level(product_level):
"""Check DISDRODB product level validity."""
# Note: in disdrodb.api.io there is another _check_product_level function !
if not isinstance(product_level, str):
raise TypeError("'product_level' must be a string.")
product_level = product_level.lower()
valid_product_levels = ["l0", "l0a", "l0b"]
if product_level not in valid_product_levels:
raise ValueError(f"{product_level} is an invalid 'product_level'. Valid values are: {valid_product_levels}")
if product_level in ["l0a", "l0b"]:
product_level = "l0"
return product_level


def _get_config_dir(product_level):
"""Define the config directory path of a given DISDRODB product level."""
from disdrodb import __root_path__
Expand Down Expand Up @@ -57,7 +71,7 @@ def get_sensor_configs_dir(sensor_name: str, product_level: str) -> str:
Error if the config directory does not exist.
"""
check_sensor_name(sensor_name, product_level=product_level)
product_level = check_product_level(product_level)
product_level = _check_product_level(product_level)
config_dir_path = _get_config_dir(product_level=product_level)
config_sensor_dir_path = os.path.join(config_dir_path, sensor_name)
if not os.path.exists(config_sensor_dir_path):
Expand Down Expand Up @@ -88,7 +102,7 @@ def read_config_file(sensor_name: str, product_level: str, filename: str) -> dic
Error if file does not exist.
"""
check_sensor_name(sensor_name, product_level=product_level)
product_level = check_product_level(product_level)
product_level = _check_product_level(product_level)
config_sensor_dir_path = get_sensor_configs_dir(sensor_name, product_level=product_level)
config_fpath = os.path.join(config_sensor_dir_path, filename)
# Check yaml file exists
Expand All @@ -112,6 +126,6 @@ def available_sensor_names(product_level: str = "L0") -> sorted:
DISDRODB product level.
By default, it returns the sensors available for DISDRODB L0 products.
"""
product_level = check_product_level(product_level)
product_level = _check_product_level(product_level)
config_dir_path = _get_config_dir(product_level=product_level)
return sorted(os.listdir(config_dir_path))
25 changes: 4 additions & 21 deletions disdrodb/api/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import os

import numpy as np
import yaml

from disdrodb.configs import get_base_dir

Expand Down Expand Up @@ -85,6 +84,10 @@ def _get_list_stations_dirs(product_level, campaign_dir):
data_path = os.path.join(campaign_dir, "data")
else:
data_path = os.path.join(campaign_dir, product_level)
# Check if the data directory exists
# - For a fresh disdrodb-data cloned repo, no "data" directories
if not os.path.exists(data_path):
return []
# Get list of directories (stations)
list_stations = os.listdir(data_path)
list_stations_dir = [os.path.join(data_path, station_name) for station_name in list_stations]
Expand Down Expand Up @@ -215,26 +218,6 @@ def _get_stations(base_dir, product_level):
return list_available_stations


def _get_metadata_fpath(base_dir, product_level, data_source, campaign_name, station_name):
    """Get the metadata file path of a given station.

    The returned path is ``<campaign_dir>/metadata/<station_name>.yml``, where
    ``<campaign_dir>`` is whatever ``get_disdrodb_path`` returns for the given
    base directory, product level, data source and campaign name.
    """
    station_campaign_dir = get_disdrodb_path(
        base_dir=base_dir,
        product_level=product_level,
        data_source=data_source,
        campaign_name=campaign_name,
    )
    return os.path.join(station_campaign_dir, "metadata", station_name + ".yml")


def get_metadata_dict(base_dir, product_level, data_source, campaign_name, station_name):
    """Read the metadata YAML file of a given station.

    The file location is resolved with ``_get_metadata_fpath`` and its content
    is parsed with ``yaml.safe_load``; the parsed object is returned as is.
    """
    station_metadata_fpath = _get_metadata_fpath(
        base_dir,
        product_level,
        data_source,
        campaign_name,
        station_name,
    )
    with open(station_metadata_fpath) as yaml_file:
        return yaml.safe_load(yaml_file)


####---------------------------------------------------------------------------.
#### I/O CHECKS
def check_product_level(product_level):
Expand Down
1 change: 1 addition & 0 deletions disdrodb/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,5 @@ def get_base_dir(base_dir=None):

if base_dir is None:
base_dir = disdrodb.config["dir"]
base_dir = str(base_dir) # convert Path to str
return base_dir
4 changes: 2 additions & 2 deletions disdrodb/data_transfer/download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
import pooch
import tqdm

from disdrodb.api.metadata import get_list_metadata
from disdrodb.configs import get_base_dir
from disdrodb.l0.io import _infer_disdrodb_tree_path
from disdrodb.metadata import get_list_metadata
from disdrodb.utils.compression import _unzip_file
from disdrodb.utils.yaml import read_yaml

Expand Down Expand Up @@ -159,7 +159,7 @@ def _download_station_data(metadata_fpath: str, force: bool = False) -> None:
"""
disdrodb_data_url, station_dir_path = _get_station_url_and_dir_path(metadata_fpath)
if disdrodb_data_url is not None:
if isinstance(disdrodb_data_url, str) and disdrodb_data_url != "":
# Download file
zip_fpath, to_unzip = _download_file_from_url(disdrodb_data_url, dst_dir_path=station_dir_path, force=force)
# Extract the stations files from the downloaded station.zip file
Expand Down
6 changes: 2 additions & 4 deletions disdrodb/data_transfer/upload_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

import click

from disdrodb.api.metadata import get_list_metadata
from disdrodb.metadata import get_list_metadata
from disdrodb.utils.compression import _zip_dir
from disdrodb.utils.yaml import read_yaml, write_yaml
from disdrodb.utils.zenodo import _create_zenodo_deposition, _upload_file_to_zenodo
Expand Down Expand Up @@ -95,19 +95,17 @@ def _filter_already_uploaded(metadata_fpaths: List[str]) -> List[str]:
"""Filter metadata files that already have a remote url specified."""

filtered = []

for metadata_fpath in metadata_fpaths:
metadata_dict = read_yaml(metadata_fpath)
if metadata_dict.get("disdrodb_data_url"):
print(f"{metadata_fpath} already has a remote url specified. Skipping.")
continue
filtered.append(metadata_fpath)

return filtered


def _upload_data_to_zenodo(metadata_fpaths: List[str], sandbox: bool = False) -> None:
"""Upload data to Zenodo.
"""Upload data to Zenodo Sandbox.
Parameters
----------
Expand Down
8 changes: 4 additions & 4 deletions disdrodb/l0/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from disdrodb.l0.check_metadata import (
check_archive_metadata_compliance,
check_archive_metadata_geolocation,
)
from disdrodb.l0.l0_processing import (
run_disdrodb_l0,
run_disdrodb_l0_station,
run_l0a,
run_l0b_from_nc,
)
from disdrodb.l0.l0_reader import available_readers
from disdrodb.metadata.check_metadata import (
check_archive_metadata_compliance,
check_archive_metadata_geolocation,
)

__all__ = [
"run_l0a",
Expand Down
3 changes: 2 additions & 1 deletion disdrodb/l0/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,8 @@ def _check_raw_dir_data_subfolders(raw_dir):

def _check_raw_dir_metadata(raw_dir, verbose=True):
"""Check metadata in the raw_dir directory."""
from disdrodb.l0.metadata import check_metadata_compliance, write_default_metadata
from disdrodb.l0.metadata import write_default_metadata
from disdrodb.metadata.check_metadata import check_metadata_compliance

# Get list of stations
raw_data_dir = os.path.join(raw_dir, "data")
Expand Down
4 changes: 2 additions & 2 deletions disdrodb/l0/l0_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,10 @@ def _get_reader_from_metadata(metadata):

def get_station_reader_function(data_source, campaign_name, station_name, base_dir=None):
"""Retrieve the reader function from the station metadata."""
from disdrodb.api.io import get_metadata_dict
from disdrodb.metadata import read_station_metadata

# Get metadata
metadata = get_metadata_dict(
metadata = read_station_metadata(
base_dir=base_dir,
product_level="RAW",
data_source=data_source,
Expand Down
Loading

0 comments on commit 8911365

Please sign in to comment.