Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add routines for L1 and L2 processing #220

Draft
wants to merge 26 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
6841233
Update precommit rules and docs
ghiggi Jun 5, 2024
d537d98
Update docstrings
ghiggi Jun 5, 2024
01994b8
Fix dummy typo
ghiggi Jun 5, 2024
1929dd8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 5, 2024
844592e
Add code for L1 processing
ghiggi Nov 19, 2024
1d9fb73
Merge branch 'ltelab:main' into l2-processing
ghiggi Nov 19, 2024
38cf58f
Add L0C processing
ghiggi Nov 21, 2024
b9851c1
Add L2M code
ghiggi Nov 22, 2024
20e690f
Fix issues
ghiggi Dec 6, 2024
72d444b
Reorganize CLI scripts into single directory
ghiggi Dec 9, 2024
00ff7ee
Refactor for L0C
ghiggi Dec 10, 2024
c20e7e1
Add L1 and L2 processing code
ghiggi Dec 10, 2024
7f802eb
Update precommit
ghiggi Dec 10, 2024
de09ad9
Update precommit
ghiggi Dec 10, 2024
6298811
Change dask configs
ghiggi Dec 11, 2024
deb0972
Add new logger
ghiggi Dec 11, 2024
055384c
Fix issues
ghiggi Dec 12, 2024
7874379
Refactor dask cluster init
ghiggi Dec 16, 2024
6f760e4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 16, 2024
68ca587
Fix issue in L0C processing
ghiggi Dec 16, 2024
62e96ed
Merge branch 'l2-processing' of github.com:ghiggi/disdrodb into l2-pr…
ghiggi Dec 16, 2024
51c8df4
Robustify L0C
ghiggi Dec 20, 2024
7fd47bc
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 20, 2024
5494f92
Add new L2M processing
ghiggi Dec 23, 2024
0ed1694
Merge branch 'l2-processing' of github.com:ghiggi/disdrodb into l2-pr…
ghiggi Dec 23, 2024
9ac2b65
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
Expand All @@ -12,12 +12,12 @@ repos:
- id: check-ast
- id: check-added-large-files
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.5
rev: v0.7.1
hooks:
- id: ruff
args: [--fix]
- repo: https://github.com/psf/black
rev: 24.3.0
rev: 24.10.0
hooks:
- id: black
language_version: python3
Expand All @@ -27,18 +27,18 @@ repos:
- id: blackdoc
additional_dependencies: ["black[jupyter]"]
- repo: https://github.com/pre-commit/mirrors-prettier
rev: "v3.1.0"
rev: "v4.0.0-alpha.8"
hooks:
- id: prettier
types_or: [yaml, html, css, scss, javascript, json] # markdown to avoid conflicts with mdformat
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
rev: v2.3.0
hooks:
- id: codespell
types_or: [python, markdown, rst]
additional_dependencies: [tomli]
- repo: https://github.com/asottile/pyupgrade
rev: v3.15.2
rev: v3.19.0
hooks:
- id: pyupgrade
- repo: https://github.com/MarcoGorelli/madforhooks
Expand All @@ -47,7 +47,7 @@ repos:
# - id: conda-env-sorter # conflicts with prettier
- id: check-execution-order
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.17
rev: 0.7.18
hooks:
- id: mdformat
additional_dependencies: [mdformat-gfm, mdformat-black]
Expand All @@ -58,7 +58,7 @@ repos:
- id: nbstripout
args: [--keep-output]
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.8.5
rev: 1.8.7
hooks:
- id: nbqa-black
- id: nbqa-ruff
Expand Down
2 changes: 1 addition & 1 deletion CODE_OF_CONDUCT.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
identity and expression, level of experience, education, socioeconomic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.

Expand Down
24 changes: 24 additions & 0 deletions disdrodb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,23 @@
# -----------------------------------------------------------------------------.
# Copyright (c) 2021-2023 DISDRODB developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
"""DISDRODB software."""

import contextlib
import importlib
import os
from importlib.metadata import PackageNotFoundError, version

Expand All @@ -18,6 +37,11 @@
check_archive_metadata_geolocation,
)

# Product version label.
PRODUCT_VERSION = "V0"
# Software version label: "V" followed by the version of the installed
# ``disdrodb`` distribution, as reported by ``importlib.metadata``.
# NOTE(review): ``importlib.metadata.version`` raises ``PackageNotFoundError``
# when the distribution is not installed; this call is not guarded here, so
# importing the package from a non-installed source tree would presumably
# fail at this line — confirm this is intended.
SOFTWARE_VERSION = "V" + importlib.metadata.version("disdrodb")
# Metadata conventions string (names CF-1.10 and ACDD-1.3).
CONVENTIONS = "CF-1.10, ACDD-1.3"


__all__ = [
"define_configs",
"available_stations",
Expand Down
86 changes: 54 additions & 32 deletions disdrodb/api/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@

from disdrodb.api.info import infer_disdrodb_tree_path_components
from disdrodb.api.path import (
define_data_dir,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

✅ No longer an issue: Code Duplication
The module no longer contains too many functions with similar structure

define_issue_dir,
define_issue_filepath,
define_metadata_dir,
define_metadata_filepath,
define_station_dir,
)
from disdrodb.utils.directories import (
ensure_string_path,
def check_url(url: str) -> bool:
    """Check whether a string is a well-formatted URL.

    The check is purely syntactic: the string is matched against a regular
    expression. No network request is made.

    Parameters
    ----------
    url : str
        URL to check.

    Returns
    -------
    bool
        ``True`` if url well formatted, ``False`` if not well formatted.
    """
    regex = r"^(https?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$"  # noqa: E501
    # ``re.match`` returns a Match object or ``None``. Compare against ``None``
    # so that callers always receive a real boolean, as the signature declares.
    return re.match(regex, url) is not None


def check_path_is_a_directory(dir_path, path_name=""):
Expand All @@ -95,6 +92,7 @@ def check_directories_inside(dir_path):
def check_base_dir(base_dir: str):
    """Validate that the base directory path ends with ``DISDRODB``.

    Parameters
    ----------
    base_dir : str
        Base directory path. Any object is first converted with ``str``
        (so ``pathlib.Path`` instances are accepted) and then normalized
        with ``os.path.normpath``.

    Returns
    -------
    str
        The normalized base directory path.

    Raises
    ------
    ValueError
        If the normalized path does not end with ``DISDRODB``.
    """
    # Cast to string first (e.g. for Pathlib objects), then normalize the path
    normalized_dir = os.path.normpath(str(base_dir))
    if normalized_dir.endswith("DISDRODB"):
        return normalized_dir
    raise ValueError(f"The path {normalized_dir} does not end with DISDRODB. Please check the path.")
def check_product(product):
    """Check DISDRODB product.

    Parameters
    ----------
    product : str
        Product name. It is compared case-insensitively (via ``str.upper``)
        against the list of valid products.

    Returns
    -------
    str
        The input ``product``, returned unchanged (it is not upper-cased).

    Raises
    ------
    TypeError
        If ``product`` is not a string.
    ValueError
        If ``product`` is not one of the valid products. The message is
        also logged as an error before raising.
    """
    if not isinstance(product, str):
        raise TypeError("`product` must be a string.")
    valid_products = ["RAW", "L0A", "L0B", "L0C", "L1", "L2E", "L2M", "L2S"]
    if product.upper() not in valid_products:
        msg = f"Valid `products` are {valid_products}."
        logger.error(msg)
        raise ValueError(msg)
    return product


def check_station_dir(product, data_source, campaign_name, station_name, base_dir=None):
"""Check existence of the station data directory. If does not exists, raise an error."""
station_dir = define_station_dir(
ghiggi marked this conversation as resolved.
Show resolved Hide resolved
def has_available_data(
data_source,
campaign_name,
station_name,
product,
base_dir=None,
# Option for L2E
sample_interval=None,
rolling=None,
# Option for L2M
distribution=None,
):
"""Return ``True`` if data are available for the given product and station."""
# Define product directory
data_dir = define_data_dir(
product=product,
base_dir=base_dir,
data_source=data_source,
campaign_name=campaign_name,
station_name=station_name,
# Option for L2E
sample_interval=sample_interval,
rolling=rolling,
# Option for L2M
distribution=distribution,
# Directory options
check_exists=False,
)
if not os.path.exists(station_dir) and os.path.isdir(station_dir):
msg = f"The station {station_name} data directory does not exist at {station_dir}."
logger.error(msg)
raise ValueError(msg)
return station_dir
# If the product directory does not exists, return False
if not os.path.isdir(data_dir):
return False


def has_available_station_files(product, data_source, campaign_name, station_name, base_dir=None):
"""Return ``True`` if data are available for the given product and station."""
station_dir = check_station_dir(
product=product,
base_dir=base_dir,
data_source=data_source,
campaign_name=campaign_name,
station_name=station_name,
)
filepaths = list_files(station_dir, glob_pattern="*", recursive=True)
ghiggi marked this conversation as resolved.
Show resolved Hide resolved
# If no files, return False
filepaths = list_files(data_dir, glob_pattern="*", recursive=True)
nfiles = len(filepaths)
return nfiles >= 1


def check_station_has_data(product, data_source, campaign_name, station_name, base_dir=None):
"""Check the station data directory has data inside. If not, raise an error."""
if not has_available_station_files(
ghiggi marked this conversation as resolved.
Show resolved Hide resolved
def check_data_availability(
product,
data_source,
campaign_name,
station_name,
base_dir=None,
# Option for L2E
sample_interval=None,
rolling=None,
# Option for L2M
distribution=None,
):
"""Check the station product data directory has files inside. If not, raise an error."""
if not has_available_data(
product=product,
base_dir=base_dir,
data_source=data_source,
campaign_name=campaign_name,
station_name=station_name,
# Option for L2E
sample_interval=sample_interval,
rolling=rolling,
# Option for L2M
distribution=distribution,
):
msg = f"The {product} station data directory of {data_source} {campaign_name} {station_name} is empty !"
logger.error(msg)
Expand Down Expand Up @@ -271,6 +292,7 @@ def check_issue_dir(data_source, campaign_name, base_dir=None):
def check_issue_file(data_source, campaign_name, station_name, base_dir=None):
"""Check existence of a valid issue YAML file. If does not exists, raise an error."""
from disdrodb.issue.checks import check_issue_compliance
from disdrodb.issue.writer import create_station_issue

_ = check_issue_dir(
base_dir=base_dir,
Expand All @@ -286,9 +308,9 @@ def check_issue_file(data_source, campaign_name, station_name, base_dir=None):
)
# Check existence
if not os.path.exists(issue_filepath):
msg = f"The issue YAML file of {data_source} {campaign_name} {station_name} does not exist at {issue_filepath}."
logger.error(msg)
raise ValueError(msg)
create_station_issue(
base_dir=base_dir, data_source=data_source, campaign_name=campaign_name, station_name=station_name
)

# Check validity
check_issue_compliance(
Expand Down Expand Up @@ -398,7 +420,7 @@ def check_raw_dir(raw_dir: str, station_name: str) -> None:
check_directories_inside(raw_dir)

# Check there is data in the station directory
check_station_has_data(
check_data_availability(
product="RAW",
base_dir=base_dir,
data_source=data_source,
Expand Down
Loading
Loading