Skip to content

Commit

Permalink
Merge branch 'main' into doc-ci-pre-commit
Browse files Browse the repository at this point in the history
  • Loading branch information
regislon committed May 8, 2023
2 parents 17e9585 + 6a89111 commit 4b549de
Show file tree
Hide file tree
Showing 10 changed files with 539 additions and 139 deletions.
5 changes: 4 additions & 1 deletion disdrodb/l0/l0_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,10 @@ def _generate_l0b(
filename=filename,
parallel=parallel,
)
logger_fpath = logger.handlers[0].baseFilename
if not os.environ.get("PYTEST_CURRENT_TEST"):
logger_fpath = logger.handlers[0].baseFilename
else:
logger_fpath = None

##------------------------------------------------------------------------.
# Log start processing
Expand Down
61 changes: 45 additions & 16 deletions disdrodb/tests/test_check_standards.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,91 @@
import pytest
import pandas as pd
from disdrodb.l0 import check_standards
from disdrodb.l0.check_standards import (
check_l0a_standards,
_check_valid_range,
_check_valid_values,
_check_raw_fields_available,
check_sensor_name,
check_l0a_column_names,
)
import random
import numpy as np
import os


# Parent of the directory that contains this test module.
PACKAGE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Test-data directory (tests/pytest_files/check_readers/DISDRODB) used by the tests below.
RAW_DIR = os.path.join(PACKAGE_DIR, "tests", "pytest_files", "check_readers", "DISDRODB")


def test_check_l0a_standards():
    """Check that the bundled ground-truth L0A parquet file passes ``check_l0a_standards``."""
    # Directory of the station's ground-truth data inside the test archive.
    station_dir = os.path.join(RAW_DIR, "Raw", "EPFL", "PARSIVEL_2007", "ground_truth", "10")
    parquet_fpath = os.path.join(
        station_dir,
        "L0A.PARSIVEL_2007.10.s20070723141530.e20070723141930.V0.parquet",
    )

    # Load the Apache Parquet file into a dataframe.
    l0a_df = pd.read_parquet(parquet_fpath)

    # The test expects the checker to return None for this file.
    assert check_l0a_standards(l0a_df, sensor_name="OTT_Parsivel") is None


def test_check_valid_range():
    """Exercise ``_check_valid_range`` on in-range, out-of-range, empty and partial inputs.

    Each case calls the directly imported helper exactly once. In particular,
    the ``pytest.raises`` block holds a single call, so the statement that is
    expected to raise is the only one under test.
    """
    # Test case 1: All columns within range
    df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": [0.5, 1.2, 2.7, 3.8]})
    dict_data_range = {"col1": [0, 5], "col2": [0, 4]}
    assert _check_valid_range(df, dict_data_range) is None

    # Test case 2: Some columns outside range
    df = pd.DataFrame({"col1": [1, 2, 10, 4], "col2": [0.5, 5.2, 2.7, 3.8]})
    dict_data_range = {"col1": [0, 5], "col2": [0, 4]}
    with pytest.raises(ValueError, match=r".*Columns \['col1', 'col2'\] has values outside the expected data range.*"):
        _check_valid_range(df, dict_data_range)

    # Test case 3: Empty dataframe
    df = pd.DataFrame()
    dict_data_range = {"col1": [0, 5], "col2": [0, 4]}
    assert _check_valid_range(df, dict_data_range) is None

    # Test case 4: Non-existing columns
    df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": [0.5, 1.2, 2.7, 3.8]})
    dict_data_range = {"col1": [0, 5], "col3": [0, 4]}
    assert _check_valid_range(df, dict_data_range) is None


def test_check_valid_values():
    """Exercise ``_check_valid_values`` on valid, invalid, empty and partial inputs.

    Each case calls the directly imported helper exactly once, so the
    ``pytest.raises`` block contains only the call that is expected to raise.
    """
    # Test case 1: All columns have valid values
    df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]})
    dict_valid_values = {"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}
    assert _check_valid_values(df, dict_valid_values) is None

    # Test case 2: Some columns have invalid values
    df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": [1, 5, 3, 4]})
    dict_valid_values = {"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}
    with pytest.raises(ValueError):
        _check_valid_values(df, dict_valid_values)

    # Test case 3: Empty dataframe
    df = pd.DataFrame()
    dict_valid_values = {"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}
    assert _check_valid_values(df, dict_valid_values) is None

    # Test case 4: Non-existing columns
    df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]})
    dict_valid_values = {"col1": [1, 2, 3, 4], "col3": [1, 2, 3, 4]}
    assert _check_valid_values(df, dict_valid_values) is None


def test_check_raw_fields_available():
# Test case 1: Missing 'raw_drop_number' column
df = pd.DataFrame({"other_column": [1, 2, 3]})
sensor_name = "some_sensor_type"
with pytest.raises(ValueError):
check_standards._check_raw_fields_available(df, sensor_name)
_check_raw_fields_available(df, sensor_name)

# Test case 2: All required columns present
from disdrodb.l0.standards import available_sensor_name, get_raw_array_nvalues
Expand All @@ -68,19 +97,19 @@ def test_check_raw_fields_available():
dict_data = {i: [1, 2] for i in raw_vars}
df = pd.DataFrame.from_dict(dict_data)

assert check_standards._check_raw_fields_available(df, sensor_name) is None
assert _check_raw_fields_available(df, sensor_name) is None


def test_check_sensor_name():
    """Expect ``check_sensor_name`` to raise on an unknown name and on a non-string argument.

    Each ``pytest.raises`` block holds a single call; a second call placed
    after the raising one would never execute.
    """
    sensor_name = "wrong_sensor_name"

    # Test with an unknown device
    with pytest.raises(ValueError):
        check_sensor_name(sensor_name)

    # Test with a wrong type
    with pytest.raises(TypeError):
        check_sensor_name(123)


def test_check_l0a_column_names(capsys):
Expand All @@ -93,17 +122,17 @@ def test_check_l0a_column_names(capsys):
list_column_names = get_sensor_variables(sensor_name) + ["time", "latitude", "longitude"]
dict_data = {i: [1, 2] for i in list_column_names}
df = pd.DataFrame.from_dict(dict_data)
# assert check_standards.check_l0a_column_names(df, sensor_name=sensor_name) is None
# assert check_l0a_column_names(df, sensor_name=sensor_name) is None

# Test 2 : Missing columns time
list_column_names = get_sensor_variables(sensor_name) + ["latitude", "longitude"]
dict_data = {i: [1, 2] for i in list_column_names}
df = pd.DataFrame.from_dict(dict_data)
with pytest.raises(ValueError):
check_standards.check_l0a_column_names(df, sensor_name=sensor_name) is None
check_l0a_column_names(df, sensor_name=sensor_name) is None

# Test 3 : fake pandas dataframe
data = {"wrong_column_name": ["John", "Jane", "Bob", "Sara"]}
df = pd.DataFrame(data)
with pytest.raises(ValueError):
check_standards.check_l0a_column_names(df, sensor_name=sensor_name)
check_l0a_column_names(df, sensor_name=sensor_name)
109 changes: 109 additions & 0 deletions disdrodb/tests/test_cmd_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from click.testing import CliRunner
import os
import pytest
import shutil
import glob


# Parent of the directory that contains this test module.
PACKAGE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Test-data directory handed to the CLI commands; outputs are looked up under its "Processed" subfolder.
RAW_DIR = os.path.join(PACKAGE_DIR, "tests", "pytest_files", "check_readers", "DISDRODB")
# Identifiers passed to the station-level commands and used to build the expected output paths.
DATA_SOURCE = "EPFL"
CAMPAIGN_NAME = "PARSIVEL_2007"
STATION_NAME = "10"


@pytest.fixture
def remove_processed_folder(request: list) -> None:
    """Delete the ``Processed`` folder under ``RAW_DIR`` before and after the test.

    The ``request`` argument is not read by the fixture body.
    """
    processed_dir = os.path.join(RAW_DIR, "Processed")

    def _purge_processed_dir():
        # Only remove the tree when it is actually present.
        if os.path.exists(processed_dir):
            shutil.rmtree(processed_dir)

    # Start from a clean state ...
    _purge_processed_dir()
    yield
    # ... and leave no generated output behind.
    _purge_processed_dir()


@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True)
def test_run_disdrodb_l0a_station(remove_processed_folder):
    """Invoke ``run_disdrodb_l0a_station`` and expect L0A parquet files for the station."""
    from disdrodb.l0.scripts.run_disdrodb_l0a_station import run_disdrodb_l0a_station

    cli = CliRunner()
    cli.invoke(run_disdrodb_l0a_station, [RAW_DIR, DATA_SOURCE, CAMPAIGN_NAME, STATION_NAME])

    # At least one parquet file must exist in the station's L0A output folder.
    l0a_pattern = os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0A", STATION_NAME, "*.parquet")
    produced_files = glob.glob(l0a_pattern)
    assert len(produced_files) > 0


@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True)
def test_run_disdrodb_l0a(remove_processed_folder):
    """Invoke ``run_disdrodb_l0a`` on ``RAW_DIR`` and expect L0A parquet files for the station."""
    from disdrodb.l0.scripts.run_disdrodb_l0a import run_disdrodb_l0a

    cli = CliRunner()
    cli.invoke(run_disdrodb_l0a, [RAW_DIR])

    # At least one parquet file must exist in the station's L0A output folder.
    l0a_pattern = os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0A", STATION_NAME, "*.parquet")
    produced_files = glob.glob(l0a_pattern)
    assert len(produced_files) > 0


@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True)
def test_run_disdrodb_l0b_station(remove_processed_folder):
    """Invoke the L0A then the L0B station command and expect ``*.nc`` files in the L0B folder."""
    from disdrodb.l0.scripts.run_disdrodb_l0a_station import run_disdrodb_l0a_station

    cli = CliRunner()
    # The L0A command is run first, then the L0B command on the same station.
    cli.invoke(run_disdrodb_l0a_station, [RAW_DIR, DATA_SOURCE, CAMPAIGN_NAME, STATION_NAME])

    from disdrodb.l0.scripts.run_disdrodb_l0b_station import run_disdrodb_l0b_station

    cli.invoke(run_disdrodb_l0b_station, [RAW_DIR, DATA_SOURCE, CAMPAIGN_NAME, STATION_NAME])

    l0b_pattern = os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0B", STATION_NAME, "*.nc")
    produced_files = glob.glob(l0b_pattern)
    assert len(produced_files) > 0


@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True)
def test_run_disdrodb_l0_station(remove_processed_folder):
    """Invoke ``run_disdrodb_l0_station`` and expect ``*.nc`` files in the station's L0B folder."""
    from disdrodb.l0.scripts.run_disdrodb_l0_station import run_disdrodb_l0_station

    cli = CliRunner()
    cli.invoke(run_disdrodb_l0_station, [RAW_DIR, DATA_SOURCE, CAMPAIGN_NAME, STATION_NAME])

    l0b_pattern = os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0B", STATION_NAME, "*.nc")
    produced_files = glob.glob(l0b_pattern)
    assert len(produced_files) > 0


@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True)
def test_run_disdrodb_l0b(remove_processed_folder):
    """Invoke ``run_disdrodb_l0a`` then ``run_disdrodb_l0b`` and expect ``*.nc`` files in the L0B folder."""
    from disdrodb.l0.scripts.run_disdrodb_l0a import run_disdrodb_l0a

    cli = CliRunner()
    # The L0A command is run first, then the L0B command on the same directory.
    cli.invoke(run_disdrodb_l0a, [RAW_DIR])

    from disdrodb.l0.scripts.run_disdrodb_l0b import run_disdrodb_l0b

    cli.invoke(run_disdrodb_l0b, [RAW_DIR])

    l0b_pattern = os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0B", STATION_NAME, "*.nc")
    produced_files = glob.glob(l0b_pattern)
    assert len(produced_files) > 0


@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True)
def test_full_run_disdrodb_l0(remove_processed_folder):
    """Test the run_disdrodb_l0 command.

    Invokes the command on ``RAW_DIR`` and expects at least one ``*.nc`` file
    in the station's L0B output folder.
    """

    from disdrodb.l0.scripts.run_disdrodb_l0 import run_disdrodb_l0

    runner = CliRunner()
    runner.invoke(run_disdrodb_l0, [RAW_DIR])

    list_of_l0b = glob.glob(os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0B", STATION_NAME, "*.nc"))
    assert len(list_of_l0b) > 0
Loading

0 comments on commit 4b549de

Please sign in to comment.