Skip to content

Commit

Permalink
Fix linting
Browse files Browse the repository at this point in the history
  • Loading branch information
ghiggi committed Dec 2, 2023
1 parent abf08dd commit 9a01cd3
Showing 1 changed file with 103 additions and 108 deletions.
211 changes: 103 additions & 108 deletions disdrodb/tests/test_l0/test_l0b_nc_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,19 @@
"""Test DISDRODB L0B (from raw netCDFs) processing routines."""

import numpy as np
import xarray as xr
import pytest
import xarray as xr

from disdrodb.l0.l0b_nc_processing import (
_check_dict_names_validity,
_get_missing_variables,
add_dataset_missing_variables,
rename_dataset,
replace_custom_nan_flags,
replace_nan_flags,
set_nan_outside_data_range,
set_nan_invalid_values,
replace_custom_nan_flags,
add_dataset_missing_variables,
set_nan_outside_data_range,
subset_dataset,
rename_dataset,
_check_dict_names_validity,
_get_missing_variables,
)

# NOTE:
Expand All @@ -52,7 +52,7 @@
"data_range": [10, 50],
"nan_flags": -9999,
},
# Assess possible int/float problems
# Assess possible int/float problems
"key_3": {
"valid_values": [0, 1],
"data_range": [-10, 10],
Expand All @@ -70,8 +70,7 @@
l0b_encoding_dict = {k: "dummy" for k in mock_valid_names}


config_dict = {"raw_data_format.yml": raw_data_format_dict,
"l0b_encodings.yml": l0b_encoding_dict}
config_dict = {"raw_data_format.yml": raw_data_format_dict, "l0b_encodings.yml": l0b_encoding_dict}

TEST_SENSOR_NAME = "test"

Expand All @@ -86,78 +85,94 @@ def test_replace_nan_flags(create_test_config_files):
Function that creates and removes the dummy config file.
"""
# Mock xarray Dataset
ds = xr.Dataset({
"key_1": xr.DataArray([0, 1, 2, 3, 4]),
"key_2": xr.DataArray([1, -9999, 2, 3, 89]),
"key_3": xr.DataArray([1.0, -9999.0, 2.0, 3.0, 89.0]),
"key_4": xr.DataArray([1, -9999, -8888, 2, 3]),
"key_not_in_dict": xr.DataArray([10, 20, 30, 40, 50])
})
ds = xr.Dataset(
{
"key_1": xr.DataArray([0, 1, 2, 3, 4]),
"key_2": xr.DataArray([1, -9999, 2, 3, 89]),
"key_3": xr.DataArray([1.0, -9999.0, 2.0, 3.0, 89.0]),
"key_4": xr.DataArray([1, -9999, -8888, 2, 3]),
"key_not_in_dict": xr.DataArray([10, 20, 30, 40, 50]),
}
)

# Call the replace_nan_flags function
result_ds = replace_nan_flags(ds, sensor_name=TEST_SENSOR_NAME, verbose=True)

# Assertions
assert result_ds["key_1"].equals(ds["key_1"]), "Key 1 should remain unchanged"
assert result_ds["key_2"].equals(xr.DataArray([1, np.nan, 2, 3, 89])), "Key 2 nan flags not replaced correctly"
assert result_ds["key_3"].equals(xr.DataArray([1.0, np.nan, 2.0, 3.0, 89.0])), "Key 3 float values not processed correctly"
assert result_ds["key_3"].equals(
xr.DataArray([1.0, np.nan, 2.0, 3.0, 89.0])
), "Key 3 float values not processed correctly"
assert result_ds["key_4"].equals(xr.DataArray([1, np.nan, np.nan, 2, 3])), "Key 4 nan flags not replaced correctly"
assert result_ds["key_not_in_dict"].equals(ds["key_not_in_dict"]), "Unrelated keys should remain unchanged"


@pytest.mark.parametrize("create_test_config_files", [config_dict], indirect=True)
def test_set_nan_outside_data_range(create_test_config_files):
ds = xr.Dataset({
"key_1": xr.DataArray([0, 1, 2, 3, 4]),
"key_2": xr.DataArray([9, 10, 50, 51, 30]),
"key_3": xr.DataArray([-11, -10, 0, 10, 11]),
"key_4": xr.DataArray([99, 100, 150, 200, 201]),
"key_not_in_dict": xr.DataArray([0, 1, 2, 3, 4])
})
ds = xr.Dataset(
{
"key_1": xr.DataArray([0, 1, 2, 3, 4]),
"key_2": xr.DataArray([9, 10, 50, 51, 30]),
"key_3": xr.DataArray([-11, -10, 0, 10, 11]),
"key_4": xr.DataArray([99, 100, 150, 200, 201]),
"key_not_in_dict": xr.DataArray([0, 1, 2, 3, 4]),
}
)

result_ds = set_nan_outside_data_range(ds, TEST_SENSOR_NAME, verbose=True)

assert result_ds["key_1"].equals(ds["key_1"]), "Key 1 should remain unchanged"
assert result_ds["key_2"].equals(xr.DataArray([np.nan, 10, 50, np.nan, 30])), "Key 2 data range not applied correctly"
assert result_ds["key_3"].equals(xr.DataArray([np.nan, -10, 0, 10, np.nan])), "Key 3 data range not applied correctly"
assert result_ds["key_2"].equals(
xr.DataArray([np.nan, 10, 50, np.nan, 30])
), "Key 2 data range not applied correctly"
assert result_ds["key_3"].equals(
xr.DataArray([np.nan, -10, 0, 10, np.nan])
), "Key 3 data range not applied correctly"
assert result_ds["key_4"].equals(ds["key_4"]), "If data_range for key4 is None, data should remain unchanged"
assert result_ds["key_not_in_dict"].equals(ds["key_not_in_dict"]), "Unrelated keys should remain unchanged"


@pytest.mark.parametrize("create_test_config_files", [config_dict], indirect=True)
def test_set_nan_invalid_values(create_test_config_files):
ds = xr.Dataset({
"key_1": xr.DataArray([0, 1, 2, 3, 4]),
"key_2": xr.DataArray([9, 10, 20, 30, 40]),
"key_3": xr.DataArray([0, 0.1, 0.2, 0.3, 1.0]),
"key_4": xr.DataArray([0, 0, 0, 1, 1]),
"key_not_in_dict": xr.DataArray([0, 1, 2, 3, 4])
})
ds = xr.Dataset(
{
"key_1": xr.DataArray([0, 1, 2, 3, 4]),
"key_2": xr.DataArray([9, 10, 20, 30, 40]),
"key_3": xr.DataArray([0, 0.1, 0.2, 0.3, 1.0]),
"key_4": xr.DataArray([0, 0, 0, 1, 1]),
"key_not_in_dict": xr.DataArray([0, 1, 2, 3, 4]),
}
)

result_ds = set_nan_invalid_values(ds, TEST_SENSOR_NAME, verbose=True)

assert result_ds["key_1"].equals(ds["key_1"]), "Key 1 should remain unchanged"
assert result_ds["key_2"].equals(xr.DataArray([np.nan, np.nan, np.nan, np.nan, np.nan])), "Key 2 valid values not applied correctly"
assert result_ds["key_3"].equals(xr.DataArray([0.0, np.nan, np.nan, np.nan, 1.0])), "Key 3 float values not processed correctly"
assert result_ds["key_4"].equals(xr.DataArray([0, 0, 0, 1, 1])), "Key 4 should not have been modified. Only valid values are present."
assert result_ds["key_2"].equals(
xr.DataArray([np.nan, np.nan, np.nan, np.nan, np.nan])
), "Key 2 valid values not applied correctly"
assert result_ds["key_3"].equals(
xr.DataArray([0.0, np.nan, np.nan, np.nan, 1.0])
), "Key 3 float values not processed correctly"
assert result_ds["key_4"].equals(
xr.DataArray([0, 0, 0, 1, 1])
), "Key 4 should not have been modified. Only valid values are present."
assert result_ds["key_not_in_dict"].equals(ds["key_not_in_dict"]), "Unrelated keys should remain unchanged"


def test_replace_custom_nan_flags():
# Custom dictionary of nan flags for testing
dict_nan_flags = {
"key_1": [-999],
"key_2": [-9999, -8888],
"key_3": [0]
}
dict_nan_flags = {"key_1": [-999], "key_2": [-9999, -8888], "key_3": [0]}

# Mock xarray Dataset
ds = xr.Dataset({
"key_1": xr.DataArray([1, -999, 2, 3, 4]),
"key_2": xr.DataArray([1, -9999, -8888, 2, 3]),
"key_3": xr.DataArray([0, 1, 0, 2, 3]),
"key_not_in_flags": xr.DataArray([10, 20, 30, 40, 50])
})
ds = xr.Dataset(
{
"key_1": xr.DataArray([1, -999, 2, 3, 4]),
"key_2": xr.DataArray([1, -9999, -8888, 2, 3]),
"key_3": xr.DataArray([0, 1, 0, 2, 3]),
"key_not_in_flags": xr.DataArray([10, 20, 30, 40, 50]),
}
)

# Call the replace_custom_nan_flags function
result_ds = replace_custom_nan_flags(ds, dict_nan_flags=dict_nan_flags)
Expand All @@ -167,24 +182,20 @@ def test_replace_custom_nan_flags():
assert result_ds["key_2"].equals(xr.DataArray([1, np.nan, np.nan, 2, 3])), "Key 2 nan flags not replaced correctly"
assert result_ds["key_3"].equals(xr.DataArray([np.nan, 1, np.nan, 2, 3])), "Key 3 nan flags not replaced correctly"
assert result_ds["key_not_in_flags"].equals(ds["key_not_in_flags"]), "Unrelated keys should remain unchanged"


def test_add_dataset_missing_variables(monkeypatch):
# Mock variables and their dimensions
mock_var_dims_dict = {
"missing_var_1": ["dim1"],
"missing_var_2": ["dim1", "dim2"]
}
mock_var_dims_dict = {"missing_var_1": ["dim1"], "missing_var_2": ["dim1", "dim2"]}

# Mock get_variables_dimension function
def mock_get_variables_dimension(sensor_name):
return mock_var_dims_dict

monkeypatch.setattr("disdrodb.l0.standards.get_variables_dimension", mock_get_variables_dimension)

# Define xarray Dataset
ds = xr.Dataset({
"existing_var": xr.DataArray(np.random.rand(5, 3), dims=["dim1", "dim2"])
})
ds = xr.Dataset({"existing_var": xr.DataArray(np.random.rand(5, 3), dims=["dim1", "dim2"])})

# List of missing variables
missing_vars = ["missing_var_1", "missing_var_2"]
Expand All @@ -201,33 +212,25 @@ def mock_get_variables_dimension(sensor_name):
assert np.all(np.isnan(result_ds["missing_var_2"])), "Values of missing_var_2 should be NaN"




@pytest.mark.parametrize("create_test_config_files", [config_dict], indirect=True)
def test_check_dict_names_validity(create_test_config_files):
# Define dict_names with valid values
dict_names_valid = {
"key1": "var1",
"key2": "var2"
}
dict_names_valid = {"key1": "var1", "key2": "var2"}
# No exception should be raised
_check_dict_names_validity(dict_names_valid, sensor_name=TEST_SENSOR_NAME)

# Define dict_names with some invalid values
dict_names_invalid = {
"key1": "invalid_name",
"key2": "var2"
}
dict_names_invalid = {"key1": "invalid_name", "key2": "var2"}
with pytest.raises(ValueError):
_check_dict_names_validity(dict_names_invalid, sensor_name=TEST_SENSOR_NAME)


def test_rename_dataset():
# Define xarray Dataset with variables, coordinates, and dimensions
ds = xr.Dataset({
"var1": (("dim1", "dim2"), np.random.rand(2, 3)),
"var2": (("dim1", "dim2"), np.random.rand(2, 3))
}, coords={"dim2": [1, 2, 3], "coord1": ("dim2", [1, 2, 3])})
ds = xr.Dataset(
{"var1": (("dim1", "dim2"), np.random.rand(2, 3)), "var2": (("dim1", "dim2"), np.random.rand(2, 3))},
coords={"dim2": [1, 2, 3], "coord1": ("dim2", [1, 2, 3])},
)

# Define dict_names for renaming
dict_names = {
Expand All @@ -236,7 +239,7 @@ def test_rename_dataset():
"dim1": "new_dim1",
"dim2": "new_dim2",
"coord1": "new_coord1",
"non_existing_var": "should_be_ignored"
"non_existing_var": "should_be_ignored",
}

# Call rename_dataset
Expand All @@ -249,31 +252,28 @@ def test_rename_dataset():
assert "new_dim2" in result_ds.coords, "dim2 should be renamed to new_dim2"
assert "new_coord1" in result_ds.coords, "coord1 should be renamed to new_coord1"
assert "coord1" not in result_ds.coords, "coord1 should be renamed to new_coord1"

assert "non_existing_var" not in result_ds, "non_existing_var should not be in the renamed dataset"
assert "var1" not in result_ds, "Original var1 should not exist after renaming"
assert "var2" not in result_ds, "Original var2 should not exist after renaming"


@pytest.mark.parametrize("create_test_config_files", [config_dict], indirect=True)
def test_subset_dataset(create_test_config_files):
# Define xarray Dataset with extra variables (assumed to be renamed)
ds = xr.Dataset({
"var1": xr.DataArray([1, 2, 3]),
"var2": xr.DataArray([4, 5, 6]),
"var3": xr.DataArray([7, 8, 9]),
"var_not_needed": xr.DataArray([10, 11, 12])
})

# Define dict_names mapping
ds = xr.Dataset(
{
"var1": xr.DataArray([1, 2, 3]),
"var2": xr.DataArray([4, 5, 6]),
"var3": xr.DataArray([7, 8, 9]),
"var_not_needed": xr.DataArray([10, 11, 12]),
}
)

# Define dict_names mapping
# - Keys are used to rename (the values are used for subsetting)
# - Values are used for subsetting
dict_names = {
"key1": "var1",
"key2": "var2",
"key3": "var3",
"key4": "var_not_in_ds"
}
dict_names = {"key1": "var1", "key2": "var2", "key3": "var3", "key4": "var_not_in_ds"}

# Call subset_dataset
result_ds = subset_dataset(ds, dict_names=dict_names, sensor_name=TEST_SENSOR_NAME)
Expand All @@ -282,31 +282,26 @@ def test_subset_dataset(create_test_config_files):
assert set(result_ds.data_vars) == {"var1", "var2", "var3"}, "Dataset should only contain var1, var2, and var3"
assert "var_not_needed" not in result_ds, "var_not_needed should not be in the subset dataset"
assert "var_not_in_ds" not in result_ds, "var_not_in_ds should not be in the subset dataset"


@pytest.mark.parametrize("create_test_config_files", [config_dict], indirect=True)
def test_get_missing_variables(create_test_config_files):
# Define xarray Dataset with some variables (assumed to be renamed and subsetted)
ds = xr.Dataset({
"var1": xr.DataArray([1, 2, 3]),
"var2": xr.DataArray([4, 5, 6]),
})

# Define dict_names mapping
ds = xr.Dataset(
{
"var1": xr.DataArray([1, 2, 3]),
"var2": xr.DataArray([4, 5, 6]),
}
)

# Define dict_names mapping
# - Keys are used to rename (the values are used for subsetting)
# - Values are used for subsetting
dict_names = {
"key1": "var1",
"key2": "var2",
"key3": "var3",
"key4": "var_not_in_ds"
}
dict_names = {"key1": "var1", "key2": "var2", "key3": "var3", "key4": "var_not_in_ds"}

# Call _get_missing_variables
ds = rename_dataset(ds=ds, dict_names=dict_names)
missing_vars = _get_missing_variables(ds, dict_names, sensor_name=TEST_SENSOR_NAME)

# Assertions
assert missing_vars == {"var3", "var_not_in_ds"}, "Missing variables should be identified correctly"


0 comments on commit 9a01cd3

Please sign in to comment.