From 4e8f4f69ae33ff860faf73538de0692f5de95209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Longchamp?= <97044425+regislon@users.noreply.github.com> Date: Fri, 5 May 2023 15:12:13 +0200 Subject: [PATCH 1/8] feat() : add new tests --- disdrodb/tests/test_check_standards.py | 61 ++++++++--- disdrodb/tests/test_io.py | 130 +++++++++++++++++++++++ disdrodb/tests/test_l0_reader.py | 120 +++++++++++++++++++-- disdrodb/tests/test_standards.py | 140 +++++++------------------ 4 files changed, 323 insertions(+), 128 deletions(-) diff --git a/disdrodb/tests/test_check_standards.py b/disdrodb/tests/test_check_standards.py index 1c8caf2f..7a8348ca 100644 --- a/disdrodb/tests/test_check_standards.py +++ b/disdrodb/tests/test_check_standards.py @@ -1,54 +1,83 @@ import pytest import pandas as pd -from disdrodb.l0 import check_standards +from disdrodb.l0.check_standards import ( + check_l0a_standards, + _check_valid_range, + _check_valid_values, + _check_raw_fields_available, + check_sensor_name, + check_l0a_column_names, +) import random import numpy as np +import os + + +PACKAGE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +RAW_DIR = os.path.join(PACKAGE_DIR, "tests", "pytest_files", "check_readers", "DISDRODB") + + +def test_check_l0a_standards(): + path_file = os.path.join( + RAW_DIR, + "Raw", + "EPFL", + "PARSIVEL_2007", + "ground_truth", + "10", + "L0A.PARSIVEL_2007.10.s20070723141530.e20070723141930.V0.parquet", + ) + + # read apache parquet file + df = pd.read_parquet(path_file) + + assert check_l0a_standards(df, sensor_name="OTT_Parsivel") is None def test_check_valid_range(): # Test case 1: All columns within range df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": [0.5, 1.2, 2.7, 3.8]}) dict_data_range = {"col1": [0, 5], "col2": [0, 4]} - assert check_standards._check_valid_range(df, dict_data_range) is None + assert _check_valid_range(df, dict_data_range) is None # Test case 2: Some columns outside range df = pd.DataFrame({"col1": [1, 2, 10, 4], "col2": [0.5, 5.2, 2.7, 3.8]}) dict_data_range = {"col1": [0, 5], "col2": [0, 4]} with pytest.raises(ValueError, match=r".*Columns \['col1', 'col2'\] has values outside the expected data range.*"): - check_standards._check_valid_range(df, dict_data_range) + _check_valid_range(df, dict_data_range) # Test case 3: Empty dataframe df = pd.DataFrame() dict_data_range = {"col1": [0, 5], "col2": [0, 4]} - assert check_standards._check_valid_range(df, dict_data_range) is None + assert _check_valid_range(df, dict_data_range) is None # Test case 4: Non-existing columns df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": [0.5, 1.2, 2.7, 3.8]}) dict_data_range = {"col1": [0, 5], "col3": [0, 4]} - assert check_standards._check_valid_range(df, dict_data_range) is None + assert _check_valid_range(df, dict_data_range) is None def test_check_valid_values(): # Test case 1: All columns have valid values df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) dict_valid_values = {"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]} - assert check_standards._check_valid_values(df, dict_valid_values) is None + assert _check_valid_values(df, dict_valid_values) is None # Test case 2: Some columns have invalid values df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": [1, 5, 3, 4]}) dict_valid_values = {"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]} with pytest.raises(ValueError): - check_standards._check_valid_values(df, dict_valid_values) + _check_valid_values(df, dict_valid_values) # Test case 3: Empty dataframe df = pd.DataFrame() dict_valid_values = {"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]} - assert check_standards._check_valid_values(df, dict_valid_values) is None + assert _check_valid_values(df, dict_valid_values) is None # Test case 4: Non-existing columns df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) dict_valid_values = {"col1": [1, 2, 3, 4], "col3": [1, 2, 3, 4]} - assert check_standards._check_valid_values(df, dict_valid_values) is None + assert _check_valid_values(df, dict_valid_values) is None def test_check_raw_fields_available(): @@ -56,7 +85,7 @@ def test_check_raw_fields_available(): df = pd.DataFrame({"other_column": [1, 2, 3]}) sensor_name = "some_sensor_type" with pytest.raises(ValueError): - check_standards._check_raw_fields_available(df, sensor_name) + _check_raw_fields_available(df, sensor_name) # Test case 2: All required columns present from disdrodb.l0.standards import available_sensor_name, get_raw_array_nvalues @@ -68,7 +97,7 @@ def test_check_raw_fields_available(): dict_data = {i: [1, 2] for i in raw_vars} df = pd.DataFrame.from_dict(dict_data) - assert check_standards._check_raw_fields_available(df, sensor_name) is None + assert _check_raw_fields_available(df, sensor_name) is None def test_check_sensor_name(): @@ -76,11 +105,11 @@ def test_check_sensor_name(): # Test with an unknown device with pytest.raises(ValueError): - check_standards.check_sensor_name(sensor_name) + check_sensor_name(sensor_name) # Test with a woronf type with pytest.raises(TypeError): - check_standards.check_sensor_name(123) + check_sensor_name(123) def test_check_l0a_column_names(capsys): @@ -93,17 +122,17 @@ def test_check_l0a_column_names(capsys): list_column_names = get_sensor_variables(sensor_name) + ["time", "latitude", "longitude"] dict_data = {i: [1, 2] for i in list_column_names} df = pd.DataFrame.from_dict(dict_data) - # assert check_standards.check_l0a_column_names(df, sensor_name=sensor_name) is None + # assert check_l0a_column_names(df, sensor_name=sensor_name) is None # Test 2 : Missing columns time list_column_names = get_sensor_variables(sensor_name) + ["latitude", "longitude"] dict_data = {i: [1, 2] for i in list_column_names} df = pd.DataFrame.from_dict(dict_data) with pytest.raises(ValueError): - check_standards.check_l0a_column_names(df, sensor_name=sensor_name) is None + check_l0a_column_names(df, sensor_name=sensor_name) is None # Test 3 : fake panda dataframe data = {"wrong_column_name": ["John", "Jane", "Bob", "Sara"]} df = pd.DataFrame(data) with pytest.raises(ValueError): - check_standards.check_l0a_column_names(df, sensor_name=sensor_name) + check_l0a_column_names(df, sensor_name=sensor_name) diff --git a/disdrodb/tests/test_io.py b/disdrodb/tests/test_io.py index 92af4218..480b1ab3 100644 --- a/disdrodb/tests/test_io.py +++ b/disdrodb/tests/test_io.py @@ -6,11 +6,141 @@ import xarray as xr import platform from disdrodb.l0 import io +import yaml PATH_TEST_FOLDERS_FILES = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pytest_files") +def create_fake_metadata_file( + tmp_path, yaml_file_name, yaml_dict, data_source="data_source", campaign_name="campaign_name" +): + subfolder_path = tmp_path / "DISDRODB" / "Raw" / data_source / campaign_name / "metadata" + if not os.path.exists(subfolder_path): + subfolder_path.mkdir(parents=True) + file_path = os.path.join(subfolder_path, yaml_file_name) + # create a fake yaml file in temp folder + with open(file_path, "w") as f: + yaml.dump(yaml_dict, f) + + assert os.path.exists(file_path) + + return file_path + + +def test_check_is_processed_dir(tmp_path): + subfolder_path = tmp_path / "DISDRODB" / "Processed" / "data_source" / "campaign_name" + subfolder_path.mkdir(parents=True) + + assert io._check_is_processed_dir(str(subfolder_path)) == str(subfolder_path) + + +def test_check_processed_dir(tmp_path): + subfolder_path = tmp_path / "DISDRODB" / "Processed" / "data_source" / "campaign_name" + subfolder_path.mkdir(parents=True) + + assert io.check_processed_dir(str(subfolder_path)) == str(subfolder_path) + + +def test_create_initial_directory_structure(tmp_path, mocker): + station_name = "station_1" + yaml_dict = {} + data_source = "data_source" + campaign_name = "CAMPAIGN_NAME" + + create_fake_metadata_file( + tmp_path=tmp_path, + yaml_file_name=f"{station_name}.yml", + yaml_dict=yaml_dict, + data_source=data_source, + campaign_name=campaign_name, + ) + + raw_dir = os.path.join(tmp_path, "DISDRODB", "Raw", "data_source", campaign_name) + force = False + subfolder_path = tmp_path / "DISDRODB" / "Raw" / "data_source" / campaign_name / "data" / station_name + subfolder_path.mkdir(parents=True) + + fake_csv_file_path = os.path.join(subfolder_path, f"{station_name}.csv") + with open(fake_csv_file_path, "w") as f: + f.write("fake csv file") + + processed_dir = os.path.join(tmp_path, "DISDRODB", "Processed", campaign_name) + subfolder_path = tmp_path / "DISDRODB" / "Processed" / campaign_name + subfolder_path.mkdir(parents=True) + mocker.patch("disdrodb.l0.metadata.check_metadata_compliance", return_value=None) + + io.create_initial_directory_structure( + raw_dir=raw_dir, processed_dir=processed_dir, station_name=station_name, force=force + ) + + l0a_folder_path = os.path.join(processed_dir, "L0B") + assert os.path.exists(l0a_folder_path) + + +def test_create_directory_structure(tmp_path, mocker): + station_name = "station_1" + yaml_dict = {} + data_source = "data_source" + campaign_name = "CAMPAIGN_NAME" + + create_fake_metadata_file( + tmp_path=tmp_path, + yaml_file_name=f"{station_name}.yml", + yaml_dict=yaml_dict, + data_source=data_source, + campaign_name=campaign_name, + ) + + os.path.join(tmp_path, "DISDRODB", "Raw", "data_source", campaign_name) + force = False + subfolder_path = tmp_path / "DISDRODB" / "Raw" / "data_source" / campaign_name / "data" / station_name + subfolder_path.mkdir(parents=True) + + fake_csv_file_path = os.path.join(subfolder_path, f"{station_name}.csv") + with open(fake_csv_file_path, "w") as f: + f.write("fake csv file") + + processed_dir = os.path.join(tmp_path, "DISDRODB", "Processed", campaign_name) + subfolder_path = tmp_path / "DISDRODB" / "Processed" / campaign_name / "L0B" + subfolder_path.mkdir(parents=True) + + product_level = "L0B" + station_name = "station_1" + force = False + + mocker.patch("disdrodb.api.io._get_list_stations_with_data", return_value=[station_name]) + mocker.patch("disdrodb.l0.io._check_pre_existing_station_data", return_value=None) + + io.create_directory_structure( + processed_dir=processed_dir, product_level=product_level, station_name=station_name, force=force, verbose=False + ) + + l0a_folder_path = os.path.join(processed_dir, "L0B") + assert os.path.exists(l0a_folder_path) + + +def test_check_raw_dir_input(tmp_path): + station_name = "station_1" + yaml_dict = {} + data_source = "data_source" + campaign_name = "campaign_name" + + create_fake_metadata_file( + tmp_path=tmp_path, + yaml_file_name=f"{station_name}.yml", + yaml_dict=yaml_dict, + data_source=data_source, + campaign_name=campaign_name, + ) + + io._check_raw_dir_input(os.path.join(tmp_path, "DISDRODB")) + + +def test_check_directory_exist(): + assert io._check_directory_exist(PATH_TEST_FOLDERS_FILES) is None + + def get_disdrodb_path(): # Assert retrieve correct disdrodb path disdrodb_path = os.path.join("DISDRODB", "Raw", "DATA_SOURCE", "CAMPAIGN_NAME") diff --git a/disdrodb/tests/test_l0_reader.py b/disdrodb/tests/test_l0_reader.py index 98b82911..bb9cf9f6 100644 --- a/disdrodb/tests/test_l0_reader.py +++ b/disdrodb/tests/test_l0_reader.py @@ -1,18 +1,122 @@ import inspect import pytest from disdrodb.l0 import l0_reader +import os +import yaml +from disdrodb.l0.l0_reader import ( + get_station_reader, + _check_metadata_reader, + available_readers, + get_reader_from_metadata_reader_key, + check_available_readers, + _get_readers_data_sources_path, + _get_readers_paths_by_data_source, +) -def test_is_documented_by(): - # Uncheck function. - # function_return = L0_processing.is_documented_by() - assert 1 == 1 +# Some test are based on the following reader: +DATA_SOURCE = "EPFL" +CAMPAIGN_NAME = "EPFL_2009" -def test_reader_generic_docstring(): - # Uncheck function. - l0_reader.reader_generic_docstring() - assert 1 == 1 +def create_fake_metadata_file( + tmp_path, yaml_file_name, yaml_dict, data_source="data_source", campaign_name="campaign_name" +): + subfolder_path = tmp_path / "DISDRODB" / "Raw" / data_source / campaign_name / "metadata" + if not os.path.exists(subfolder_path): + subfolder_path.mkdir(parents=True) + file_path = os.path.join(subfolder_path, yaml_file_name) + # create a fake yaml file in temp folder + with open(file_path, "w") as f: + yaml.dump(yaml_dict, f) + + assert os.path.exists(file_path) + + return file_path + + +def test_available_readers(): + result = available_readers(data_sources=None, reader_path=False) + assert isinstance(result, dict) + assert all(isinstance(value, list) for value in result.values()) + + +def test_check_metadata_reader(): + # Test when "reader" key is missing + with pytest.raises(ValueError, match="The reader is not specified in the metadata."): + _check_metadata_reader({}) + + # Test when "reader" key is present but invalid + with pytest.raises(ValueError, match="The reader 'invalid_reader' reported in the metadata is not valid."): + _check_metadata_reader({"reader": "invalid_reader"}) + + # Test when "reader" key is not present + with pytest.raises(ValueError, match="The reader is not specified in the metadata."): + _check_metadata_reader({"reader2": f"{DATA_SOURCE}/{CAMPAIGN_NAME}"}) + + # Test when "reader" key is present and valid + assert _check_metadata_reader({"reader": f"{DATA_SOURCE}/{CAMPAIGN_NAME}"}) is None + + +def test_get_station_reader(tmp_path): + station_name = "station_1" + yaml_dict = {"reader": f"{DATA_SOURCE}/{CAMPAIGN_NAME}"} + data_source = "data_source" + campaign_name = "campaign_name" + + create_fake_metadata_file( + tmp_path=tmp_path, + yaml_file_name=f"{station_name}.yml", + yaml_dict=yaml_dict, + data_source=data_source, + campaign_name=campaign_name, + ) + + result = get_station_reader( + disdrodb_dir=os.path.join(tmp_path, "DISDRODB"), + data_source=data_source, + campaign_name=campaign_name, + station_name=station_name, + ) + assert callable(result) is True + + +def test_get_reader_from_metadata(tmp_path): + station_name = "station_1" + yaml_dict = {"reader": f"{DATA_SOURCE}/{CAMPAIGN_NAME}"} + data_source = DATA_SOURCE + campaign_name = CAMPAIGN_NAME + reader_data_source_name = f"{DATA_SOURCE}/{CAMPAIGN_NAME}" + + create_fake_metadata_file( + tmp_path=tmp_path, + yaml_file_name=f"{station_name}.yml", + yaml_dict=yaml_dict, + data_source=data_source, + campaign_name=campaign_name, + ) + result = get_reader_from_metadata_reader_key(reader_data_source_name=reader_data_source_name) + assert callable(result) is True + + +def test_get_readers_paths_by_data_source(): + with pytest.raises(ValueError): + _get_readers_paths_by_data_source(data_source="dummy") + + +def test_check_available_readers(): + assert check_available_readers() is None + + +def test_get_reader_from_metadata_reader_key(): + reader_data_source_name = f"{DATA_SOURCE}/{CAMPAIGN_NAME}" + result = get_reader_from_metadata_reader_key(reader_data_source_name=reader_data_source_name) + assert callable(result) is True + + +def test__get_readers_data_sources_path(): + result = _get_readers_data_sources_path() + assert isinstance(result, list) def test_get_available_readers_dict(): diff --git a/disdrodb/tests/test_standards.py b/disdrodb/tests/test_standards.py index 1150eb59..932cd4b8 100644 --- a/disdrodb/tests/test_standards.py +++ b/disdrodb/tests/test_standards.py @@ -1,7 +1,20 @@ # The yaml files validity is tested in the test_config_files.py file import os import pytest -from disdrodb.l0 import standards +from disdrodb.l0.standards import ( + get_time_encoding, + get_field_ndigits_natural_dict, + get_field_ndigits_decimals_dict, + get_field_ndigits_dict, + get_field_nchar_dict, + get_data_range_dict, + get_nan_flags_dict, + get_variables_dimension, + get_valid_variable_names, + get_valid_dimension_names, + get_valid_names, + get_valid_coordinates_names, +) # Set paths ROOT_DISDRODB_FOLDER = os.path.join( @@ -12,145 +25,64 @@ @pytest.mark.parametrize("sensor_name", os.listdir(CONFIG_FOLDER)) def test_get_field_ndigits_natural_dict(sensor_name): - function_return = standards.get_field_ndigits_natural_dict(sensor_name) + function_return = get_field_ndigits_natural_dict(sensor_name) assert isinstance(function_return, dict) @pytest.mark.parametrize("sensor_name", os.listdir(CONFIG_FOLDER)) def test_get_field_ndigits_decimals_dict(sensor_name): - function_return = standards.get_field_ndigits_decimals_dict(sensor_name) + function_return = get_field_ndigits_decimals_dict(sensor_name) assert isinstance(function_return, dict) @pytest.mark.parametrize("sensor_name", os.listdir(CONFIG_FOLDER)) def test_get_field_ndigits_dict(sensor_name): - function_return = standards.get_field_ndigits_dict(sensor_name) + function_return = get_field_ndigits_dict(sensor_name) assert isinstance(function_return, dict) @pytest.mark.parametrize("sensor_name", os.listdir(CONFIG_FOLDER)) def test_get_field_nchar_dict(sensor_name): - function_return = standards.get_field_nchar_dict(sensor_name) + function_return = get_field_nchar_dict(sensor_name) assert isinstance(function_return, dict) @pytest.mark.parametrize("sensor_name", os.listdir(CONFIG_FOLDER)) def test_get_data_range_dict(sensor_name): - function_return = standards.get_data_range_dict(sensor_name) + function_return = get_data_range_dict(sensor_name) assert isinstance(function_return, dict) @pytest.mark.parametrize("sensor_name", os.listdir(CONFIG_FOLDER)) def test_get_nan_flags_dict(sensor_name): - function_return = standards.get_nan_flags_dict(sensor_name) + function_return = get_nan_flags_dict(sensor_name) assert isinstance(function_return, dict) -""" - -def test_read_config_yml(): - function_return = standards.read_config_yml() - assert function_return == - - -def test_get_configs_dir(): - function_return = standards.get_configs_dir() - assert function_return == - -def test_available_sensor_name(): - function_return = standards.available_sensor_name() - assert function_return == - -def test_get_variables_dict(): - function_return = standards.get_variables_dict() - assert function_return == - -def test_get_sensor_variables(): - function_return = standards.get_sensor_variables() - assert function_return == - -def test_get_data_format_dict(): - function_return = standards.get_data_format_dict() - assert function_return == - -def test_get_description_dict(): - function_return = standards.get_description_dict() - assert function_return == - -def test_get_long_name_dict(): - function_return = standards.get_long_name_dict() - assert function_return == - -def test_get_units_dict(): - function_return = standards.get_units_dict() - assert function_return == - -def test_get_diameter_bins_dict(): - function_return = standards.get_diameter_bins_dict() - assert function_return == - -def test_get_velocity_bins_dict(): - function_return = standards.get_velocity_bins_dict() - assert function_return == - -def test_get_l0a_dtype(): - function_return = standards.get_l0a_dtype() - assert function_return == - -def test_get_L0A_encodings_dict(): - function_return = standards.get_L0A_encodings_dict() - assert function_return == - -def test_get_L0B_encodings_dict(): - function_return = standards.get_L0B_encodings_dict() - assert function_return == - def test_get_time_encoding(): - function_return = standards.get_time_encoding() - assert function_return == - -def test_set_DISDRODB_L0_attrs(): - function_return = standards.set_DISDRODB_L0_attrs() - assert function_return == + assert isinstance(get_time_encoding(), dict) -def test_get_diameter_bin_center(): - function_return = standards.get_diameter_bin_center() - assert function_return == -def test_get_diameter_bin_lower(): - function_return = standards.get_diameter_bin_lower() - assert function_return == - -def test_get_diameter_bin_upper(): - function_return = standards.get_diameter_bin_upper() - assert function_return == +@pytest.mark.parametrize("sensor_name", os.listdir(CONFIG_FOLDER)) +def test_get_variables_dimension(sensor_name): + assert isinstance(get_variables_dimension(sensor_name), dict) -def test_get_diameter_bin_width(): - function_return = standards.get_diameter_bin_width() - assert function_return == -def test_get_velocity_bin_center(): - function_return = standards.get_velocity_bin_center() - assert function_return == +@pytest.mark.parametrize("sensor_name", os.listdir(CONFIG_FOLDER)) +def test_get_valid_variable_names(sensor_name): + assert isinstance(get_valid_variable_names(sensor_name), list) -def test_get_velocity_bin_lower(): - function_return = standards.get_velocity_bin_lower() - assert function_return == -def test_get_velocity_bin_upper(): - function_return = standards.get_velocity_bin_upper() - assert function_return == +@pytest.mark.parametrize("sensor_name", os.listdir(CONFIG_FOLDER)) +def test_get_valid_dimension_names(sensor_name): + assert isinstance(get_valid_dimension_names(sensor_name), list) -def test_get_velocity_bin_width(): - function_return = standards.get_velocity_bin_width() - assert function_return == -def test_get_raw_array_nvalues(): - function_return = standards.get_raw_array_nvalues() - assert function_return == +@pytest.mark.parametrize("sensor_name", os.listdir(CONFIG_FOLDER)) +def test_get_valid_coordinates_names(sensor_name): + assert isinstance(get_valid_coordinates_names(sensor_name), list) -def test_get_raw_array_dims_order(): - function_return = standards.get_raw_array_dims_order() - assert function_return == -""" +@pytest.mark.parametrize("sensor_name", os.listdir(CONFIG_FOLDER)) +def test_get_valid_names(sensor_name): + assert isinstance(get_valid_names(sensor_name), list) From 1fb9a07ee4546b0afb8a7e6649feef1f9e2ed5a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Longchamp?= <97044425+regislon@users.noreply.github.com> Date: Fri, 5 May 2023 16:50:46 +0200 Subject: [PATCH 2/8] feat() : Add click test command update --- disdrodb/l0/l0_processing.py | 5 +- disdrodb/tests/test_click.py | 109 +++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 disdrodb/tests/test_click.py diff --git a/disdrodb/l0/l0_processing.py b/disdrodb/l0/l0_processing.py index b1a4d422..8f0db263 100644 --- a/disdrodb/l0/l0_processing.py +++ b/disdrodb/l0/l0_processing.py @@ -192,7 +192,10 @@ def _generate_l0b( filename=filename, parallel=parallel, ) - logger_fpath = logger.handlers[0].baseFilename + if not os.environ.get("PYTEST_CURRENT_TEST"): + logger_fpath = logger.handlers[0].baseFilename + else: + logger_fpath = None ##------------------------------------------------------------------------. # Log start processing diff --git a/disdrodb/tests/test_click.py b/disdrodb/tests/test_click.py new file mode 100644 index 00000000..bb67ba7e --- /dev/null +++ b/disdrodb/tests/test_click.py @@ -0,0 +1,109 @@ +from click.testing import CliRunner +import os +import pytest +import shutil +import glob + + +# current file path +PACKAGE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +RAW_DIR = os.path.join(PACKAGE_DIR, "tests", "pytest_files", "check_readers", "DISDRODB") +DATA_SOURCE = "EPFL" +CAMPAIGN_NAME = "PARSIVEL_2007" +STATION_NAME = "10" + + +@pytest.fixture +def remove_processed_folder(request: list) -> None: + path_processed_folder = os.path.join(RAW_DIR, "Processed") + if os.path.exists(path_processed_folder): + shutil.rmtree(path_processed_folder) + yield + if os.path.exists(path_processed_folder): + shutil.rmtree(path_processed_folder) + + +@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True) +def test_run_disdrodb_l0a_station(remove_processed_folder): + """Test the run_disdrodb_l0a_station command.""" + + from disdrodb.l0.scripts.run_disdrodb_l0a_station import run_disdrodb_l0a_station + + runner = CliRunner() + runner.invoke(run_disdrodb_l0a_station, [RAW_DIR, DATA_SOURCE, CAMPAIGN_NAME, STATION_NAME]) + list_of_l0a = glob.glob( + os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0A", STATION_NAME, "*.parquet") + ) + assert len(list_of_l0a) > 0 + + +@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True) +def test_run_disdrodb_l0a(remove_processed_folder): + """Test the run_disdrodb_l0a command.""" + + from disdrodb.l0.scripts.run_disdrodb_l0a import run_disdrodb_l0a + + runner = CliRunner() + runner.invoke(run_disdrodb_l0a, [RAW_DIR]) + list_of_l0a = glob.glob( + os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0A", STATION_NAME, "*.parquet") + ) + assert len(list_of_l0a) > 0 + + +@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True) +def test_run_disdrodb_l0b_station(remove_processed_folder): + """Test the run_disdrodb_l0b_station command.""" + from disdrodb.l0.scripts.run_disdrodb_l0a_station import run_disdrodb_l0a_station + + runner = CliRunner() + runner.invoke(run_disdrodb_l0a_station, [RAW_DIR, DATA_SOURCE, CAMPAIGN_NAME, STATION_NAME]) + + from disdrodb.l0.scripts.run_disdrodb_l0b_station import run_disdrodb_l0b_station + + runner.invoke(run_disdrodb_l0b_station, [RAW_DIR, DATA_SOURCE, CAMPAIGN_NAME, STATION_NAME]) + + list_of_l0b = glob.glob(os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0B", STATION_NAME, "*.nc")) + assert len(list_of_l0b) > 0 + + +@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True) +def test_run_disdrodb_l0_station(remove_processed_folder): + """Test the run_disdrodb_l0_station command.""" + + from disdrodb.l0.scripts.run_disdrodb_l0_station import run_disdrodb_l0_station + + runner = CliRunner() + runner.invoke(run_disdrodb_l0_station, [RAW_DIR, DATA_SOURCE, CAMPAIGN_NAME, STATION_NAME]) + + list_of_l0b = glob.glob(os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0B", STATION_NAME, "*.nc")) + assert len(list_of_l0b) > 0 + + +@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True) +def test_run_disdrodb_l0b(remove_processed_folder): + """Test the run_disdrodb_l0b command.""" + + from disdrodb.l0.scripts.run_disdrodb_l0a import run_disdrodb_l0a + + runner = CliRunner() + runner.invoke(run_disdrodb_l0a, [RAW_DIR]) + + from disdrodb.l0.scripts.run_disdrodb_l0b import run_disdrodb_l0b + + runner.invoke(run_disdrodb_l0b, [RAW_DIR]) + list_of_l0b = glob.glob(os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0B", STATION_NAME, "*.nc")) + assert len(list_of_l0b) > 0 + + +@pytest.mark.parametrize("remove_processed_folder", [()], indirect=True) +def test_full_run_disdrodb_l0(remove_processed_folder): + """Test the run_disdrodb_l0b command.""" + + from disdrodb.l0.scripts.run_disdrodb_l0 import run_disdrodb_l0 + + runner = CliRunner() + runner.invoke(run_disdrodb_l0, [RAW_DIR]) + + list_of_l0b = glob.glob(os.path.join(RAW_DIR, "Processed", DATA_SOURCE, CAMPAIGN_NAME, "L0B", STATION_NAME, "*.nc")) + assert len(list_of_l0b) > 0 From 4a0d77e19646bc5e6a375364895380b44cc18d2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Longchamp?= <97044425+regislon@users.noreply.github.com> Date: Mon, 8 May 2023 09:12:06 +0200 Subject: [PATCH 3/8] fix() : issue on tests definition --- disdrodb/tests/test_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/disdrodb/tests/test_io.py b/disdrodb/tests/test_io.py index 480b1ab3..a42387a1 100644 --- a/disdrodb/tests/test_io.py +++ b/disdrodb/tests/test_io.py @@ -74,7 +74,7 @@ def test_create_initial_directory_structure(tmp_path, mocker): raw_dir=raw_dir, processed_dir=processed_dir, station_name=station_name, force=force ) - l0a_folder_path = os.path.join(processed_dir, "L0B") + l0a_folder_path = os.path.join(processed_dir, "L0A") assert os.path.exists(l0a_folder_path) From e6623feb85d3671011cfc44df6501a6ff6f15a31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Longchamp?= <97044425+regislon@users.noreply.github.com> Date: Mon, 8 May 2023 09:12:59 +0200 Subject: [PATCH 4/8] fix() : make test windows compatible --- disdrodb/tests/test_upload_data.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/disdrodb/tests/test_upload_data.py b/disdrodb/tests/test_upload_data.py index 7c644bf9..76d7b3e0 100644 --- a/disdrodb/tests/test_upload_data.py +++ b/disdrodb/tests/test_upload_data.py @@ -82,7 +82,11 @@ def test_upload_to_zenodo(tmp_path, requests_mock): metadata_dict2 = get_metadata_dict(disdrodb_dir, data_source, campaign_name, station_name2) new_station_url2 = metadata_dict2["data_url"] - assert new_station_url2.endswith(f"/files/{data_source}/{campaign_name}/{station_name2}.zip") + list_new_station_url2 = new_station_url2.split(os.path.sep) + + list_new_station_url2 = re.split(r"[\\/]", new_station_url2) + + assert list_new_station_url2[-4:] == ["files", data_source, campaign_name, f"{station_name2}.zip"] # Test upload of already uploaded data upload_disdrodb_archives(platform="sandbox.zenodo", disdrodb_dir=str(disdrodb_dir)) From 5948499063de519ba3a05e04bfa8c02c08cf397f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Longchamp?= <97044425+regislon@users.noreply.github.com> Date: Mon, 8 May 2023 09:42:58 +0200 Subject: [PATCH 5/8] fix() : add dask[distributed] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1b8691bc..e8acaa0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ dependencies = [ "PyYAML", "setuptools", "xarray", - "dask", + "dask[distributed]", "tqdm", "pooch", "requests" From e782413b6ed58ab0d1abde0cdfd6ebc63eee0ab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Longchamp?= <97044425+regislon@users.noreply.github.com> Date: Mon, 8 May 2023 09:54:34 +0200 Subject: [PATCH 6/8] refactor() : Addapt tests to PR comments --- disdrodb/tests/test_l0_reader.py | 6 +++--- disdrodb/tests/{test_click.py => test_scripts.py} | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename disdrodb/tests/{test_click.py => test_scripts.py} (100%) diff --git a/disdrodb/tests/test_l0_reader.py b/disdrodb/tests/test_l0_reader.py index bb9cf9f6..9fce7e8a 100644 --- a/disdrodb/tests/test_l0_reader.py +++ b/disdrodb/tests/test_l0_reader.py @@ -78,7 +78,7 @@ def test_get_station_reader(tmp_path): campaign_name=campaign_name, station_name=station_name, ) - assert callable(result) is True + assert callable(result) def test_get_reader_from_metadata(tmp_path): @@ -96,7 +96,7 @@ def test_get_reader_from_metadata(tmp_path): campaign_name=campaign_name, ) result = get_reader_from_metadata_reader_key(reader_data_source_name=reader_data_source_name) - assert callable(result) is True + assert callable(result) def test_get_readers_paths_by_data_source(): @@ -111,7 +111,7 @@ def test_check_available_readers(): def test_get_reader_from_metadata_reader_key(): reader_data_source_name = f"{DATA_SOURCE}/{CAMPAIGN_NAME}" result = get_reader_from_metadata_reader_key(reader_data_source_name=reader_data_source_name) - assert callable(result) is True + assert callable(result) def test__get_readers_data_sources_path(): diff --git a/disdrodb/tests/test_click.py b/disdrodb/tests/test_scripts.py similarity index 100% rename from disdrodb/tests/test_click.py rename to disdrodb/tests/test_scripts.py From 7dc76cd149037247ec6d33c58379cc1f3a22ecb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Longchamp?= <97044425+regislon@users.noreply.github.com> Date: Mon, 8 May 2023 12:55:05 +0200 Subject: [PATCH 7/8] feat() : add new tests --- disdrodb/tests/test_metadata.py | 55 ++++++++++++++++++++++++---- disdrodb/tests/test_utils_scripts.py | 50 +++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 8 deletions(-) create mode 100644 disdrodb/tests/test_utils_scripts.py diff --git a/disdrodb/tests/test_metadata.py b/disdrodb/tests/test_metadata.py index d5151b18..c19f9189 100644 --- a/disdrodb/tests/test_metadata.py +++ b/disdrodb/tests/test_metadata.py @@ -1,13 +1,52 @@ import os import yaml -from disdrodb.l0 import metadata - +from disdrodb.l0.metadata import ( + create_campaign_default_metadata, + read_metadata, + write_default_metadata, + get_default_metadata_dict, +) PATH_TEST_FOLDERS_FILES = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pytest_files") +def create_fake_station_file( + disdrodb_dir, data_source="data_source", campaign_name="campaign_name", station_name="station_name" +): + subfolder_path = disdrodb_dir / "DISDRODB" / "Raw" / data_source / campaign_name / "data" / station_name + if not os.path.exists(subfolder_path): + subfolder_path.mkdir(parents=True) + + subfolder_path = disdrodb_dir / "DISDRODB" / "Raw" / data_source / campaign_name / "metadata" + if not os.path.exists(subfolder_path): + subfolder_path.mkdir(parents=True) + + path_file = os.path.join(subfolder_path, f"{station_name}.txt") + print(path_file) + with open(path_file, "w") as f: + f.write("This is some fake text.") + + +def test_create_campaign_default_metadata(tmp_path): + campaign_name = "test_campaign" + data_source = "test_data_source" + station_name = "test_station" + + create_fake_station_file( + disdrodb_dir=tmp_path, data_source=data_source, campaign_name=campaign_name, station_name=station_name + ) + + create_campaign_default_metadata(os.path.join(tmp_path, "DISDRODB"), campaign_name, data_source) + + expected_file_path = os.path.join( + tmp_path, "DISDRODB", "Raw", data_source, campaign_name, "metadata", f"{station_name}.yml" + ) + + assert os.path.exists(expected_file_path) + + def test_get_default_metadata(): - assert isinstance(metadata.get_default_metadata_dict(), dict) + assert isinstance(get_default_metadata_dict(), dict) def create_fake_metadata_folder(tmp_path, data_source="data_source", campaign_name="campaign_name"): @@ -28,7 +67,7 @@ def test_write_default_metadata(tmp_path): fpath = os.path.join(create_fake_metadata_folder(tmp_path, data_source, campaign_name), f"{station_name}.yml") # create metadata file - metadata.write_default_metadata(str(fpath)) + write_default_metadata(str(fpath)) # check file exist assert os.path.exists(fpath) @@ -38,7 +77,7 @@ def test_write_default_metadata(tmp_path): dictionary = yaml.safe_load(f) # check is the expected dictionary - expected_dict = metadata.get_default_metadata_dict() + expected_dict = get_default_metadata_dict() expected_dict["data_source"] = data_source expected_dict["campaign_name"] = campaign_name expected_dict["station_name"] = station_name @@ -64,13 +103,13 @@ def test_read_metadata(): os.remove(metadata_path) # create data - data = metadata.get_default_metadata_dict() + data = get_default_metadata_dict() # create metadata file - metadata.write_default_metadata(str(metadata_path)) + write_default_metadata(str(metadata_path)) # Read the metadata file - function_return = metadata.read_metadata(raw_dir, station_name) + function_return = read_metadata(raw_dir, station_name) assert function_return == data diff --git a/disdrodb/tests/test_utils_scripts.py b/disdrodb/tests/test_utils_scripts.py new file mode 100644 index 00000000..173cbdaa --- /dev/null +++ b/disdrodb/tests/test_utils_scripts.py @@ -0,0 +1,50 @@ +from disdrodb.utils.scripts import parse_arg_to_list + + +def test_parse_arg_to_list_empty_string(): + """Test parse_arg_to_list() with an empty string.""" + args = "" + expected_output = None + assert parse_arg_to_list(args) == expected_output + + +def test_parse_arg_to_list_single_variable(): + """Test parse_arg_to_list() with a single variable.""" + args = "variable" + expected_output = ["variable"] + assert parse_arg_to_list(args) == expected_output + + +def test_parse_arg_to_list_multiple_variables(): + """Test parse_arg_to_list() with multiple variables.""" + args = "variable1 variable2" + expected_output = ["variable1", "variable2"] + assert parse_arg_to_list(args) == expected_output + + +def test_parse_arg_to_list_extra_spaces(): + """Test parse_arg_to_list() with extra spaces between variables.""" + args = " variable1 variable2 " + expected_output = ["variable1", "variable2"] + assert parse_arg_to_list(args) == expected_output + + +def test_parse_arg_to_list_none(): + """Test parse_arg_to_list() with None input.""" + args = None + expected_output = None + assert parse_arg_to_list(args) == expected_output + + +def test_parse_arg_to_list_other_types(): + """Test parse_arg_to_list() with other types of input.""" + args = 123 + expected_output = 123 + assert parse_arg_to_list(args) == expected_output + + +def test_parse_arg_to_list_empty_list(): + """Test parse_arg_to_list() with an empty list.""" + args = [] + expected_output = [] + assert parse_arg_to_list(args) == expected_output From e0ff0824392c79b630edc3907cfb1d9760cc38b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Longchamp?= <97044425+regislon@users.noreply.github.com> Date: Mon, 8 May 2023 12:56:31 +0200 Subject: [PATCH 8/8] refactor() : CI test ordering issue --- disdrodb/tests/{test_scripts.py => test_cmd_processing.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename disdrodb/tests/{test_scripts.py => test_cmd_processing.py} (100%) diff --git a/disdrodb/tests/test_scripts.py b/disdrodb/tests/test_cmd_processing.py similarity index 100% rename from disdrodb/tests/test_scripts.py rename to disdrodb/tests/test_cmd_processing.py