diff --git a/disdrodb/L0/L0A_processing.py b/disdrodb/L0/L0A_processing.py index ad31a3ad..c7b43ec6 100644 --- a/disdrodb/L0/L0A_processing.py +++ b/disdrodb/L0/L0A_processing.py @@ -615,6 +615,10 @@ def read_L0A_raw_file_list( if df_sanitizer_fun is not None: df = df_sanitizer_fun(df, lazy=lazy) + # Remove duplicated timesteps + # - TODO: Log info !!! + df = df.drop_duplicates(subset="time", keep="first") + # ------------------------------------------------------. # Check column names met DISDRODB standards check_L0A_column_names(df, sensor_name=sensor_name) diff --git a/disdrodb/L0/readers/EPFL/HYMEX_2012.py b/disdrodb/L0/readers/EPFL/HYMEX_LTE_SOP2.py similarity index 95% rename from disdrodb/L0/readers/EPFL/HYMEX_2012.py rename to disdrodb/L0/readers/EPFL/HYMEX_LTE_SOP2.py index 58363393..60041f2b 100644 --- a/disdrodb/L0/readers/EPFL/HYMEX_2012.py +++ b/disdrodb/L0/readers/EPFL/HYMEX_LTE_SOP2.py @@ -16,7 +16,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . # -----------------------------------------------------------------------------. 
-"""Reader for HYMEX campaign.""" +"""Reader for HYMEX SOP2 campaign.""" from disdrodb.L0 import run_L0 from disdrodb.L0.L0_processing import reader_generic_docstring, is_documented_by @@ -101,6 +101,10 @@ def df_sanitizer_fun(df, lazy=False): # - Convert time column to datetime df["time"] = dd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S") + # - Drop rows when "raw_drop_number" is "NA" (dask dropna has no "axis") + # --> This is used to drop all rows where all values are "NA" + df = df.dropna(subset=["raw_drop_number"]) + # - Drop columns not agreeing with DISDRODB L0 standards columns_to_drop = [ "datalogger_debug", diff --git a/disdrodb/L0/readers/EPFL/HYMEX_LTE_SOP3.py b/disdrodb/L0/readers/EPFL/HYMEX_LTE_SOP3.py new file mode 100644 index 00000000..5d7ee691 --- /dev/null +++ b/disdrodb/L0/readers/EPFL/HYMEX_LTE_SOP3.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2022 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. 
+"""Reader for HYMEX SOP3 campaign.""" +from disdrodb.L0 import run_L0 +from disdrodb.L0.L0_processing import reader_generic_docstring, is_documented_by + + +@is_documented_by(reader_generic_docstring) +def reader( + raw_dir, + processed_dir, + l0a_processing=True, + l0b_processing=True, + keep_l0a=False, + force=False, + verbose=False, + debugging_mode=False, + lazy=True, + single_netcdf=True, +): + + ##------------------------------------------------------------------------. + #### - Define column names + # - When no data are logged (every 30 seconds), all columns (except time) have "NA" values + column_names = [ + "time", + "id", + "datalogger_temperature", + "datalogger_voltage", + "rainfall_rate_32bit", + "rainfall_accumulated_32bit", + "weather_code_synop_4680", + "weather_code_synop_4677", + "reflectivity_32bit", + "mor_visibility", + "laser_amplitude", + "number_particles", + "sensor_temperature", + "sensor_heating_current", + "sensor_battery_voltage", + "sensor_status", + "rainfall_amount_absolute_32bit", + "datalogger_debug", + "raw_drop_concentration", + "raw_drop_average_velocity", + "raw_drop_number", + "datalogger_error", + ] + + ##------------------------------------------------------------------------. + #### - Define reader options + reader_kwargs = {} + # - Define delimiter + reader_kwargs["delimiter"] = "," + # - Avoid first column to become df index !!! 
+ reader_kwargs["index_col"] = False + # - Define behaviour when encountering bad lines + reader_kwargs["on_bad_lines"] = "skip" + # - Define reader engine + # - C engine is faster + # - Python engine is more feature-complete + reader_kwargs["engine"] = "python" + # - Define on-the-fly decompression of on-disk data + # - Available: gzip, bz2, zip + reader_kwargs["compression"] = "infer" + # - Strings to recognize as NA/NaN and replace with standard NA flags + # - Already included: ‘#N/A’, ‘#N/A N/A’, ‘#NA’, ‘-1.#IND’, ‘-1.#QNAN’, + # ‘-NaN’, ‘-nan’, ‘1.#IND’, ‘1.#QNAN’, ‘’, ‘N/A’, + # ‘NA’, ‘NULL’, ‘NaN’, ‘n/a’, ‘nan’, ‘null’ + reader_kwargs["na_values"] = ["na", "", "error", "-.-", " NA"] + # - Define max size of dask dataframe chunks (if lazy=True) + # - If None: use a single block for each file + # - Otherwise: "MB" by which to cut up larger files + reader_kwargs["blocksize"] = None # "50MB" + + ##------------------------------------------------------------------------. + #### - Define dataframe sanitizer function for L0 processing + def df_sanitizer_fun(df, lazy=False): + # Import dask or pandas + if lazy: + import dask.dataframe as dd + else: + import pandas as dd + + # - Convert time column to datetime + df["time"] = dd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S") + + # - Drop rows when "raw_drop_number" is "NA" (dask dropna has no "axis") + # --> This is used to drop all rows where all values are "NA" + df = df.dropna(subset=["raw_drop_number"]) + + # - Drop columns not agreeing with DISDRODB L0 standards + columns_to_drop = [ + "datalogger_debug", + "datalogger_voltage", + "id", + "datalogger_temperature", + "datalogger_error", + ] + df = df.drop(columns=columns_to_drop) + return df + + ##------------------------------------------------------------------------. + #### - Define glob pattern to search data files in /data/ + files_glob_pattern = "*.dat*" + + ####----------------------------------------------------------------------. 
+ #### - Create L0 products + run_L0( + raw_dir=raw_dir, + processed_dir=processed_dir, + l0a_processing=l0a_processing, + l0b_processing=l0b_processing, + keep_l0a=keep_l0a, + force=force, + verbose=verbose, + debugging_mode=debugging_mode, + lazy=lazy, + single_netcdf=single_netcdf, + # Custom arguments of the reader + files_glob_pattern=files_glob_pattern, + column_names=column_names, + reader_kwargs=reader_kwargs, + df_sanitizer_fun=df_sanitizer_fun, + ) diff --git a/disdrodb/L0/readers/EPFL/HYMEX_LTE_SOP4.py b/disdrodb/L0/readers/EPFL/HYMEX_LTE_SOP4.py new file mode 100644 index 00000000..5c931643 --- /dev/null +++ b/disdrodb/L0/readers/EPFL/HYMEX_LTE_SOP4.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2022 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. 
+"""Reader for HYMEX SOP4 campaign.""" +from disdrodb.L0 import run_L0 +from disdrodb.L0.L0_processing import reader_generic_docstring, is_documented_by + + +@is_documented_by(reader_generic_docstring) +def reader( + raw_dir, + processed_dir, + l0a_processing=True, + l0b_processing=True, + keep_l0a=False, + force=False, + verbose=False, + debugging_mode=False, + lazy=True, + single_netcdf=True, +): + + ##------------------------------------------------------------------------. + #### - Define column names + # - When no data are logged (every 30 seconds), all columns (except time) have "NA" values + column_names = [ + "time", + "id", + "datalogger_temperature", + "datalogger_voltage", + "rainfall_rate_32bit", + "rainfall_accumulated_32bit", + "weather_code_synop_4680", + "weather_code_synop_4677", + "reflectivity_32bit", + "mor_visibility", + "laser_amplitude", + "number_particles", + "sensor_temperature", + "sensor_heating_current", + "sensor_battery_voltage", + "sensor_status", + "rainfall_amount_absolute_32bit", + "datalogger_debug", + "raw_drop_concentration", + "raw_drop_average_velocity", + "raw_drop_number", + "datalogger_error", + ] + + ##------------------------------------------------------------------------. + #### - Define reader options + reader_kwargs = {} + # - Define delimiter + reader_kwargs["delimiter"] = "," + # - Avoid first column to become df index !!! 
+ reader_kwargs["index_col"] = False + # - Define behaviour when encountering bad lines + reader_kwargs["on_bad_lines"] = "skip" + # - Define reader engine + # - C engine is faster + # - Python engine is more feature-complete + reader_kwargs["engine"] = "python" + # - Define on-the-fly decompression of on-disk data + # - Available: gzip, bz2, zip + reader_kwargs["compression"] = "infer" + # - Strings to recognize as NA/NaN and replace with standard NA flags + # - Already included: ‘#N/A’, ‘#N/A N/A’, ‘#NA’, ‘-1.#IND’, ‘-1.#QNAN’, + # ‘-NaN’, ‘-nan’, ‘1.#IND’, ‘1.#QNAN’, ‘’, ‘N/A’, + # ‘NA’, ‘NULL’, ‘NaN’, ‘n/a’, ‘nan’, ‘null’ + reader_kwargs["na_values"] = ["na", "", "error", "-.-", " NA"] + # - Define max size of dask dataframe chunks (if lazy=True) + # - If None: use a single block for each file + # - Otherwise: "MB" by which to cut up larger files + reader_kwargs["blocksize"] = None # "50MB" + + ##------------------------------------------------------------------------. + #### - Define dataframe sanitizer function for L0 processing + def df_sanitizer_fun(df, lazy=False): + # Import dask or pandas + if lazy: + import dask.dataframe as dd + else: + import pandas as dd + + # - Convert time column to datetime + df["time"] = dd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S") + + # - Drop rows when "raw_drop_number" is "NA" (dask dropna has no "axis") + # --> This is used to drop all rows where all values are "NA" + df = df.dropna(subset=["raw_drop_number"]) + + # - Drop columns not agreeing with DISDRODB L0 standards + columns_to_drop = [ + "datalogger_debug", + "datalogger_voltage", + "id", + "datalogger_temperature", + "datalogger_error", + ] + df = df.drop(columns=columns_to_drop) + return df + + ##------------------------------------------------------------------------. + #### - Define glob pattern to search data files in /data/ + files_glob_pattern = "*.dat*" + + ####----------------------------------------------------------------------. 
+ #### - Create L0 products + run_L0( + raw_dir=raw_dir, + processed_dir=processed_dir, + l0a_processing=l0a_processing, + l0b_processing=l0b_processing, + keep_l0a=keep_l0a, + force=force, + verbose=verbose, + debugging_mode=debugging_mode, + lazy=lazy, + single_netcdf=single_netcdf, + # Custom arguments of the reader + files_glob_pattern=files_glob_pattern, + column_names=column_names, + reader_kwargs=reader_kwargs, + df_sanitizer_fun=df_sanitizer_fun, + ) diff --git a/disdrodb/L0/readers/EPFL/LOCARNO_2019_2020.py b/disdrodb/L0/readers/EPFL/LOCARNO_2019_2020.py new file mode 100644 index 00000000..a1bac169 --- /dev/null +++ b/disdrodb/L0/readers/EPFL/LOCARNO_2019_2020.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +# -----------------------------------------------------------------------------. +# Copyright (c) 2021-2022 DISDRODB developers +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# -----------------------------------------------------------------------------. +from disdrodb.L0 import run_L0 +from disdrodb.L0.L0_processing import reader_generic_docstring, is_documented_by + + +@is_documented_by(reader_generic_docstring) +def reader( + raw_dir, + processed_dir, + l0a_processing=True, + l0b_processing=True, + keep_l0a=False, + force=False, + verbose=False, + debugging_mode=False, + lazy=True, + single_netcdf=True, +): + + ##------------------------------------------------------------------------. 
+ #### - Define column names + column_names = [ + "id", + "latitude", + "longitude", + "time", + "datalogger_temperature", + "TO_BE_SPLITTED", # datalogger_voltage and rainfall_rate_32bit + "rainfall_accumulated_32bit", + "weather_code_synop_4680", + "weather_code_synop_4677", + "reflectivity_32bit", + "mor_visibility", + "laser_amplitude", + "number_particles", + "sensor_temperature", + "sensor_heating_current", + "sensor_battery_voltage", + "sensor_status", + "rainfall_amount_absolute_32bit", + "error_code", + "raw_drop_concentration", + "raw_drop_average_velocity", + "raw_drop_number", + "datalogger_error", + ] + + ##------------------------------------------------------------------------. + #### - Define reader options + reader_kwargs = {} + # - Define delimiter + reader_kwargs["delimiter"] = ";" + # - Avoid first column to become df index + reader_kwargs["index_col"] = False + # - Define behaviour when encountering bad lines + reader_kwargs["on_bad_lines"] = "skip" + # - Define reader engine + # - C engine is faster + # - Python engine is more feature-complete + reader_kwargs["engine"] = "python" + # - Define on-the-fly decompression of on-disk data + # - Available: gzip, bz2, zip + reader_kwargs["compression"] = "infer" + # - Strings to recognize as NA/NaN and replace with standard NA flags + # - Already included: ‘#N/A’, ‘#N/A N/A’, ‘#NA’, ‘-1.#IND’, ‘-1.#QNAN’, + # ‘-NaN’, ‘-nan’, ‘1.#IND’, ‘1.#QNAN’, ‘’, ‘N/A’, + # ‘NA’, ‘NULL’, ‘NaN’, ‘n/a’, ‘nan’, ‘null’ + reader_kwargs["na_values"] = ["na", "", "error"] + # - Define max size of dask dataframe chunks (if lazy=True) + # - If None: use a single block for each file + # - Otherwise: "MB" by which to cut up larger files + reader_kwargs["blocksize"] = None # "50MB" + + ##------------------------------------------------------------------------. 
+ #### - Define dataframe sanitizer function for L0 processing + def df_sanitizer_fun(df, lazy=False): + # - Import dask (lazy=True) or pandas under the common alias dd + if lazy: + import dask.dataframe as dd + else: + import pandas as dd + + # - Convert time column to datetime (format: DD-MM-YYYY HH:MM:SS) + df["time"] = dd.to_datetime(df["time"], format="%d-%m-%Y %H:%M:%S") + + # - Split TO_BE_SPLITTED at the first comma and keep only rainfall_rate_32bit + df_splitted = df["TO_BE_SPLITTED"].str.split(",", expand=True, n=1) + df_splitted.columns = ["datalogger_voltage", "rainfall_rate_32bit"] + df["rainfall_rate_32bit"] = df_splitted["rainfall_rate_32bit"] + + # - Drop columns not agreeing with DISDRODB L0 standards + columns_to_drop = [ + "id", + "TO_BE_SPLITTED", + "datalogger_temperature", + "datalogger_error", + "latitude", + "longitude", + ] + df = df.drop(columns=columns_to_drop) + return df + + ##------------------------------------------------------------------------. + #### - Define glob pattern to search data files within /data/ + files_glob_pattern = "*/*/*.dat*" # YYYY/MM/... + + ####----------------------------------------------------------------------. 
+ #### - Create L0 products + run_L0( + raw_dir=raw_dir, + processed_dir=processed_dir, + l0a_processing=l0a_processing, + l0b_processing=l0b_processing, + keep_l0a=keep_l0a, + force=force, + verbose=verbose, + debugging_mode=debugging_mode, + lazy=lazy, + single_netcdf=single_netcdf, + # Custom arguments of the reader + files_glob_pattern=files_glob_pattern, + column_names=column_names, + reader_kwargs=reader_kwargs, + df_sanitizer_fun=df_sanitizer_fun, + ) diff --git a/disdrodb/L0/readers/EPFL/RACLETS_2019_WJF.py b/disdrodb/L0/readers/EPFL/RACLETS_2019_WJF.py index 429054ea..5a5d361f 100644 --- a/disdrodb/L0/readers/EPFL/RACLETS_2019_WJF.py +++ b/disdrodb/L0/readers/EPFL/RACLETS_2019_WJF.py @@ -1,12 +1,5 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -""" -Created on Mon Jan 23 16:43:01 2023 - -@author: ghiggi -""" -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- # -----------------------------------------------------------------------------. # Copyright (c) 2021-2022 DISDRODB developers # diff --git a/disdrodb/L0/readers/EPFL/SAMOYLOV_2019.py b/disdrodb/L0/readers/EPFL/SAMOYLOV_2019.py index f4b79107..1ea2b1ef 100644 --- a/disdrodb/L0/readers/EPFL/SAMOYLOV_2019.py +++ b/disdrodb/L0/readers/EPFL/SAMOYLOV_2019.py @@ -1,12 +1,5 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -""" -Created on Mon Jan 23 18:57:23 2023 - -@author: ghiggi -""" -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- # -----------------------------------------------------------------------------. # Copyright (c) 2021-2022 DISDRODB developers #