From 4c7aa20862f0123086293c619bd93aa82b8f84c1 Mon Sep 17 00:00:00 2001
From: gnrgomes
Date: Fri, 12 Jul 2024 15:01:59 +0200
Subject: [PATCH] Reduce minor diffs in grids from diff PCs

---
 src/lisfloodutilities/gridding/lib/filters.py | 38 ++++++++++---------
 src/lisfloodutilities/gridding/lib/utils.py   | 34 ++++++++---------
 src/lisfloodutilities/gridding/lib/writers.py | 16 ++++----
 3 files changed, 45 insertions(+), 43 deletions(-)

diff --git a/src/lisfloodutilities/gridding/lib/filters.py b/src/lisfloodutilities/gridding/lib/filters.py
index 547ae8e..c3bc5e8 100644
--- a/src/lisfloodutilities/gridding/lib/filters.py
+++ b/src/lisfloodutilities/gridding/lib/filters.py
@@ -194,7 +194,7 @@ class ObservationsKiwisFilter(KiwisFilter):
     filter (as key) and the radius (in decimal degrees) to find the vicinity station from other providers (as value).
     """
 
-    CLUSTER_COLLAPSE_RADIUS = np.float64(0.011582073434000193) # decimal degrees (1287 m)
+    CLUSTER_COLLAPSE_RADIUS = np.float32(0.011582073434000193) # decimal degrees (1287 m)
 
     def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code: str = '', quiet_mode: bool = False):
         super().__init__(filter_columns, filter_args, var_code, quiet_mode)
@@ -207,9 +207,9 @@ def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code:
             self.provider_radius[provider_id] = radius
 
     @staticmethod
-    def kilometers2degrees(km: np.float64) -> np.float64:
+    def kilometers2degrees(km: np.float32) -> np.float32:
         # Convert km to degrees of latitude
-        delta_lat = km * np.float64(0.00899928005)
+        delta_lat = km * np.float32(0.00899928005)
         return delta_lat
 
     def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -226,7 +226,7 @@ def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
         return df
 
     def has_neighbor_within_radius_from_other_providers(self, row: pd.Series, tree: cKDTree = None, provider_id: int = 0,
-                                                        radius: np.float64 = CLUSTER_COLLAPSE_RADIUS) -> bool:
+                                                        radius: np.float32 = CLUSTER_COLLAPSE_RADIUS) -> bool:
         cur_provider_id = row[self.COL_PROVIDER_ID]
         if cur_provider_id == provider_id:
             location = (row[self.COL_LON], row[self.COL_LAT])
@@ -304,21 +304,21 @@ def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code:
         self.threshold_max_latitude = 72.0
         try:
             if 'EXCLUDE_BELLOW_LATITUDE' in self.args:
-                self.threshold_max_latitude = float(self.args['EXCLUDE_BELLOW_LATITUDE'])
+                self.threshold_max_latitude = np.float32(self.args['EXCLUDE_BELLOW_LATITUDE'])
         except Exception as e:
             print_msg(f'WARNING: SolarRadiationLimitsKiwisFilter using default max Latitude {self.threshold_max_latitude}')
         self.threshold_min_value = 0.0
         try:
             if 'EXCLUDE_BELLOW_VALUE' in self.args:
-                self.threshold_min_value = float(self.args['EXCLUDE_BELLOW_VALUE'])
+                self.threshold_min_value = np.float32(self.args['EXCLUDE_BELLOW_VALUE'])
         except Exception as e:
             print_msg(f'WARNING: SolarRadiationLimitsKiwisFilter using default min RG value {self.threshold_min_value}')
 
     def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
         df = super().apply_filter(df)
         # Convert to float so it can be compared to the thresholds
-        df[self.COL_LAT] = df[self.COL_LAT].astype(float)
-        df[self.COL_VALUE] = df[self.COL_VALUE].astype(float)
+        df[self.COL_LAT] = df[self.COL_LAT].astype(np.float32)
+        df[self.COL_VALUE] = df[self.COL_VALUE].astype(np.float32)
         # Filter values
         df = df[~((df[self.COL_LAT] <= self.threshold_max_latitude) & (df[self.COL_VALUE] <= self.threshold_min_value))]
         self.print_statistics(df)
@@ -352,11 +352,13 @@ def get_all_6hourly_station_values_df(self, kiwis_data_frames: List[pd.DataFrame
         merged_df = pd.concat(kiwis_data_frames)
         merged_df = merged_df[[self.COL_LON, self.COL_LAT, self.COL_PROVIDER_ID, self.COL_STATION_NUM, self.COL_STATION_ID, self.COL_VALUE]]
         merged_df.reset_index(drop=True, inplace=True)
-        result_df = merged_df.astype({self.COL_VALUE: 'float'}).groupby([self.COL_LON, self.COL_LAT,
-                                                                         self.COL_PROVIDER_ID,
-                                                                         self.COL_STATION_NUM,
-                                                                         self.COL_STATION_ID])[self.COL_VALUE].agg(['sum','count']).reset_index()
-        result_df.columns = [self.COL_LON, self.COL_LAT, self.COL_PROVIDER_ID, self.COL_STATION_NUM, self.COL_STATION_ID, 'sum_6h_values', 'count_6h_slots']
+        result_df = merged_df.astype({self.COL_VALUE: 'float32'}).groupby([self.COL_LON, self.COL_LAT,
+                                                                           self.COL_PROVIDER_ID,
+                                                                           self.COL_STATION_NUM,
+                                                                           self.COL_STATION_ID])[self.COL_VALUE].agg(
+                                                                               ['sum','count']).reset_index()
+        result_df.columns = [self.COL_LON, self.COL_LAT, self.COL_PROVIDER_ID, self.COL_STATION_NUM,
+                             self.COL_STATION_ID, 'sum_6h_values', 'count_6h_slots']
         result_df.reset_index(drop=True, inplace=True)
         return result_df
 
@@ -371,8 +373,8 @@ def format_dwd_synop_wmo_num(self, station_num: str) -> str:
         return station_num
 
     def get_decumulated_24h_value_for_missing_6h_values(self, row: pd.Series, tree: cKDTree = None, provider_id: int = 0,
-                                                        radius: float = ObservationsKiwisFilter.CLUSTER_COLLAPSE_RADIUS,
-                                                        stations_6h_df: pd.DataFrame = None) -> float:
+                                                        radius: np.float32 = ObservationsKiwisFilter.CLUSTER_COLLAPSE_RADIUS,
+                                                        stations_6h_df: pd.DataFrame = None) -> np.float32:
         """
         DECUMULATED_PR = (PR - Sum(PR6)) / (number of missing values)
         If there are more than one 6h station in the radius, select according to the following rules by order:
@@ -421,11 +423,11 @@ def filter(self, kiwis_files: List[Path], kiwis_timestamps: List[str], kiwis_dat
         # Guarantee datatype of value column
         for i in range(len(kiwis_data_frames)):
             kiwis_data_frames[i] = kiwis_data_frames[i].astype({
-                # self.COL_LON: 'float',
-                # self.COL_LAT: 'float',
+                # self.COL_LON: 'float32',
+                # self.COL_LAT: 'float32',
                 # self.COL_PROVIDER_ID: 'int',
                 # self.COL_STATION_ID: 'int',
-                self.COL_VALUE: 'float'})
+                self.COL_VALUE: 'float32'})
 
         self.kiwis_24h_dataframe = kiwis_data_frames[0]
         kiwis_6h_dataframes = kiwis_data_frames[1:]
diff --git a/src/lisfloodutilities/gridding/lib/utils.py b/src/lisfloodutilities/gridding/lib/utils.py
index 0115ebe..3939779 100755
--- a/src/lisfloodutilities/gridding/lib/utils.py
+++ b/src/lisfloodutilities/gridding/lib/utils.py
@@ -82,11 +82,11 @@ def __init__(self, dem_map: Path, quiet_mode: bool = False):
         reader = NetCDFReader(self._dem_map)
         self.nrows = reader._rows
         self.ncols = reader._cols
-        self.mv = reader.mv.astype(np.float64)
-        self.values = reader.values.astype(np.float64)
+        self.mv = reader.mv.astype(np.float32)
+        self.values = reader.values.astype(np.float32)
         self.lats, self.lons = reader.get_lat_lon_values()
-        self.lats = self.lats.astype(np.float64)
-        self.lons = self.lons.astype(np.float64)
+        self.lats = self.lats.astype(np.float32)
+        self.lons = self.lons.astype(np.float32)
         self.lat_values = reader.get_lat_values()
         self.lon_values = reader.get_lon_values()
         self.cell_size_x = reader._pxlW
@@ -319,12 +319,12 @@ def get_config_field(self, config_group: str = '', config_property: str = '') ->
         return self.__configFile.get(config_group, config_property)
 
     @property
-    def scale_factor(self) -> np.float64:
-        return np.float64(self.get_config_field('PROPERTIES', 'VALUE_SCALE'))
+    def scale_factor(self) -> float:
+        return float(self.get_config_field('PROPERTIES', 'VALUE_SCALE'))
 
     @property
-    def add_offset(self) -> np.float64:
-        return np.float64(self.get_config_field('PROPERTIES', 'VALUE_OFFSET'))
+    def add_offset(self) -> float:
+        return float(self.get_config_field('PROPERTIES', 'VALUE_OFFSET'))
 
     @property
     def value_min(self) -> int:
@@ -343,8 +343,8 @@ def value_max_packed(self) -> int:
         return int((self.value_max - self.add_offset) / self.scale_factor)
 
     @property
-    def value_nan_packed(self) -> np.float64:
-        return np.float64((self.VALUE_NAN - self.add_offset) / self.scale_factor)
+    def value_nan_packed(self) -> np.float32:
+        return np.float32((self.VALUE_NAN - self.add_offset) / self.scale_factor)
 
     @property
     def var_code(self) -> str:
@@ -355,8 +355,8 @@ def do_height_correction(self) -> bool:
         return self.height_correction_factor != 0.0
 
     @property
-    def height_correction_factor(self) -> np.float64:
-        return np.float64(self.get_config_field('PROPERTIES', 'HEIGHT_CORRECTION_FACTOR'))
+    def height_correction_factor(self) -> np.float32:
+        return np.float32(self.get_config_field('PROPERTIES', 'HEIGHT_CORRECTION_FACTOR'))
 
     @property
     def truncate_negative_values(self) -> bool:
@@ -402,7 +402,7 @@ def __init__(self, conf: Config, quiet_mode: bool = False, use_broadcasting: boo
         super().__init__(quiet_mode)
         self.conf = conf
         self.use_broadcasting = use_broadcasting
-        self.unit_conversion = np.float64(self.conf.get_config_field('PROPERTIES', 'UNIT_CONVERSION'))
+        self.unit_conversion = np.float32(self.conf.get_config_field('PROPERTIES', 'UNIT_CONVERSION'))
 
     def correct_height(self, df: pd.DataFrame) -> pd.DataFrame:
         if self.conf.do_height_correction:
@@ -437,7 +437,7 @@ def prepare_grid(self, result: np.ndarray, grid_shape: np.ndarray) -> np.ndarray
         result[np.where(result == self.conf.VALUE_NAN)] = np.nan
         result[np.where(result < self.conf.value_min)] = np.nan
         result[np.where(result > self.conf.value_max)] = np.nan
-        result = result.astype(np.float64)
+        result = result.astype(np.float32)
         result = np.round(result, 1)
         result[~np.isnan(result)] -= self.conf.add_offset
         result[~np.isnan(result)] /= self.conf.scale_factor
@@ -479,9 +479,9 @@ def generate_grid(self, filename: Path) -> np.ndarray:
         x = df[self.conf.COLUMN_LON].values
         y = df[self.conf.COLUMN_LAT].values
         z = df[self.conf.COLUMN_VALUE].values
-        xp = np.array(x).astype(np.float64)
-        yp = np.array(y).astype(np.float64)
-        values = np.array(z).astype(np.float64)
+        xp = np.array(x).astype(np.float32)
+        yp = np.array(y).astype(np.float32)
+        values = np.array(z).astype(np.float32)
         df = None
         if self.conf.interpolation_mode == 'cdd':
             scipy_interpolation = ScipyInterpolation(xp, yp, self.conf.grid_details, values,
diff --git a/src/lisfloodutilities/gridding/lib/writers.py b/src/lisfloodutilities/gridding/lib/writers.py
index 24e6dec..e4b8ef0 100644
--- a/src/lisfloodutilities/gridding/lib/writers.py
+++ b/src/lisfloodutilities/gridding/lib/writers.py
@@ -107,7 +107,7 @@ def close(self):
 class NetCDFWriter(OutputWriter):
     NETCDF_DATASET_FORMAT = 'NETCDF4_CLASSIC'
     NETCDF_CONVENTIONS = 'CF-1.6'
-    NETCDF_VAR_DATA_TYPE = 'f8' # np.float64
+    NETCDF_VAR_DATA_TYPE = 'f4' # np.float32
     NETCDF_COORDINATES_DATA_TYPE = 'i4' # np.int32
     NETCDF_VAR_TIME_CALENDAR_TYPE = 'proleptic_gregorian'
     COMPRESSION_LEVEL = 4
@@ -231,16 +231,16 @@ def __setup_netcdf_metadata(self, start_date: datetime = None):
 
         proj = self.nf.createVariable(self.conf.get_config_field('PROJECTION','GRID_MAPPING'), self.NETCDF_COORDINATES_DATA_TYPE)
         self.__set_property(proj, 'grid_mapping_name', 'PROJECTION', 'GRID_MAPPING')
-        self.__set_property(proj, 'false_easting', 'PROJECTION', 'FALSE_EASTING', np.float64)
-        self.__set_property(proj, 'false_northing', 'PROJECTION', 'FALSE_NORTHING', np.float64)
-        self.__set_property(proj, 'longitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LONGITUDE', np.float64)
-        self.__set_property(proj, 'latitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LATITUDE', np.float64)
-        self.__set_property(proj, 'semi_major_axis', 'PROJECTION', 'SEMI_MAJOR_AXIS', np.float64)
-        self.__set_property(proj, 'inverse_flattening', 'PROJECTION', 'INVERSE_FLATTENING', np.float64)
+        self.__set_property(proj, 'false_easting', 'PROJECTION', 'FALSE_EASTING', np.float32)
+        self.__set_property(proj, 'false_northing', 'PROJECTION', 'FALSE_NORTHING', np.float32)
+        self.__set_property(proj, 'longitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LONGITUDE', np.float32)
+        self.__set_property(proj, 'latitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LATITUDE', np.float32)
+        self.__set_property(proj, 'semi_major_axis', 'PROJECTION', 'SEMI_MAJOR_AXIS', np.float32)
+        self.__set_property(proj, 'inverse_flattening', 'PROJECTION', 'INVERSE_FLATTENING', np.float32)
         self.__set_property(proj, 'proj4_params', 'PROJECTION', 'PARAMS')
         self.__set_property(proj, 'EPSG_code', 'PROJECTION', 'EPSG_CODE')
         self.__set_property(proj, 'spatial_ref', 'PROJECTION', 'STRING')
-        # self.__set_property(proj, 'longitude_of_prime_meridian', 'PROJECTION', 'LONGITUDE_PRIME_MERIDIAN', np.float64)
+        # self.__set_property(proj, 'longitude_of_prime_meridian', 'PROJECTION', 'LONGITUDE_PRIME_MERIDIAN', np.float32)
         # self.__set_property(proj, 'GeoTransform', 'PROJECTION', 'GEO_TRANSFORM', self.__get_tuple)
 
         var_data_type_packed = self.conf.get_config_field('PROPERTIES', 'DATA_TYPE_PACKED')