Reduce minor diffs in grids from diff PCs
gnrgomes committed Jul 12, 2024
1 parent df668c3 · commit 4c7aa20
Showing 3 changed files with 45 additions and 43 deletions.
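The change is mechanical across all three files: grid values and related constants are narrowed from np.float64 to np.float32, presumably so that results that differ only in the low-order float64 bits from one machine to the next collapse to identical single-precision values. A minimal illustration of the effect (not part of the commit):

    import numpy as np

    # Two hypothetical results of the same computation on different machines,
    # equal except for low-order float64 noise
    a = np.float64(10.300000000000001)
    b = np.float64(10.299999999999999)
    print(a == b)                           # False
    print(np.float32(a) == np.float32(b))   # True: the noise is below float32 resolution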
src/lisfloodutilities/gridding/lib/filters.py (20 additions, 18 deletions)
Expand Up @@ -194,7 +194,7 @@ class ObservationsKiwisFilter(KiwisFilter):
filter (as key) and the radius (in decimal degrees) to find the vicinity station from other providers (as value).
"""

CLUSTER_COLLAPSE_RADIUS = np.float64(0.011582073434000193) # decimal degrees (1287 m)
CLUSTER_COLLAPSE_RADIUS = np.float32(0.011582073434000193) # decimal degrees (1287 m)

def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code: str = '', quiet_mode: bool = False):
super().__init__(filter_columns, filter_args, var_code, quiet_mode)
Expand All @@ -207,9 +207,9 @@ def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code:
self.provider_radius[provider_id] = radius

@staticmethod
def kilometers2degrees(km: np.float64) -> np.float64:
def kilometers2degrees(km: np.float32) -> np.float32:
# Convert km to degrees of latitude
delta_lat = km * np.float64(0.00899928005)
delta_lat = km * np.float32(0.00899928005)
return delta_lat

def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
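For reference, the factor 0.00899928005 is one kilometre expressed in degrees of latitude (1 / 111.12 km per degree), and CLUSTER_COLLAPSE_RADIUS is exactly this conversion applied to 1.287 km; a quick check, not part of the commit:

    import numpy as np

    def kilometers2degrees(km: np.float32) -> np.float32:
        # one degree of latitude spans ~111.12 km
        return km * np.float32(0.00899928005)

    print(kilometers2degrees(np.float32(1.287)))  # ~0.011582073, the collapse radius (1287 m)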
Expand All @@ -226,7 +226,7 @@ def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
return df

def has_neighbor_within_radius_from_other_providers(self, row: pd.Series, tree: cKDTree = None, provider_id: int = 0,
radius: np.float64 = CLUSTER_COLLAPSE_RADIUS) -> bool:
radius: np.float32 = CLUSTER_COLLAPSE_RADIUS) -> bool:
cur_provider_id = row[self.COL_PROVIDER_ID]
if cur_provider_id == provider_id:
location = (row[self.COL_LON], row[self.COL_LAT])
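The neighbour test here is a cKDTree radius query; a self-contained sketch of the same pattern, with made-up coordinates:

    import numpy as np
    from scipy.spatial import cKDTree

    # hypothetical (lon, lat) pairs of stations from other providers
    other_stations = np.array([[4.35, 50.85], [4.40, 50.90]], dtype=np.float32)
    tree = cKDTree(other_stations)

    location = (4.351, 50.851)                 # station being filtered
    radius = np.float32(0.011582073434000193)  # CLUSTER_COLLAPSE_RADIUS
    neighbors = tree.query_ball_point(location, r=radius)
    print(len(neighbors) > 0)  # True: another provider's station lies within the radius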
Expand Down Expand Up @@ -304,21 +304,21 @@ def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code:
self.threshold_max_latitude = 72.0
try:
if 'EXCLUDE_BELLOW_LATITUDE' in self.args:
self.threshold_max_latitude = float(self.args['EXCLUDE_BELLOW_LATITUDE'])
self.threshold_max_latitude = np.float32(self.args['EXCLUDE_BELLOW_LATITUDE'])
except Exception as e:
print_msg(f'WARNING: SolarRadiationLimitsKiwisFilter using default max Latitude {self.threshold_max_latitude}')
self.threshold_min_value = 0.0
try:
if 'EXCLUDE_BELLOW_VALUE' in self.args:
self.threshold_min_value = float(self.args['EXCLUDE_BELLOW_VALUE'])
self.threshold_min_value = np.float32(self.args['EXCLUDE_BELLOW_VALUE'])
except Exception as e:
print_msg(f'WARNING: SolarRadiationLimitsKiwisFilter using default min RG value {self.threshold_min_value}')

def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
df = super().apply_filter(df)
# Convert to float so it can be compared to the thresholds
df[self.COL_LAT] = df[self.COL_LAT].astype(float)
df[self.COL_VALUE] = df[self.COL_VALUE].astype(float)
df[self.COL_LAT] = df[self.COL_LAT].astype(np.float32)
df[self.COL_VALUE] = df[self.COL_VALUE].astype(np.float32)
# Filter values
df = df[~((df[self.COL_LAT] <= self.threshold_max_latitude) & (df[self.COL_VALUE] <= self.threshold_min_value))]
self.print_statistics(df)
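In effect the filter drops stations below the latitude threshold whose radiation reading is at or below the minimum (a zero reading outside high latitudes is presumably treated as a sensor artefact), while low readings above the threshold, where polar night makes them plausible, are kept.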
Expand Down Expand Up @@ -352,11 +352,13 @@ def get_all_6hourly_station_values_df(self, kiwis_data_frames: List[pd.DataFrame
merged_df = pd.concat(kiwis_data_frames)
merged_df = merged_df[[self.COL_LON, self.COL_LAT, self.COL_PROVIDER_ID, self.COL_STATION_NUM, self.COL_STATION_ID, self.COL_VALUE]]
merged_df.reset_index(drop=True, inplace=True)
result_df = merged_df.astype({self.COL_VALUE: 'float'}).groupby([self.COL_LON, self.COL_LAT,
self.COL_PROVIDER_ID,
self.COL_STATION_NUM,
self.COL_STATION_ID])[self.COL_VALUE].agg(['sum','count']).reset_index()
result_df.columns = [self.COL_LON, self.COL_LAT, self.COL_PROVIDER_ID, self.COL_STATION_NUM, self.COL_STATION_ID, 'sum_6h_values', 'count_6h_slots']
result_df = merged_df.astype({self.COL_VALUE: 'np.float32'}).groupby([self.COL_LON, self.COL_LAT,
self.COL_PROVIDER_ID,
self.COL_STATION_NUM,
self.COL_STATION_ID])[self.COL_VALUE].agg(
['sum','count']).reset_index()
result_df.columns = [self.COL_LON, self.COL_LAT, self.COL_PROVIDER_ID, self.COL_STATION_NUM,
self.COL_STATION_ID, 'sum_6h_values', 'count_6h_slots']
result_df.reset_index(drop=True, inplace=True)
return result_df
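The aggregation above produces, per station, the total of the available 6-hourly values and the number of slots that actually reported; a toy sketch with invented column names:

    import pandas as pd

    df = pd.DataFrame({'station': ['a', 'a', 'a', 'a', 'b', 'b', 'b'],
                       'value':   [1.0, 2.0, 0.5, 1.5, 3.0, 1.0, 2.0]})
    agg = df.groupby('station')['value'].agg(['sum', 'count']).reset_index()
    agg.columns = ['station', 'sum_6h_values', 'count_6h_slots']
    print(agg)  # a: 5.0 over 4 slots; b: 6.0 over 3 slots (one slot missing)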

Expand All @@ -371,8 +373,8 @@ def format_dwd_synop_wmo_num(self, station_num: str) -> str:
return station_num

def get_decumulated_24h_value_for_missing_6h_values(self, row: pd.Series, tree: cKDTree = None, provider_id: int = 0,
radius: float = ObservationsKiwisFilter.CLUSTER_COLLAPSE_RADIUS,
stations_6h_df: pd.DataFrame = None) -> float:
radius: np.float32 = ObservationsKiwisFilter.CLUSTER_COLLAPSE_RADIUS,
stations_6h_df: pd.DataFrame = None) -> np.float32:
"""
DECUMULATED_PR = (PR - Sum(PR6)) / (number of missing values)
If there are more than one 6h station in the radius, select according to the following rules by order:
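A worked instance of the decumulation formula, with invented numbers: if the 24h station reports PR = 12.0 mm and the matched 6h station reported three of the four slots totalling Sum(PR6) = 9.0 mm, the single missing slot receives (12.0 - 9.0) / 1 = 3.0 mm.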
Expand Down Expand Up @@ -421,11 +423,11 @@ def filter(self, kiwis_files: List[Path], kiwis_timestamps: List[str], kiwis_dat
# Guarantee datatype of value column
for i in range(len(kiwis_data_frames)):
kiwis_data_frames[i] = kiwis_data_frames[i].astype({
# self.COL_LON: 'float',
# self.COL_LAT: 'float',
# self.COL_LON: 'np.float32',
# self.COL_LAT: 'np.float32',
# self.COL_PROVIDER_ID: 'int',
# self.COL_STATION_ID: 'int',
self.COL_VALUE: 'float'})
self.COL_VALUE: 'np.float32'})

self.kiwis_24h_dataframe = kiwis_data_frames[0]
kiwis_6h_dataframes = kiwis_data_frames[1:]
src/lisfloodutilities/gridding/lib/utils.py (17 additions, 17 deletions)
Expand Up @@ -82,11 +82,11 @@ def __init__(self, dem_map: Path, quiet_mode: bool = False):
reader = NetCDFReader(self._dem_map)
self.nrows = reader._rows
self.ncols = reader._cols
self.mv = reader.mv.astype(np.float64)
self.values = reader.values.astype(np.float64)
self.mv = reader.mv.astype(np.float32)
self.values = reader.values.astype(np.float32)
self.lats, self.lons = reader.get_lat_lon_values()
self.lats = self.lats.astype(np.float64)
self.lons = self.lons.astype(np.float64)
self.lats = self.lats.astype(np.float32)
self.lons = self.lons.astype(np.float32)
self.lat_values = reader.get_lat_values()
self.lon_values = reader.get_lon_values()
self.cell_size_x = reader._pxlW
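Beyond reproducibility, the narrower dtype halves the memory held by the DEM and coordinate arrays; a quick check with an illustrative axis length:

    import numpy as np

    lats = np.linspace(30.0, 72.0, 4200)   # illustrative latitude axis
    print(lats.astype(np.float64).nbytes)  # 33600
    print(lats.astype(np.float32).nbytes)  # 16800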
Expand Down Expand Up @@ -319,12 +319,12 @@ def get_config_field(self, config_group: str = '', config_property: str = '') ->
return self.__configFile.get(config_group, config_property)

@property
def scale_factor(self) -> np.float64:
return np.float64(self.get_config_field('PROPERTIES', 'VALUE_SCALE'))
def scale_factor(self) -> float:
return float(self.get_config_field('PROPERTIES', 'VALUE_SCALE'))

@property
def add_offset(self) -> np.float64:
return np.float64(self.get_config_field('PROPERTIES', 'VALUE_OFFSET'))
def add_offset(self) -> float:
return float(self.get_config_field('PROPERTIES', 'VALUE_OFFSET'))

@property
def value_min(self) -> int:
Expand All @@ -343,8 +343,8 @@ def value_max_packed(self) -> int:
return int((self.value_max - self.add_offset) / self.scale_factor)

@property
def value_nan_packed(self) -> np.float64:
return np.float64((self.VALUE_NAN - self.add_offset) / self.scale_factor)
def value_nan_packed(self) -> np.float32:
return np.float32((self.VALUE_NAN - self.add_offset) / self.scale_factor)

@property
def var_code(self) -> str:
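These properties implement standard netCDF packing, packed = (value - add_offset) / scale_factor; for example, with assumed config values VALUE_SCALE = 0.1 and VALUE_OFFSET = 0.0:

    scale_factor, add_offset = 0.1, 0.0  # assumed config values
    value_max = 500
    print(int((value_max - add_offset) / scale_factor))  # 5000, the packed maximum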
Expand All @@ -355,8 +355,8 @@ def do_height_correction(self) -> bool:
return self.height_correction_factor != 0.0

@property
def height_correction_factor(self) -> np.float64:
return np.float64(self.get_config_field('PROPERTIES', 'HEIGHT_CORRECTION_FACTOR'))
def height_correction_factor(self) -> np.float32:
return np.float32(self.get_config_field('PROPERTIES', 'HEIGHT_CORRECTION_FACTOR'))

@property
def truncate_negative_values(self) -> bool:
Expand Down Expand Up @@ -402,7 +402,7 @@ def __init__(self, conf: Config, quiet_mode: bool = False, use_broadcasting: boo
super().__init__(quiet_mode)
self.conf = conf
self.use_broadcasting = use_broadcasting
self.unit_conversion = np.float64(self.conf.get_config_field('PROPERTIES', 'UNIT_CONVERSION'))
self.unit_conversion = np.float32(self.conf.get_config_field('PROPERTIES', 'UNIT_CONVERSION'))

def correct_height(self, df: pd.DataFrame) -> pd.DataFrame:
if self.conf.do_height_correction:
Expand Down Expand Up @@ -437,7 +437,7 @@ def prepare_grid(self, result: np.ndarray, grid_shape: np.ndarray) -> np.ndarray
result[np.where(result == self.conf.VALUE_NAN)] = np.nan
result[np.where(result < self.conf.value_min)] = np.nan
result[np.where(result > self.conf.value_max)] = np.nan
result = result.astype(np.float64)
result = result.astype(np.float32)
result = np.round(result, 1)
result[~np.isnan(result)] -= self.conf.add_offset
result[~np.isnan(result)] /= self.conf.scale_factor
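Note the order of operations in prepare_grid: cast to float32, round to one decimal, then shift and scale into packed units. A sketch with assumed scale_factor = 0.1 and add_offset = 0.0:

    import numpy as np

    scale_factor, add_offset = 0.1, 0.0   # assumed config values
    result = np.array([12.34, np.nan], dtype=np.float32)
    result = np.round(result, 1)          # [12.3, nan]
    result[~np.isnan(result)] -= add_offset
    result[~np.isnan(result)] /= scale_factor
    print(result)                         # [123.000002 nan], tenths of a unit in float32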
Expand Down Expand Up @@ -479,9 +479,9 @@ def generate_grid(self, filename: Path) -> np.ndarray:
x = df[self.conf.COLUMN_LON].values
y = df[self.conf.COLUMN_LAT].values
z = df[self.conf.COLUMN_VALUE].values
xp = np.array(x).astype(np.float64)
yp = np.array(y).astype(np.float64)
values = np.array(z).astype(np.float64)
xp = np.array(x).astype(np.float32)
yp = np.array(y).astype(np.float32)
values = np.array(z).astype(np.float32)
df = None
if self.conf.interpolation_mode == 'cdd':
scipy_interpolation = ScipyInterpolation(xp, yp, self.conf.grid_details, values,
src/lisfloodutilities/gridding/lib/writers.py (8 additions, 8 deletions)
Expand Up @@ -107,7 +107,7 @@ def close(self):
class NetCDFWriter(OutputWriter):
NETCDF_DATASET_FORMAT = 'NETCDF4_CLASSIC'
NETCDF_CONVENTIONS = 'CF-1.6'
NETCDF_VAR_DATA_TYPE = 'f8' # np.float64
NETCDF_VAR_DATA_TYPE = 'f4' # np.float64
NETCDF_COORDINATES_DATA_TYPE = 'i4' # np.int32
NETCDF_VAR_TIME_CALENDAR_TYPE = 'proleptic_gregorian'
COMPRESSION_LEVEL = 4
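In the netCDF4-python API these type codes map to numpy dtypes ('f4' is np.float32, 'f8' is np.float64), so the data variable itself is now written in single precision. A minimal sketch of creating such a variable (file, variable, and dimension names are invented):

    from netCDF4 import Dataset

    nf = Dataset('grid.nc', 'w', format='NETCDF4_CLASSIC')
    nf.createDimension('time', None)
    nf.createDimension('y', 950)    # illustrative grid size
    nf.createDimension('x', 1000)
    var = nf.createVariable('pr6', 'f4', ('time', 'y', 'x'),
                            zlib=True, complevel=4)  # matches COMPRESSION_LEVEL = 4
    nf.close()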
Expand Down Expand Up @@ -231,16 +231,16 @@ def __setup_netcdf_metadata(self, start_date: datetime = None):

proj = self.nf.createVariable(self.conf.get_config_field('PROJECTION','GRID_MAPPING'), self.NETCDF_COORDINATES_DATA_TYPE)
self.__set_property(proj, 'grid_mapping_name', 'PROJECTION', 'GRID_MAPPING')
self.__set_property(proj, 'false_easting', 'PROJECTION', 'FALSE_EASTING', np.float64)
self.__set_property(proj, 'false_northing', 'PROJECTION', 'FALSE_NORTHING', np.float64)
self.__set_property(proj, 'longitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LONGITUDE', np.float64)
self.__set_property(proj, 'latitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LATITUDE', np.float64)
self.__set_property(proj, 'semi_major_axis', 'PROJECTION', 'SEMI_MAJOR_AXIS', np.float64)
self.__set_property(proj, 'inverse_flattening', 'PROJECTION', 'INVERSE_FLATTENING', np.float64)
self.__set_property(proj, 'false_easting', 'PROJECTION', 'FALSE_EASTING', np.float32)
self.__set_property(proj, 'false_northing', 'PROJECTION', 'FALSE_NORTHING', np.float32)
self.__set_property(proj, 'longitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LONGITUDE', np.float32)
self.__set_property(proj, 'latitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LATITUDE', np.float32)
self.__set_property(proj, 'semi_major_axis', 'PROJECTION', 'SEMI_MAJOR_AXIS', np.float32)
self.__set_property(proj, 'inverse_flattening', 'PROJECTION', 'INVERSE_FLATTENING', np.float32)
self.__set_property(proj, 'proj4_params', 'PROJECTION', 'PARAMS')
self.__set_property(proj, 'EPSG_code', 'PROJECTION', 'EPSG_CODE')
self.__set_property(proj, 'spatial_ref', 'PROJECTION', 'STRING')
# self.__set_property(proj, 'longitude_of_prime_meridian', 'PROJECTION', 'LONGITUDE_PRIME_MERIDIAN', np.float64)
# self.__set_property(proj, 'longitude_of_prime_meridian', 'PROJECTION', 'LONGITUDE_PRIME_MERIDIAN', np.float32)
# self.__set_property(proj, 'GeoTransform', 'PROJECTION', 'GEO_TRANSFORM', self.__get_tuple)

var_data_type_packed = self.conf.get_config_field('PROPERTIES', 'DATA_TYPE_PACKED')
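The grid-mapping attributes above are scalar CF metadata, so writing them as float32 matters mainly for byte-level reproducibility of the output files. A sketch of the resulting attributes (the projection name and constants here are ETRS89-LAEA values, assumed for illustration rather than taken from the repo config):

    import numpy as np
    from netCDF4 import Dataset

    nf = Dataset('example.nc', 'w', format='NETCDF4_CLASSIC')
    proj = nf.createVariable('lambert_azimuthal_equal_area', 'i4')
    proj.grid_mapping_name = 'lambert_azimuthal_equal_area'
    proj.false_easting = np.float32(4321000.0)   # stored as a 4-byte float attribute
    proj.false_northing = np.float32(3210000.0)
    nf.close()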
