Reduce minor diffs in grids from diff PCs
gnrgomes committed Jul 12, 2024
1 parent df668c3 · commit 4c7aa20
Showing 3 changed files with 45 additions and 43 deletions.
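The change is mechanical across all three files: grid values and related constants are narrowed from np.float64 to np.float32, presumably so that results that differ only in the low-order float64 bits from one machine to the next collapse to identical single-precision values. A minimal illustration of the effect (not part of the commit):

    import numpy as np

    # Two hypothetical results of the same computation on different machines,
    # equal except for low-order float64 noise
    a = np.float64(10.300000000000001)
    b = np.float64(10.299999999999999)
    print(a == b)                           # False
    print(np.float32(a) == np.float32(b))   # True: the noise is below float32 resolution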
src/lisfloodutilities/gridding/lib/filters.py (20 additions, 18 deletions)
Expand Up @@ -194,7 +194,7 @@ class ObservationsKiwisFilter(KiwisFilter):
filter (as key) and the radius (in decimal degrees) to find the vicinity station from other providers (as value).
"""

CLUSTER_COLLAPSE_RADIUS = np.float64(0.011582073434000193) # decimal degrees (1287 m)
CLUSTER_COLLAPSE_RADIUS = np.float32(0.011582073434000193) # decimal degrees (1287 m)

def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code: str = '', quiet_mode: bool = False):
super().__init__(filter_columns, filter_args, var_code, quiet_mode)
Expand All @@ -207,9 +207,9 @@ def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code:
self.provider_radius[provider_id] = radius

@staticmethod
def kilometers2degrees(km: np.float64) -> np.float64:
def kilometers2degrees(km: np.float32) -> np.float32:
# Convert km to degrees of latitude
delta_lat = km * np.float64(0.00899928005)
delta_lat = km * np.float32(0.00899928005)
return delta_lat

def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
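For reference, the factor 0.00899928005 is one kilometre expressed in degrees of latitude (1 / 111.12 km per degree), and CLUSTER_COLLAPSE_RADIUS is exactly this conversion applied to 1.287 km; a quick check, not part of the commit:

    import numpy as np

    def kilometers2degrees(km: np.float32) -> np.float32:
        # one degree of latitude spans ~111.12 km
        return km * np.float32(0.00899928005)

    print(kilometers2degrees(np.float32(1.287)))  # ~0.011582073, the collapse radius (1287 m)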
Expand All @@ -226,7 +226,7 @@ def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
return df

def has_neighbor_within_radius_from_other_providers(self, row: pd.Series, tree: cKDTree = None, provider_id: int = 0,
radius: np.float64 = CLUSTER_COLLAPSE_RADIUS) -> bool:
radius: np.float32 = CLUSTER_COLLAPSE_RADIUS) -> bool:
cur_provider_id = row[self.COL_PROVIDER_ID]
if cur_provider_id == provider_id:
location = (row[self.COL_LON], row[self.COL_LAT])
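The neighbour test here is a cKDTree radius query; a self-contained sketch of the same pattern, with made-up coordinates:

    import numpy as np
    from scipy.spatial import cKDTree

    # hypothetical (lon, lat) pairs of stations from other providers
    other_stations = np.array([[4.35, 50.85], [4.40, 50.90]], dtype=np.float32)
    tree = cKDTree(other_stations)

    location = (4.351, 50.851)                 # station being filtered
    radius = np.float32(0.011582073434000193)  # CLUSTER_COLLAPSE_RADIUS
    neighbors = tree.query_ball_point(location, r=radius)
    print(len(neighbors) > 0)  # True: another provider's station lies within the radius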
Expand Down Expand Up @@ -304,21 +304,21 @@ def __init__(self, filter_columns: dict = {}, filter_args: dict = {}, var_code:
self.threshold_max_latitude = 72.0
try:
if 'EXCLUDE_BELLOW_LATITUDE' in self.args:
self.threshold_max_latitude = float(self.args['EXCLUDE_BELLOW_LATITUDE'])
self.threshold_max_latitude = np.float32(self.args['EXCLUDE_BELLOW_LATITUDE'])
except Exception as e:
print_msg(f'WARNING: SolarRadiationLimitsKiwisFilter using default max Latitude {self.threshold_max_latitude}')
self.threshold_min_value = 0.0
try:
if 'EXCLUDE_BELLOW_VALUE' in self.args:
self.threshold_min_value = float(self.args['EXCLUDE_BELLOW_VALUE'])
self.threshold_min_value = np.float32(self.args['EXCLUDE_BELLOW_VALUE'])
except Exception as e:
print_msg(f'WARNING: SolarRadiationLimitsKiwisFilter using default min RG value {self.threshold_min_value}')

def apply_filter(self, df: pd.DataFrame) -> pd.DataFrame:
df = super().apply_filter(df)
# Convert to float so it can be compared to the thresholds
df[self.COL_LAT] = df[self.COL_LAT].astype(float)
df[self.COL_VALUE] = df[self.COL_VALUE].astype(float)
df[self.COL_LAT] = df[self.COL_LAT].astype(np.float32)
df[self.COL_VALUE] = df[self.COL_VALUE].astype(np.float32)
# Filter values
df = df[~((df[self.COL_LAT] <= self.threshold_max_latitude) & (df[self.COL_VALUE] <= self.threshold_min_value))]
self.print_statistics(df)
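In effect the filter drops stations below the latitude threshold whose radiation reading is at or below the minimum (a zero reading outside high latitudes is presumably treated as a sensor artefact), while low readings above the threshold, where polar night makes them plausible, are kept.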
Expand Down Expand Up @@ -352,11 +352,13 @@ def get_all_6hourly_station_values_df(self, kiwis_data_frames: List[pd.DataFrame
merged_df = pd.concat(kiwis_data_frames)
merged_df = merged_df[[self.COL_LON, self.COL_LAT, self.COL_PROVIDER_ID, self.COL_STATION_NUM, self.COL_STATION_ID, self.COL_VALUE]]
merged_df.reset_index(drop=True, inplace=True)
result_df = merged_df.astype({self.COL_VALUE: 'float'}).groupby([self.COL_LON, self.COL_LAT,
self.COL_PROVIDER_ID,
self.COL_STATION_NUM,
self.COL_STATION_ID])[self.COL_VALUE].agg(['sum','count']).reset_index()
result_df.columns = [self.COL_LON, self.COL_LAT, self.COL_PROVIDER_ID, self.COL_STATION_NUM, self.COL_STATION_ID, 'sum_6h_values', 'count_6h_slots']
result_df = merged_df.astype({self.COL_VALUE: 'np.float32'}).groupby([self.COL_LON, self.COL_LAT,
self.COL_PROVIDER_ID,
self.COL_STATION_NUM,
self.COL_STATION_ID])[self.COL_VALUE].agg(
['sum','count']).reset_index()
result_df.columns = [self.COL_LON, self.COL_LAT, self.COL_PROVIDER_ID, self.COL_STATION_NUM,
self.COL_STATION_ID, 'sum_6h_values', 'count_6h_slots']
result_df.reset_index(drop=True, inplace=True)
return result_df
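The aggregation above produces, per station, the total of the available 6-hourly values and the number of slots that actually reported; a toy sketch with invented column names:

    import pandas as pd

    df = pd.DataFrame({'station': ['a', 'a', 'a', 'a', 'b', 'b', 'b'],
                       'value':   [1.0, 2.0, 0.5, 1.5, 3.0, 1.0, 2.0]})
    agg = df.groupby('station')['value'].agg(['sum', 'count']).reset_index()
    agg.columns = ['station', 'sum_6h_values', 'count_6h_slots']
    print(agg)  # a: 5.0 over 4 slots; b: 6.0 over 3 slots (one slot missing)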

Expand All @@ -371,8 +373,8 @@ def format_dwd_synop_wmo_num(self, station_num: str) -> str:
return station_num

def get_decumulated_24h_value_for_missing_6h_values(self, row: pd.Series, tree: cKDTree = None, provider_id: int = 0,
radius: float = ObservationsKiwisFilter.CLUSTER_COLLAPSE_RADIUS,
stations_6h_df: pd.DataFrame = None) -> float:
radius: np.float32 = ObservationsKiwisFilter.CLUSTER_COLLAPSE_RADIUS,
stations_6h_df: pd.DataFrame = None) -> np.float32:
"""
DECUMULATED_PR = (PR - Sum(PR6)) / (number of missing values)
If there are more than one 6h station in the radius, select according to the following rules by order:
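A worked instance of the decumulation formula, with invented numbers: if the 24h station reports PR = 12.0 mm and the matched 6h station reported three of the four slots totalling Sum(PR6) = 9.0 mm, the single missing slot receives (12.0 - 9.0) / 1 = 3.0 mm.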
Expand Down Expand Up @@ -421,11 +423,11 @@ def filter(self, kiwis_files: List[Path], kiwis_timestamps: List[str], kiwis_dat
# Guarantee datatype of value column
for i in range(len(kiwis_data_frames)):
kiwis_data_frames[i] = kiwis_data_frames[i].astype({
# self.COL_LON: 'float',
# self.COL_LAT: 'float',
# self.COL_LON: 'np.float32',
# self.COL_LAT: 'np.float32',
# self.COL_PROVIDER_ID: 'int',
# self.COL_STATION_ID: 'int',
self.COL_VALUE: 'float'})
self.COL_VALUE: 'np.float32'})

self.kiwis_24h_dataframe = kiwis_data_frames[0]
kiwis_6h_dataframes = kiwis_data_frames[1:]
src/lisfloodutilities/gridding/lib/utils.py (17 additions, 17 deletions)
Expand Up @@ -82,11 +82,11 @@ def __init__(self, dem_map: Path, quiet_mode: bool = False):
reader = NetCDFReader(self._dem_map)
self.nrows = reader._rows
self.ncols = reader._cols
self.mv = reader.mv.astype(np.float64)
self.values = reader.values.astype(np.float64)
self.mv = reader.mv.astype(np.float32)
self.values = reader.values.astype(np.float32)
self.lats, self.lons = reader.get_lat_lon_values()
self.lats = self.lats.astype(np.float64)
self.lons = self.lons.astype(np.float64)
self.lats = self.lats.astype(np.float32)
self.lons = self.lons.astype(np.float32)
self.lat_values = reader.get_lat_values()
self.lon_values = reader.get_lon_values()
self.cell_size_x = reader._pxlW
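Beyond reproducibility, the narrower dtype halves the memory held by the DEM and coordinate arrays; a quick check with an illustrative axis length:

    import numpy as np

    lats = np.linspace(30.0, 72.0, 4200)   # illustrative latitude axis
    print(lats.astype(np.float64).nbytes)  # 33600
    print(lats.astype(np.float32).nbytes)  # 16800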
Expand Down Expand Up @@ -319,12 +319,12 @@ def get_config_field(self, config_group: str = '', config_property: str = '') ->
return self.__configFile.get(config_group, config_property)

@property
def scale_factor(self) -> np.float64:
return np.float64(self.get_config_field('PROPERTIES', 'VALUE_SCALE'))
def scale_factor(self) -> float:
return float(self.get_config_field('PROPERTIES', 'VALUE_SCALE'))

@property
def add_offset(self) -> np.float64:
return np.float64(self.get_config_field('PROPERTIES', 'VALUE_OFFSET'))
def add_offset(self) -> float:
return float(self.get_config_field('PROPERTIES', 'VALUE_OFFSET'))

@property
def value_min(self) -> int:
Expand All @@ -343,8 +343,8 @@ def value_max_packed(self) -> int:
return int((self.value_max - self.add_offset) / self.scale_factor)

@property
def value_nan_packed(self) -> np.float64:
return np.float64((self.VALUE_NAN - self.add_offset) / self.scale_factor)
def value_nan_packed(self) -> np.float32:
return np.float32((self.VALUE_NAN - self.add_offset) / self.scale_factor)

@property
def var_code(self) -> str:
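These properties implement standard netCDF packing, packed = (value - add_offset) / scale_factor; for example, with assumed config values VALUE_SCALE = 0.1 and VALUE_OFFSET = 0.0:

    scale_factor, add_offset = 0.1, 0.0  # assumed config values
    value_max = 500
    print(int((value_max - add_offset) / scale_factor))  # 5000, the packed maximum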
Expand All @@ -355,8 +355,8 @@ def do_height_correction(self) -> bool:
return self.height_correction_factor != 0.0

@property
def height_correction_factor(self) -> np.float64:
return np.float64(self.get_config_field('PROPERTIES', 'HEIGHT_CORRECTION_FACTOR'))
def height_correction_factor(self) -> np.float32:
return np.float32(self.get_config_field('PROPERTIES', 'HEIGHT_CORRECTION_FACTOR'))

@property
def truncate_negative_values(self) -> bool:
Expand Down Expand Up @@ -402,7 +402,7 @@ def __init__(self, conf: Config, quiet_mode: bool = False, use_broadcasting: boo
super().__init__(quiet_mode)
self.conf = conf
self.use_broadcasting = use_broadcasting
self.unit_conversion = np.float64(self.conf.get_config_field('PROPERTIES', 'UNIT_CONVERSION'))
self.unit_conversion = np.float32(self.conf.get_config_field('PROPERTIES', 'UNIT_CONVERSION'))

def correct_height(self, df: pd.DataFrame) -> pd.DataFrame:
if self.conf.do_height_correction:
Expand Down Expand Up @@ -437,7 +437,7 @@ def prepare_grid(self, result: np.ndarray, grid_shape: np.ndarray) -> np.ndarray
result[np.where(result == self.conf.VALUE_NAN)] = np.nan
result[np.where(result < self.conf.value_min)] = np.nan
result[np.where(result > self.conf.value_max)] = np.nan
result = result.astype(np.float64)
result = result.astype(np.float32)
result = np.round(result, 1)
result[~np.isnan(result)] -= self.conf.add_offset
result[~np.isnan(result)] /= self.conf.scale_factor
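Note the order of operations in prepare_grid: cast to float32, round to one decimal, then shift and scale into packed units. A sketch with assumed scale_factor = 0.1 and add_offset = 0.0:

    import numpy as np

    scale_factor, add_offset = 0.1, 0.0   # assumed config values
    result = np.array([12.34, np.nan], dtype=np.float32)
    result = np.round(result, 1)          # [12.3, nan]
    result[~np.isnan(result)] -= add_offset
    result[~np.isnan(result)] /= scale_factor
    print(result)                         # [123.000002 nan], tenths of a unit in float32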
Expand Down Expand Up @@ -479,9 +479,9 @@ def generate_grid(self, filename: Path) -> np.ndarray:
x = df[self.conf.COLUMN_LON].values
y = df[self.conf.COLUMN_LAT].values
z = df[self.conf.COLUMN_VALUE].values
xp = np.array(x).astype(np.float64)
yp = np.array(y).astype(np.float64)
values = np.array(z).astype(np.float64)
xp = np.array(x).astype(np.float32)
yp = np.array(y).astype(np.float32)
values = np.array(z).astype(np.float32)
df = None
if self.conf.interpolation_mode == 'cdd':
scipy_interpolation = ScipyInterpolation(xp, yp, self.conf.grid_details, values,
src/lisfloodutilities/gridding/lib/writers.py (8 additions, 8 deletions)
Expand Up @@ -107,7 +107,7 @@ def close(self):
class NetCDFWriter(OutputWriter):
NETCDF_DATASET_FORMAT = 'NETCDF4_CLASSIC'
NETCDF_CONVENTIONS = 'CF-1.6'
NETCDF_VAR_DATA_TYPE = 'f8' # np.float64
NETCDF_VAR_DATA_TYPE = 'f4' # np.float64
NETCDF_COORDINATES_DATA_TYPE = 'i4' # np.int32
NETCDF_VAR_TIME_CALENDAR_TYPE = 'proleptic_gregorian'
COMPRESSION_LEVEL = 4
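In the netCDF4-python API these type codes map to numpy dtypes ('f4' is np.float32, 'f8' is np.float64), so the data variable itself is now written in single precision. A minimal sketch of creating such a variable (file, variable, and dimension names are invented):

    from netCDF4 import Dataset

    nf = Dataset('grid.nc', 'w', format='NETCDF4_CLASSIC')
    nf.createDimension('time', None)
    nf.createDimension('y', 950)    # illustrative grid size
    nf.createDimension('x', 1000)
    var = nf.createVariable('pr6', 'f4', ('time', 'y', 'x'),
                            zlib=True, complevel=4)  # matches COMPRESSION_LEVEL = 4
    nf.close()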
Expand Down Expand Up @@ -231,16 +231,16 @@ def __setup_netcdf_metadata(self, start_date: datetime = None):

proj = self.nf.createVariable(self.conf.get_config_field('PROJECTION','GRID_MAPPING'), self.NETCDF_COORDINATES_DATA_TYPE)
self.__set_property(proj, 'grid_mapping_name', 'PROJECTION', 'GRID_MAPPING')
self.__set_property(proj, 'false_easting', 'PROJECTION', 'FALSE_EASTING', np.float64)
self.__set_property(proj, 'false_northing', 'PROJECTION', 'FALSE_NORTHING', np.float64)
self.__set_property(proj, 'longitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LONGITUDE', np.float64)
self.__set_property(proj, 'latitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LATITUDE', np.float64)
self.__set_property(proj, 'semi_major_axis', 'PROJECTION', 'SEMI_MAJOR_AXIS', np.float64)
self.__set_property(proj, 'inverse_flattening', 'PROJECTION', 'INVERSE_FLATTENING', np.float64)
self.__set_property(proj, 'false_easting', 'PROJECTION', 'FALSE_EASTING', np.float32)
self.__set_property(proj, 'false_northing', 'PROJECTION', 'FALSE_NORTHING', np.float32)
self.__set_property(proj, 'longitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LONGITUDE', np.float32)
self.__set_property(proj, 'latitude_of_projection_origin', 'PROJECTION', 'ORIGIN_LATITUDE', np.float32)
self.__set_property(proj, 'semi_major_axis', 'PROJECTION', 'SEMI_MAJOR_AXIS', np.float32)
self.__set_property(proj, 'inverse_flattening', 'PROJECTION', 'INVERSE_FLATTENING', np.float32)
self.__set_property(proj, 'proj4_params', 'PROJECTION', 'PARAMS')
self.__set_property(proj, 'EPSG_code', 'PROJECTION', 'EPSG_CODE')
self.__set_property(proj, 'spatial_ref', 'PROJECTION', 'STRING')
# self.__set_property(proj, 'longitude_of_prime_meridian', 'PROJECTION', 'LONGITUDE_PRIME_MERIDIAN', np.float64)
# self.__set_property(proj, 'longitude_of_prime_meridian', 'PROJECTION', 'LONGITUDE_PRIME_MERIDIAN', np.float32)
# self.__set_property(proj, 'GeoTransform', 'PROJECTION', 'GEO_TRANSFORM', self.__get_tuple)

var_data_type_packed = self.conf.get_config_field('PROPERTIES', 'DATA_TYPE_PACKED')
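The grid-mapping attributes above are scalar CF metadata, so writing them as float32 matters mainly for byte-level reproducibility of the output files. A sketch of the resulting attributes (the projection name and constants here are ETRS89-LAEA values, assumed for illustration rather than taken from the repo config):

    import numpy as np
    from netCDF4 import Dataset

    nf = Dataset('example.nc', 'w', format='NETCDF4_CLASSIC')
    proj = nf.createVariable('lambert_azimuthal_equal_area', 'i4')
    proj.grid_mapping_name = 'lambert_azimuthal_equal_area'
    proj.false_easting = np.float32(4321000.0)   # stored as a 4-byte float attribute
    proj.false_northing = np.float32(3210000.0)
    nf.close()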
