From 85d38c15f0dac4824c20a6bb79b86ed7e9da34cb Mon Sep 17 00:00:00 2001 From: Carlo Russo Date: Fri, 15 Dec 2023 13:32:05 +0100 Subject: [PATCH] Added option num_of_split to split interpolation conputations in submaps and save memory --- README.md | 8 +++-- src/pyg2p/main/api.py | 1 + src/pyg2p/main/context.py | 1 + src/pyg2p/main/interpolation/__init__.py | 6 ++-- .../interpolation/scipy_interpolation_lib.py | 35 +++++++++++++++++-- 5 files changed, 45 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 7e2a3da..9b7c377 100644 --- a/README.md +++ b/README.md @@ -672,6 +672,7 @@ Attributes p, leafsize and eps for the kd tree algorithm are default in scipy li #### ADW It's the Angular Distance Weighted (ADW) algorithm by Shepard et al. 1968, with scipy.kd_tree using 11 neighbours. If @use_broadcasting is set to true, computations will run in full broadcasting mode but requires more memory +If @num_of_splits is set to any number, computations will be split on subset and then recollected into the final map, to save mamory (do not set it if you have enought memory to run interpolation) ```json { @@ -679,7 +680,8 @@ If @use_broadcasting is set to true, computations will run in full broadcasting "@latMap": "/dataset/maps/europe5km/lat.map", "@lonMap": "/dataset/maps/europe5km/long.map", "@mode": "adw", - "@use_broadcasting": false} + "@use_broadcasting": false, + "@num_of_splits": 10} } ``` @@ -688,6 +690,7 @@ It's the Correlation Distance Decay (CDD) modified implementation of the Angular @cdd_mode can be one of the following values: "Hofstra", "NewEtAl" or "MixHofstraShepard" In case of mode "MixHofstraShepard", @cdd_options allows to customize the parameters of Hofstra and Shepard algorithm ("weights_mode": can be "All" or "OnlyTOP10" to take 10 higher values only in the interpolation of each point). If @use_broadcasting is set to true, computations will run in full broadcasting mode but requires more memory +If @num_of_splits is set to any number, computations will be split on subset and then recollected into the final map, to save mamory (do not set it if you have enought memory to run interpolation) ```json { @@ -703,7 +706,8 @@ If @use_broadcasting is set to true, computations will run in full broadcasting "radius_ratio": 0.3333333333333333, "weights_mode": "All" }, - "@use_broadcasting": false} + "@use_broadcasting": false, + "@num_of_splits": 10} } ``` diff --git a/src/pyg2p/main/api.py b/src/pyg2p/main/api.py index 5102a60..aaf78de 100644 --- a/src/pyg2p/main/api.py +++ b/src/pyg2p/main/api.py @@ -172,6 +172,7 @@ def _define_exec_params(self): self._vars['interpolation.cdd_mode'] = interpolation_conf.get('cdd_mode', '') self._vars['interpolation.cdd_options'] = interpolation_conf.get('cdd_options', None) self._vars['interpolation.use_broadcasting'] = interpolation_conf.get('use_broadcasting', False) + self._vars['interpolation.num_of_splits'] = interpolation_conf.get('num_of_splits', None) self._vars['interpolation.rotated_target'] = interpolation_conf.get('rotated_target', False) if not self._vars['interpolation.dir'] and self.api_conf.get('intertableDir'): self._vars['interpolation.dirs']['user'] = self.api_conf['intertableDir'] diff --git a/src/pyg2p/main/context.py b/src/pyg2p/main/context.py index 7d60c56..f139b82 100644 --- a/src/pyg2p/main/context.py +++ b/src/pyg2p/main/context.py @@ -364,6 +364,7 @@ def _define_exec_params(self): self._vars['interpolation.cdd_mode'] = interpolation_conf.get('@cdd_mode', '') self._vars['interpolation.cdd_options'] = interpolation_conf.get('@cdd_options', None) self._vars['interpolation.use_broadcasting'] = interpolation_conf.get('@use_broadcasting', False) + self._vars['interpolation.num_of_splits'] = interpolation_conf.get('@num_of_splits', None) self._vars['interpolation.rotated_target'] = interpolation_conf.get('@rotated_target', False) if not self._vars['interpolation.dir'] and interpolation_conf.get('@intertableDir'): # get from JSON diff --git a/src/pyg2p/main/interpolation/__init__.py b/src/pyg2p/main/interpolation/__init__.py index cf61a5a..8339be0 100644 --- a/src/pyg2p/main/interpolation/__init__.py +++ b/src/pyg2p/main/interpolation/__init__.py @@ -35,6 +35,7 @@ def __init__(self, exec_ctx, mv_input): self._cdd_mode = exec_ctx.get('interpolation.cdd_mode') self._cdd_options = exec_ctx.get('interpolation.cdd_options') self._use_broadcasting = exec_ctx.get('interpolation.use_broadcasting') + self._num_of_splits = exec_ctx.get('interpolation.num_of_splits') self._source_filename = pyg2p.util.files.filename(exec_ctx.get('input.file')) self._suffix = self.suffixes[self._mode] self._intertable_dirs = exec_ctx.get('interpolation.dirs') @@ -232,8 +233,9 @@ def interpolate_scipy(self, latgrib, longrib, v, grid_id, grid_details=None): self._mv_grib, target_is_rotated=self._rotated_target_grid, parallel=self.parallel, mode=self._mode, cdd_map=self._cdd_map, cdd_mode=self._cdd_mode, cdd_options = self._cdd_options, - use_broadcasting=self._use_broadcasting) - _, weights, indexes = scipy_interpolation.interpolate(lonefas, latefas) + use_broadcasting=self._use_broadcasting, + num_of_splits=self._num_of_splits) + _, weights, indexes = scipy_interpolation.interpolate(lonefas, latefas) result = self._interpolate_scipy_append_mv(v, weights, indexes, nnear) # saving interpolation lookup table diff --git a/src/pyg2p/main/interpolation/scipy_interpolation_lib.py b/src/pyg2p/main/interpolation/scipy_interpolation_lib.py index 27b3ba8..a87b7c4 100644 --- a/src/pyg2p/main/interpolation/scipy_interpolation_lib.py +++ b/src/pyg2p/main/interpolation/scipy_interpolation_lib.py @@ -518,7 +518,8 @@ class ScipyInterpolation(object): def __init__(self, longrib, latgrib, grid_details, source_values, nnear, mv_target, mv_source, target_is_rotated=False, parallel=False, - mode='nearest', cdd_map='', cdd_mode='', cdd_options = None, use_broadcasting = False): + mode='nearest', cdd_map='', cdd_mode='', cdd_options = None, use_broadcasting = False, + num_of_splits = None): stdout.write('Start scipy interpolation: {}\n'.format(now_string())) self.geodetic_info = grid_details self.source_grid_is_rotated = 'rotated' in grid_details.get('gridType') @@ -533,6 +534,7 @@ def __init__(self, longrib, latgrib, grid_details, source_values, nnear, self.cdd_mode = cdd_mode self.cdd_options = cdd_options self.use_broadcasting = use_broadcasting + self.num_of_splits = num_of_splits if DEBUG_ADW_INTERPOLATION: self.use_broadcasting = True @@ -563,7 +565,36 @@ def __init__(self, longrib, latgrib, grid_details, source_values, nnear, # set max of distances as min upper bound and add an empirical correction value self.min_upper_bound = np.max(distances) + np.max(distances) * 4 / self.geodetic_info.get('Nj') - def interpolate(self, target_lons, target_lats): + def interpolate(self, lonefas, latefas): + if (self.num_of_splits is not None): + # Define the size of the subsets, only in lonm + subset_size = lonefas.shape[0]//self.num_of_splits + + # Initialize empty arrays to store the results + weights = np.empty((lonefas.shape[0]*lonefas.shape[1],self.nnear)) + indexes = np.empty((lonefas.shape[0]*lonefas.shape[1],self.nnear),dtype=int) + result = np.empty((lonefas.shape[0]*lonefas.shape[1])) + + # Iterate over the subsets of the arrays + for i in range(0, lonefas.shape[0], subset_size): + subset_lonefas = lonefas[i:i+subset_size, :] + subset_latefas = latefas[i:i+subset_size, :] + + # Call the interpolate function for the subset + subset_result, subset_weights, subset_indexes = self.interpolate_split(subset_lonefas, subset_latefas) + + # Collect the results back into the weights and indexes arrays + weights[i*lonefas.shape[1]:(i+subset_size)*lonefas.shape[1],:] = subset_weights + indexes[i*lonefas.shape[1]:(i+subset_size)*lonefas.shape[1],:] = subset_indexes + result[i*lonefas.shape[1]:(i+subset_size)*lonefas.shape[1]] = subset_result + + else: + result, weights, indexes = self.interpolate_split(lonefas, latefas) + + return result, weights, indexes + + + def interpolate_split(self, target_lons, target_lats): # Target coordinates HAVE to be rotated coords in case GRIB grid is rotated # Example of target rotated coords are COSMO lat/lon/dem PCRASTER maps self.target_latsOR=target_lats