From 917c83b28e17af9bff9e064df800f8e136bc0463 Mon Sep 17 00:00:00 2001
From: gjm174 <56946945+gjm174@users.noreply.github.com>
Date: Thu, 1 Aug 2024 08:41:00 +0200
Subject: [PATCH] Performance fix (#410)

* Improved NWP loading

* performance fix and black compliance
---
 pysteps/blending/steps.py               | 20 +++++++-------
 pysteps/blending/utils.py               | 35 +++++++++++--------------
 pysteps/extrapolation/semilagrangian.py |  2 +-
 3 files changed, 26 insertions(+), 31 deletions(-)

diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py
index e2cd9f00a..e5a282382 100644
--- a/pysteps/blending/steps.py
+++ b/pysteps/blending/steps.py
@@ -55,6 +55,8 @@
 from pysteps.postprocessing import probmatching
 from pysteps.timeseries import autoregression, correlation
 
+from copy import deepcopy
+
 try:
     import dask
 
@@ -710,15 +712,11 @@ def forecast(
         # 5. Repeat precip_cascade for n ensemble members
         # First, discard all except the p-1 last cascades because they are not needed
         # for the AR(p) model
-        precip_cascade = [
-            precip_cascade[i][-ar_order:] for i in range(n_cascade_levels)
-        ]
 
-        precip_cascade = [
-            [precip_cascade[j].copy() for j in range(n_cascade_levels)]
-            for i in range(n_ens_members)
-        ]
-        precip_cascade = np.stack(precip_cascade)
+        precip_cascade = np.stack(
+            [[precip_cascade[i][-ar_order:].copy() for i in range(n_cascade_levels)]]
+            * n_ens_members
+        )
 
         # 6. Initialize all the random generators and prepare for the forecast loop
         randgen_prec, vps, generate_vel_noise = _init_random_generators(
@@ -781,8 +779,10 @@ def forecast(
             starttime_mainloop = time.time()
 
         extrap_kwargs["return_displacement"] = True
-        forecast_prev = precip_cascade
-        noise_prev = noise_cascade
+
+        forecast_prev = deepcopy(precip_cascade)
+        noise_prev = deepcopy(noise_cascade)
+
         t_prev = [0.0 for j in range(n_ens_members)]
         t_total = [0.0 for j in range(n_ens_members)]
 
diff --git a/pysteps/blending/utils.py b/pysteps/blending/utils.py
index a7b56494b..486d7af88 100644
--- a/pysteps/blending/utils.py
+++ b/pysteps/blending/utils.py
@@ -475,11 +475,11 @@ def load_NWP(input_nc_path_decomp, input_path_velocities, start_time, n_timestep
     ncf_decomp = netCDF4.Dataset(input_nc_path_decomp, "r", format="NETCDF4")
     velocities = np.load(input_path_velocities)
 
-    # Initialise the decomposition dictionary
-    decomp_dict = dict()
-    decomp_dict["domain"] = ncf_decomp.domain
-    decomp_dict["normalized"] = bool(ncf_decomp.normalized)
-    decomp_dict["compact_output"] = bool(ncf_decomp.compact_output)
+    decomp_dict = {
+        "domain": ncf_decomp.domain,
+        "normalized": bool(ncf_decomp.normalized),
+        "compact_output": bool(ncf_decomp.compact_output),
+    }
 
     # Convert the start time and the timestep to datetime64 and timedelta64 type
     zero_time = np.datetime64("1970-01-01T00:00:00", "ns")
@@ -515,23 +515,18 @@ def load_NWP(input_nc_path_decomp, input_path_velocities, start_time, n_timestep
     # Initialise the list of dictionaries which will serve as the output (cf: the STEPS function)
     R_d = list()
 
-    for i in range(start_i, end_i):
-        decomp_dict_ = decomp_dict.copy()
+    pr_decomposed = ncf_decomp.variables["pr_decomposed"][start_i:end_i, :, :, :]
+    means = ncf_decomp.variables["means"][start_i:end_i, :]
+    stds = ncf_decomp.variables["stds"][start_i:end_i, :]
 
-        # Obtain the decomposed cascades for time step i
-        cascade_levels = ncf_decomp.variables["pr_decomposed"][i, :, :, :]
-        # Obtain the mean values
-        means = ncf_decomp.variables["means"][i, :]
-        # Obtain de standard deviations
-        stds = ncf_decomp.variables["stds"][i, :]
-
-        # Save the values in the dictionary as normal arrays with the filled method
-        decomp_dict_["cascade_levels"] = np.ma.filled(cascade_levels, fill_value=np.nan)
-        decomp_dict_["means"] = np.ma.filled(means, fill_value=np.nan)
-        decomp_dict_["stds"] = np.ma.filled(stds, fill_value=np.nan)
+    for i in range(n_timesteps + 1):
+        decomp_dict["cascade_levels"] = np.ma.filled(
+            pr_decomposed[i], fill_value=np.nan
+        )
+        decomp_dict["means"] = np.ma.filled(means[i], fill_value=np.nan)
+        decomp_dict["stds"] = np.ma.filled(stds[i], fill_value=np.nan)
 
-        # Append the output list
-        R_d.append(decomp_dict_)
+        R_d.append(decomp_dict.copy())
 
     ncf_decomp.close()
     return R_d, uv
diff --git a/pysteps/extrapolation/semilagrangian.py b/pysteps/extrapolation/semilagrangian.py
index 97aa90fc2..a0084292e 100644
--- a/pysteps/extrapolation/semilagrangian.py
+++ b/pysteps/extrapolation/semilagrangian.py
@@ -173,7 +173,7 @@ def extrapolate(
 
     if xy_coords is None:
         x_values, y_values = np.meshgrid(
-            np.arange(velocity.shape[2]), np.arange(velocity.shape[1])
+            np.arange(velocity.shape[2]), np.arange(velocity.shape[1]), copy=False
         )
 
         xy_coords = np.stack([x_values, y_values])