diff --git a/docs/source/basic_usage/trainingmodel.rst b/docs/source/basic_usage/trainingmodel.rst index e6bc8c967..53cb8a8df 100644 --- a/docs/source/basic_usage/trainingmodel.rst +++ b/docs/source/basic_usage/trainingmodel.rst @@ -28,7 +28,7 @@ options to train a simple network with example data, namely parameters = mala.Parameters() parameters.data.input_rescaling_type = "feature-wise-standard" - parameters.data.output_rescaling_type = "normal" + parameters.data.output_rescaling_type = "minmax" parameters.network.layer_activations = ["ReLU"] @@ -43,15 +43,18 @@ sub-objects dealing with the individual aspects of the workflow. In the first two lines, which data scaling MALA should employ. Scaling data greatly improves the performance of NN based ML models. Options are -* ``None``: No normalization is applied. +* ``None``: No scaling is applied. -* ``standard``: Standardization (Scale to mean 0, standard deviation 1) +* ``standard``: Standardization (Scale to mean 0, standard deviation 1) is + applied to the entire array. -* ``normal``: Min-Max scaling (Scale to be in range 0...1) +* ``minmax``: Min-Max scaling (Scale to be in range 0...1) is applied to the entire array. -* ``feature-wise-standard``: Row Standardization (Scale to mean 0, standard deviation 1) +* ``feature-wise-standard``: Standardization (Scale to mean 0, standard + deviation 1) is applied to each feature dimension individually. -* ``feature-wise-normal``: Row Min-Max scaling (Scale to be in range 0...1) +* ``feature-wise-minmax``: Min-Max scaling (Scale to be in range 0...1) is + applied to each feature dimension individually. Here, we specify that MALA should standardize the input (=descriptors) by feature (i.e., each entry of the vector separately on the grid) and diff --git a/examples/advanced/ex01_checkpoint_training.py b/examples/advanced/ex01_checkpoint_training.py index 5222a5232..af8ee5687 100644 --- a/examples/advanced/ex01_checkpoint_training.py +++ b/examples/advanced/ex01_checkpoint_training.py @@ -21,7 +21,7 @@ def initial_setup(): parameters = mala.Parameters() parameters.data.data_splitting_type = "by_snapshot" parameters.data.input_rescaling_type = "feature-wise-standard" - parameters.data.output_rescaling_type = "normal" + parameters.data.output_rescaling_type = "minmax" parameters.network.layer_activations = ["ReLU"] parameters.running.max_number_epochs = 9 parameters.running.mini_batch_size = 8 diff --git a/examples/advanced/ex03_tensor_board.py b/examples/advanced/ex03_tensor_board.py index 97bc781cf..cf1e884a7 100644 --- a/examples/advanced/ex03_tensor_board.py +++ b/examples/advanced/ex03_tensor_board.py @@ -13,7 +13,7 @@ parameters = mala.Parameters() parameters.data.input_rescaling_type = "feature-wise-standard" -parameters.data.output_rescaling_type = "normal" +parameters.data.output_rescaling_type = "minmax" parameters.targets.ldos_gridsize = 11 parameters.targets.ldos_gridspacing_ev = 2.5 parameters.targets.ldos_gridoffset_ev = -5 @@ -32,11 +32,19 @@ data_handler = mala.DataHandler(parameters) data_handler.add_snapshot( - "Be_snapshot0.in.npy", data_path, "Be_snapshot0.out.npy", data_path, "tr", + "Be_snapshot0.in.npy", + data_path, + "Be_snapshot0.out.npy", + data_path, + "tr", calculation_output_file=os.path.join(data_path, "Be_snapshot0.out"), ) data_handler.add_snapshot( - "Be_snapshot1.in.npy", data_path, "Be_snapshot1.out.npy", data_path, "va", + "Be_snapshot1.in.npy", + data_path, + "Be_snapshot1.out.npy", + data_path, + "va", calculation_output_file=os.path.join(data_path, 
"Be_snapshot1.out"), ) data_handler.prepare_data() diff --git a/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py b/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py index 99a92fa35..7680c7a91 100644 --- a/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py +++ b/examples/advanced/ex05_checkpoint_hyperparameter_optimization.py @@ -17,7 +17,7 @@ def initial_setup(): parameters = mala.Parameters() parameters.data.input_rescaling_type = "feature-wise-standard" - parameters.data.output_rescaling_type = "normal" + parameters.data.output_rescaling_type = "minmax" parameters.running.max_number_epochs = 10 parameters.running.mini_batch_size = 40 parameters.running.learning_rate = 0.00001 diff --git a/examples/advanced/ex06_distributed_hyperparameter_optimization.py b/examples/advanced/ex06_distributed_hyperparameter_optimization.py index 215dd1ab2..4a6e42f9b 100644 --- a/examples/advanced/ex06_distributed_hyperparameter_optimization.py +++ b/examples/advanced/ex06_distributed_hyperparameter_optimization.py @@ -24,7 +24,7 @@ parameters = mala.Parameters() # Specify the data scaling. parameters.data.input_rescaling_type = "feature-wise-standard" -parameters.data.output_rescaling_type = "normal" +parameters.data.output_rescaling_type = "minmax" parameters.running.max_number_epochs = 5 parameters.running.mini_batch_size = 40 parameters.running.learning_rate = 0.00001 diff --git a/examples/advanced/ex07_advanced_hyperparameter_optimization.py b/examples/advanced/ex07_advanced_hyperparameter_optimization.py index 242ffd7dd..0072ed3a0 100644 --- a/examples/advanced/ex07_advanced_hyperparameter_optimization.py +++ b/examples/advanced/ex07_advanced_hyperparameter_optimization.py @@ -17,7 +17,7 @@ def optimize_hyperparameters(hyper_optimizer): parameters = mala.Parameters() parameters.data.input_rescaling_type = "feature-wise-standard" - parameters.data.output_rescaling_type = "normal" + parameters.data.output_rescaling_type = "minmax" parameters.running.max_number_epochs = 10 parameters.running.mini_batch_size = 40 parameters.running.learning_rate = 0.00001 diff --git a/examples/basic/ex01_train_network.py b/examples/basic/ex01_train_network.py index 1eca8c6b7..c7a5ca782 100644 --- a/examples/basic/ex01_train_network.py +++ b/examples/basic/ex01_train_network.py @@ -20,7 +20,7 @@ # Specify the data scaling. For regular bispectrum and LDOS data, # these have proven successful. parameters.data.input_rescaling_type = "feature-wise-standard" -parameters.data.output_rescaling_type = "normal" +parameters.data.output_rescaling_type = "minmax" # Specify the used activation function. parameters.network.layer_activations = ["ReLU"] # Specify the training parameters. 
diff --git a/examples/basic/ex04_hyperparameter_optimization.py b/examples/basic/ex04_hyperparameter_optimization.py
index cebb4c42e..3160206c3 100644
--- a/examples/basic/ex04_hyperparameter_optimization.py
+++ b/examples/basic/ex04_hyperparameter_optimization.py
@@ -19,7 +19,7 @@
 ####################
 parameters = mala.Parameters()
 parameters.data.input_rescaling_type = "feature-wise-standard"
-parameters.data.output_rescaling_type = "normal"
+parameters.data.output_rescaling_type = "minmax"
 parameters.running.max_number_epochs = 20
 parameters.running.mini_batch_size = 40
 parameters.running.optimizer = "Adam"
diff --git a/mala/common/parameters.py b/mala/common/parameters.py
index eaa30e186..ad58cb8e2 100644
--- a/mala/common/parameters.py
+++ b/mala/common/parameters.py
@@ -568,27 +568,45 @@ class ParametersData(ParametersBase):
         Specifies how input quantities are normalized.
         Options:
-        - "None": No normalization is applied.
-        - "standard": Standardization (Scale to mean 0, standard
-          deviation 1)
-        - "normal": Min-Max scaling (Scale to be in range 0...1)
-        - "feature-wise-standard": Row Standardization (Scale to mean 0,
-          standard deviation 1)
-        - "feature-wise-normal": Row Min-Max scaling (Scale to be in range
-          0...1)
+        - "None": No scaling is applied.
+        - "standard": Standardization (Scale to mean 0,
+          standard deviation 1) is applied to the entire array.
+        - "minmax": Min-Max scaling (Scale to be in range 0...1) is applied
+          to the entire array.
+        - "feature-wise-standard": Standardization (Scale to mean 0,
+          standard deviation 1) is applied to each feature dimension
+          individually.
+          I.e., if your training data has dimensions (d,f), then each
+          of the f columns with d entries is scaled individually.
+        - "feature-wise-minmax": Min-Max scaling (Scale to be in range
+          0...1) is applied to each feature dimension individually.
+          I.e., if your training data has dimensions (d,f), then each
+          of the f columns with d entries is scaled individually.
+        - "normal": (DEPRECATED) Old name for "minmax".
+        - "feature-wise-normal": (DEPRECATED) Old name for
+          "feature-wise-minmax"

     output_rescaling_type : string
         Specifies how output quantities are normalized.
         Options:
-        - "None": No normalization is applied.
+        - "None": No scaling is applied.
         - "standard": Standardization (Scale to mean 0,
-          standard deviation 1)
-        - "normal": Min-Max scaling (Scale to be in range 0...1)
-        - "feature-wise-standard": Row Standardization (Scale to mean 0,
-          standard deviation 1)
-        - "feature-wise-normal": Row Min-Max scaling (Scale to be in
-          range 0...1)
+          standard deviation 1) is applied to the entire array.
+        - "minmax": Min-Max scaling (Scale to be in range 0...1) is applied
+          to the entire array.
+        - "feature-wise-standard": Standardization (Scale to mean 0,
+          standard deviation 1) is applied to each feature dimension
+          individually.
+          I.e., if your training data has dimensions (d,f), then each
+          of the f columns with d entries is scaled individually.
+        - "feature-wise-minmax": Min-Max scaling (Scale to be in range
+          0...1) is applied to each feature dimension individually.
+          I.e., if your training data has dimensions (d,f), then each
+          of the f columns with d entries is scaled individually.
+        - "normal": (DEPRECATED) Old name for "minmax".
+        - "feature-wise-normal": (DEPRECATED) Old name for
+          "feature-wise-minmax"

     use_lazy_loading : bool
         If True, data is lazily loaded, i.e.
only the snapshots that are diff --git a/mala/datahandling/data_handler.py b/mala/datahandling/data_handler.py index 7b8fc2a43..9f63734fd 100644 --- a/mala/datahandling/data_handler.py +++ b/mala/datahandling/data_handler.py @@ -130,6 +130,8 @@ def clear_data(self): self.nr_training_snapshots = 0 self.nr_test_snapshots = 0 self.nr_validation_snapshots = 0 + self.input_data_scaler.reset() + self.output_data_scaler.reset() super(DataHandler, self).clear_data() # Preparing data @@ -303,7 +305,10 @@ def get_snapshot_calculation_output(self, snapshot_number): ###################### def raw_numpy_to_converted_scaled_tensor( - self, numpy_array, data_type, units, convert3Dto1D=False + self, + numpy_array, + data_type, + units, ): """ Transform a raw numpy array into a scaled torch tensor. @@ -320,9 +325,6 @@ def raw_numpy_to_converted_scaled_tensor( processed. units : string Units of the data that is processed. - convert3Dto1D : bool - If True (default: False), then a (x,y,z,dim) array is transformed - into a (x*y*z,dim) array. Returns ------- @@ -341,12 +343,12 @@ def raw_numpy_to_converted_scaled_tensor( ) # If desired, the dimensions can be changed. - if convert3Dto1D: + if len(np.shape(numpy_array)) == 4: if data_type == "in": data_dimension = self.input_dimension else: data_dimension = self.output_dimension - grid_size = np.prod(numpy_array[0:3]) + grid_size = np.prod(np.shape(numpy_array)[0:3]) desired_dimensions = [grid_size, data_dimension] else: desired_dimensions = None @@ -815,7 +817,6 @@ def __parametrize_scalers(self): # scaling. This should save some performance. if self.parameters.use_lazy_loading: - self.input_data_scaler.start_incremental_fitting() # We need to perform the data scaling over the entirety of the # training data. for snapshot in self.parameters.snapshot_directories_list: @@ -853,9 +854,7 @@ def __parametrize_scalers(self): [snapshot.grid_size, self.input_dimension] ) tmp = torch.from_numpy(tmp).float() - self.input_data_scaler.incremental_fit(tmp) - - self.input_data_scaler.finish_incremental_fitting() + self.input_data_scaler.partial_fit(tmp) else: self.__load_data("training", "inputs") @@ -876,7 +875,6 @@ def __parametrize_scalers(self): if self.parameters.use_lazy_loading: i = 0 - self.output_data_scaler.start_incremental_fitting() # We need to perform the data scaling over the entirety of the # training data. for snapshot in self.parameters.snapshot_directories_list: @@ -912,9 +910,8 @@ def __parametrize_scalers(self): [snapshot.grid_size, self.output_dimension] ) tmp = torch.from_numpy(tmp).float() - self.output_data_scaler.incremental_fit(tmp) + self.output_data_scaler.partial_fit(tmp) i += 1 - self.output_data_scaler.finish_incremental_fitting() else: self.__load_data("training", "outputs") diff --git a/mala/datahandling/data_scaler.py b/mala/datahandling/data_scaler.py index e3c8a5328..5f4491907 100644 --- a/mala/datahandling/data_scaler.py +++ b/mala/datahandling/data_scaler.py @@ -6,13 +6,19 @@ import torch.distributed as dist from mala.common.parameters import printout +from mala.common.parallelizer import parallel_warn +# IMPORTANT: If you change the docstrings, make sure to also change them +# in the ParametersData subclass, because users do usually not interact +# with this class directly. class DataScaler: """Scales input and output data. Sort of emulates the functionality of the scikit-learn library, but by - implementing the class by ourselves we have more freedom. + implementing the class by ourselves we have more freedom. 
Specifically
+    assumes data of the form (d,f), where d=x*y*z, i.e., the product of spatial
+    dimensions, and f is the feature dimension.

     Parameters
     ----------
@@ -20,14 +26,23 @@ class DataScaler:
         Specifies how scaling should be performed.
         Options:
-        - "None": No normalization is applied.
+        - "None": No scaling is applied.
         - "standard": Standardization (Scale to mean 0,
-          standard deviation 1)
-        - "normal": Min-Max scaling (Scale to be in range 0...1)
-        - "feature-wise-standard": Row Standardization (Scale to mean 0,
-          standard deviation 1)
-        - "feature-wise-normal": Row Min-Max scaling (Scale to be in range
-          0...1)
+          standard deviation 1) is applied to the entire array.
+        - "minmax": Min-Max scaling (Scale to be in range 0...1) is applied
+          to the entire array.
+        - "feature-wise-standard": Standardization (Scale to mean 0,
+          standard deviation 1) is applied to each feature dimension
+          individually.
+          I.e., if your training data has dimensions (d,f), then each
+          of the f columns with d entries is scaled individually.
+        - "feature-wise-minmax": Min-Max scaling (Scale to be in range
+          0...1) is applied to each feature dimension individually.
+          I.e., if your training data has dimensions (d,f), then each
+          of the f columns with d entries is scaled individually.
+        - "normal": (DEPRECATED) Old name for "minmax".
+        - "feature-wise-normal": (DEPRECATED) Old name for
+          "feature-wise-minmax"

     use_ddp : bool
         If True, the DataScaler will use ddp to check that data is
@@ -38,7 +53,7 @@ def __init__(self, typestring, use_ddp=False):
         self.use_ddp = use_ddp
         self.typestring = typestring
         self.scale_standard = False
-        self.scale_normal = False
+        self.scale_minmax = False
         self.feature_wise = False
         self.cantransform = False
         self.__parse_typestring()
@@ -57,23 +72,32 @@ def __init__(self, typestring, use_ddp=False):
     def __parse_typestring(self):
         """Parse the typestring to class attributes."""
         self.scale_standard = False
-        self.scale_normal = False
+        self.scale_minmax = False
         self.feature_wise = False
         if "standard" in self.typestring:
             self.scale_standard = True
         if "normal" in self.typestring:
-            self.scale_normal = True
+            parallel_warn(
+                "Options 'normal' and 'feature-wise-normal' will be "
+                "deprecated starting in MALA v1.4.0. Please use 'minmax' and "
+                "'feature-wise-minmax' instead.",
+                min_verbosity=0,
+                category=FutureWarning,
+            )
+            self.scale_minmax = True
+        if "minmax" in self.typestring:
+            self.scale_minmax = True
         if "feature-wise" in self.typestring:
             self.feature_wise = True

-        if self.scale_standard is False and self.scale_normal is False:
+        if self.scale_standard is False and self.scale_minmax is False:
             printout("No data rescaling will be performed.", min_verbosity=1)
             self.cantransform = True
             return

-        if self.scale_standard is True and self.scale_normal is True:
+        if self.scale_standard is True and self.scale_minmax is True:
             raise Exception("Invalid input data rescaling.")

-    def start_incremental_fitting(self):
+    def reset(self):
         """
         Start the incremental calculation of scaling parameters.
@@ -81,7 +105,7 @@
         self.total_data_count = 0

-    def incremental_fit(self, unscaled):
+    def partial_fit(self, unscaled):
         """
         Add data to the incremental calculation of scaling parameters.
@@ -93,7 +117,16 @@
             Data that is to be added to the fit.
""" - if self.scale_standard is False and self.scale_normal is False: + if len(unscaled.size()) != 2: + raise ValueError( + "MALA DataScaler are designed for 2D-arrays, " + "while a {0}D-array has been provided.".format( + len(unscaled.size()) + ) + ) + + if self.scale_standard is False and self.scale_minmax is False: + self.cantransform = True return else: with torch.no_grad(): @@ -142,7 +175,7 @@ def incremental_fit(self, unscaled): self.stds = new_std self.total_data_count += current_data_count - if self.scale_normal: + if self.scale_minmax: new_maxs = torch.max(unscaled, 0, keepdim=True) if list(self.maxs.size())[0] > 0: for i in range(list(new_maxs.values.size())[1]): @@ -205,7 +238,7 @@ def incremental_fit(self, unscaled): self.total_std = torch.sqrt(self.total_std) self.total_data_count += current_data_count - if self.scale_normal: + if self.scale_minmax: new_max = torch.max(unscaled) if new_max > self.total_max: self.total_max = new_max @@ -213,13 +246,6 @@ def incremental_fit(self, unscaled): new_min = torch.min(unscaled) if new_min < self.total_min: self.total_min = new_min - - def finish_incremental_fitting(self): - """ - Indicate that all data has been added to the incremental calculation. - - This is necessary for lazy loading. - """ self.cantransform = True def fit(self, unscaled): @@ -232,7 +258,15 @@ def fit(self, unscaled): Data that on which the scaling will be calculated. """ - if self.scale_standard is False and self.scale_normal is False: + if len(unscaled.size()) != 2: + raise ValueError( + "MALA DataScaler are designed for 2D-arrays, " + "while a {0}D-array has been provided.".format( + len(unscaled.size()) + ) + ) + + if self.scale_standard is False and self.scale_minmax is False: return else: with torch.no_grad(): @@ -246,7 +280,7 @@ def fit(self, unscaled): self.means = torch.mean(unscaled, 0, keepdim=True) self.stds = torch.std(unscaled, 0, keepdim=True) - if self.scale_normal: + if self.scale_minmax: self.maxs = torch.max(unscaled, 0, keepdim=True).values self.mins = torch.min(unscaled, 0, keepdim=True).values @@ -260,13 +294,13 @@ def fit(self, unscaled): self.total_mean = torch.mean(unscaled) self.total_std = torch.std(unscaled) - if self.scale_normal: + if self.scale_minmax: self.total_max = torch.max(unscaled) self.total_min = torch.min(unscaled) self.cantransform = True - def transform(self, unscaled): + def transform(self, unscaled, copy=False): """ Transform data from unscaled to scaled. @@ -278,13 +312,29 @@ def transform(self, unscaled): unscaled : torch.Tensor Real world data. + copy : bool + If False, data is modified in-place. If True, a copy of the + data is modified. Default is False. + Returns ------- scaled : torch.Tensor Scaled data. """ + if len(unscaled.size()) != 2: + raise ValueError( + "MALA DataScaler are designed for 2D-arrays, " + "while a {0}D-array has been provided.".format( + len(unscaled.size()) + ) + ) + + # Backward compatability. + if not hasattr(self, "scale_minmax") and hasattr(self, "scale_normal"): + self.scale_minmax = self.scale_normal + # First we need to find out if we even have to do anything. - if self.scale_standard is False and self.scale_normal is False: + if self.scale_standard is False and self.scale_minmax is False: pass elif self.cantransform is False: @@ -295,6 +345,8 @@ def transform(self, unscaled): # Perform the actual scaling, but use no_grad to make sure # that the next couple of iterations stay untracked. 
+        scaled = unscaled.clone() if copy else unscaled
+
         with torch.no_grad():
             if self.feature_wise:
@@ -303,12 +355,12 @@
                 ##########################
                 if self.scale_standard:
-                    unscaled -= self.means
-                    unscaled /= self.stds
+                    scaled -= self.means
+                    scaled /= self.stds

-                if self.scale_normal:
-                    unscaled -= self.mins
-                    unscaled /= self.maxs - self.mins
+                if self.scale_minmax:
+                    scaled -= self.mins
+                    scaled /= self.maxs - self.mins

             else:
@@ -317,14 +369,16 @@
                 ##########################
                 if self.scale_standard:
-                    unscaled -= self.total_mean
-                    unscaled /= self.total_std
+                    scaled -= self.total_mean
+                    scaled /= self.total_std

-                if self.scale_normal:
-                    unscaled -= self.total_min
-                    unscaled /= self.total_max - self.total_min
+                if self.scale_minmax:
+                    scaled -= self.total_min
+                    scaled /= self.total_max - self.total_min

-    def inverse_transform(self, scaled, as_numpy=False):
+        return scaled
+
+    def inverse_transform(self, scaled, copy=False, as_numpy=False):
         """
         Transform data from scaled to unscaled.
@@ -337,7 +391,11 @@
             Scaled data.

         as_numpy : bool
-            If True, a numpy array is returned, otherwsie.
+            If True, a numpy array is returned, otherwise a torch tensor.
+
+        copy : bool
+            If False, data is modified in-place. If True, a copy of the
+            data is modified and returned. Default is False.

         Returns
         -------
@@ -345,9 +403,25 @@
             Real world data.

         """
+        if len(scaled.size()) != 2:
+            raise ValueError(
+                "The MALA DataScaler is designed for 2D arrays, "
+                "but a {0}D array has been provided.".format(
+                    len(scaled.size())
+                )
+            )
+
+        # Backward compatibility.
+        if not hasattr(self, "scale_minmax") and hasattr(self, "scale_normal"):
+            self.scale_minmax = self.scale_normal
+
+        # Perform the actual scaling, but use no_grad to make sure
+        # that the next couple of iterations stay untracked.
+        unscaled = scaled.clone() if copy else scaled
+
         # First we need to find out if we even have to do anything.
- if self.scale_standard is False and self.scale_normal is False: - unscaled = scaled + if self.scale_standard is False and self.scale_minmax is False: + pass else: if self.cantransform is False: @@ -366,12 +440,12 @@ def inverse_transform(self, scaled, as_numpy=False): ########################## if self.scale_standard: - unscaled = (scaled * self.stds) + self.means + unscaled *= self.stds + unscaled += self.means - if self.scale_normal: - unscaled = ( - scaled * (self.maxs - self.mins) - ) + self.mins + if self.scale_minmax: + unscaled *= self.maxs - self.mins + unscaled += self.mins else: @@ -380,13 +454,13 @@ def inverse_transform(self, scaled, as_numpy=False): ########################## if self.scale_standard: - unscaled = (scaled * self.total_std) + self.total_mean + unscaled *= self.total_std + unscaled += self.total_mean + + if self.scale_minmax: + unscaled *= self.total_max - self.total_min + unscaled += self.total_min - if self.scale_normal: - unscaled = ( - scaled * (self.total_max - self.total_min) - ) + self.total_min - # if as_numpy: return unscaled.detach().numpy().astype(np.float64) else: diff --git a/test/all_lazy_loading_test.py b/test/all_lazy_loading_test.py index 5130266a7..e51501fae 100644 --- a/test/all_lazy_loading_test.py +++ b/test/all_lazy_loading_test.py @@ -30,7 +30,7 @@ def test_scaling(self): #################### test_parameters = Parameters() test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.descriptors.bispectrum_twojmax = 11 test_parameters.targets.ldos_gridsize = 10 @@ -53,9 +53,9 @@ def test_scaling(self): training_tester = [] for scalingtype in [ "standard", - "normal", + "minmax", "feature-wise-standard", - "feature-wise-normal", + "feature-wise-minmax", ]: comparison = [scalingtype] for ll_type in [True, False]: @@ -125,7 +125,7 @@ def test_scaling(self): data_handler.output_data_scaler.total_std / data_handler.nr_training_data ) - elif scalingtype == "normal": + elif scalingtype == "minmax": torch.manual_seed(2002) this_result.append( data_handler.input_data_scaler.total_max @@ -188,7 +188,7 @@ def test_scaling(self): 0 ].grid_size ) - elif scalingtype == "feature-wise-normal": + elif scalingtype == "feature-wise-minmax": this_result.append( torch.mean(data_handler.input_data_scaler.maxs) ) @@ -255,7 +255,7 @@ def _train_lazy_loading(prefetching): test_parameters = Parameters() test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.network.layer_activations = ["ReLU"] test_parameters.manual_seed = 1234 test_parameters.running.max_number_epochs = 100 diff --git a/test/basic_gpu_test.py b/test/basic_gpu_test.py index 514a70f21..46a44803f 100644 --- a/test/basic_gpu_test.py +++ b/test/basic_gpu_test.py @@ -82,7 +82,7 @@ def __run(use_gpu): # Specify the data scaling. test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" # Specify the used activation function. 
test_parameters.network.layer_activations = ["ReLU"] diff --git a/test/checkpoint_hyperopt_test.py b/test/checkpoint_hyperopt_test.py index a1909f21b..3c64ffa71 100644 --- a/test/checkpoint_hyperopt_test.py +++ b/test/checkpoint_hyperopt_test.py @@ -61,7 +61,7 @@ def __original_setup(n_trials): # Specify the data scaling. test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" # Specify the training parameters. test_parameters.running.max_number_epochs = 10 diff --git a/test/checkpoint_training_test.py b/test/checkpoint_training_test.py index 3bc5e83e3..abb2921f0 100644 --- a/test/checkpoint_training_test.py +++ b/test/checkpoint_training_test.py @@ -137,7 +137,7 @@ def __original_setup( # Specify the data scaling. test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" # Specify the used activation function. test_parameters.network.layer_activations = ["ReLU"] diff --git a/test/complete_interfaces_test.py b/test/complete_interfaces_test.py index 4ceb691d8..fefe113e0 100644 --- a/test/complete_interfaces_test.py +++ b/test/complete_interfaces_test.py @@ -180,7 +180,7 @@ def test_ase_calculator(self): test_parameters = mala.Parameters() test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.network.layer_activations = ["ReLU"] test_parameters.running.max_number_epochs = 100 test_parameters.running.mini_batch_size = 40 diff --git a/test/hyperopt_test.py b/test/hyperopt_test.py index 77b0b9896..d9f966728 100644 --- a/test/hyperopt_test.py +++ b/test/hyperopt_test.py @@ -38,7 +38,7 @@ def test_hyperopt(self): test_parameters = mala.Parameters() test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.running.max_number_epochs = 20 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 @@ -129,7 +129,7 @@ def test_distributed_hyperopt(self): test_parameters = mala.Parameters() test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.running.max_number_epochs = 5 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 @@ -238,7 +238,7 @@ def test_naswot_eigenvalues(self): test_parameters.manual_seed = 1234 test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.running.max_number_epochs = 10 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 @@ -306,7 +306,7 @@ def __optimize_hyperparameters(hyper_optimizer): test_parameters = mala.Parameters() test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = 
"feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.running.max_number_epochs = 20 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 @@ -387,7 +387,7 @@ def test_hyperopt_optuna_requeue_zombie_trials(self, tmp_path): test_parameters = mala.Parameters() test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.running.max_number_epochs = 2 test_parameters.running.mini_batch_size = 40 test_parameters.running.learning_rate = 0.00001 diff --git a/test/scaling_test.py b/test/scaling_test.py index b7925cd9f..eed0c201f 100644 --- a/test/scaling_test.py +++ b/test/scaling_test.py @@ -19,8 +19,8 @@ def test_errors_and_accuracy(self): "feature-wise-standard", "standard", "None", - "normal", - "feature-wise-normal", + "minmax", + "feature-wise-minmax", ]: data = np.load(os.path.join(data_path, "Be_snapshot2.out.npy")) data = data.astype(np.float32) @@ -43,3 +43,37 @@ def test_errors_and_accuracy(self): transformed = scaler.inverse_transform(transformed) relative_error = torch.sum(np.abs((data2 - transformed) / data2)) assert relative_error < desired_accuracy + + def test_array_referencing(self): + # Asserts that even with the new in-place scaling, data is referenced + # and not copied (unless that is explicitly asked) + + for scaling in [ + "feature-wise-standard", + "standard", + "None", + "minmax", + "feature-wise-minmax", + ]: + data = np.load(os.path.join(data_path, "Be_snapshot2.in.npy")) + data = data.astype(np.float32) + data = data.reshape( + [np.prod(np.shape(data)[0:3]), np.shape(data)[3]] + ) + data = torch.from_numpy(data).float() + + scaler = mala.DataScaler(scaling) + scaler.fit(data) + + numpy_array = np.expand_dims(np.random.random(94), axis=0) + test_data = torch.from_numpy(numpy_array) + scaler.transform(test_data) + scaler.inverse_transform(test_data) + numpy_array *= 2 + assert np.isclose( + np.sum( + test_data.detach().numpy().astype(np.float64) - numpy_array + ), + 0.0, + rtol=1e-16, + ) diff --git a/test/shuffling_test.py b/test/shuffling_test.py index ffe6181bb..2ac098012 100644 --- a/test/shuffling_test.py +++ b/test/shuffling_test.py @@ -119,7 +119,7 @@ def test_training(self): test_parameters = mala.Parameters() test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.network.layer_activations = ["ReLU"] test_parameters.running.max_number_epochs = 50 test_parameters.running.mini_batch_size = 40 @@ -163,7 +163,7 @@ def test_training(self): test_parameters.data.shuffling_seed = 1234 test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.network.layer_activations = ["ReLU"] test_parameters.running.max_number_epochs = 50 test_parameters.running.mini_batch_size = 40 @@ -215,7 +215,7 @@ def test_training_openpmd(self): test_parameters = mala.Parameters() test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = 
"feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.network.layer_activations = ["ReLU"] test_parameters.running.max_number_epochs = 50 test_parameters.running.mini_batch_size = 40 @@ -261,7 +261,7 @@ def test_training_openpmd(self): test_parameters.data.shuffling_seed = 1234 test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.network.layer_activations = ["ReLU"] test_parameters.running.max_number_epochs = 50 test_parameters.running.mini_batch_size = 40 diff --git a/test/workflow_test.py b/test/workflow_test.py index bdfde4266..6ec94b842 100644 --- a/test/workflow_test.py +++ b/test/workflow_test.py @@ -523,7 +523,7 @@ def __simple_training( test_parameters = mala.Parameters() test_parameters.data.data_splitting_type = "by_snapshot" test_parameters.data.input_rescaling_type = "feature-wise-standard" - test_parameters.data.output_rescaling_type = "normal" + test_parameters.data.output_rescaling_type = "minmax" test_parameters.network.layer_activations = ["ReLU"] test_parameters.running.max_number_epochs = 400 test_parameters.running.mini_batch_size = 40