diff --git a/src/pqm/pqm.py b/src/pqm/pqm.py
index 4cc7097..a5bbdfa 100644
--- a/src/pqm/pqm.py
+++ b/src/pqm/pqm.py
@@ -7,7 +7,7 @@
 __all__ = ("pqm_chi2", "pqm_pvalue")
 
 
-def _pqm_test(x_samples: np.ndarray, y_samples: np.ndarray, num_refs: int):
+def _pqm_test(x_samples: np.ndarray, y_samples: np.ndarray, num_refs: int, whiten: bool):
     """
     Helper function to perform the PQM test and return the results from chi2_contingency.
 
@@ -19,6 +19,8 @@ def _pqm_test(x_samples: np.ndarray, y_samples: np.ndarray, num_refs: int):
         Samples from the second distribution, reference samples.
     num_refs : int
         Number of reference samples to use.
+    whiten : bool
+        If True, whiten the samples by subtracting the mean and dividing by the standard deviation.
 
     Returns
     -------
@@ -33,6 +35,11 @@ def _pqm_test(x_samples: np.ndarray, y_samples: np.ndarray, num_refs: int):
         print(
             "Warning: Number of y_samples is small (less than twice the number of reference samples). Result may have high variance."
         )
+    if whiten:
+        mean = np.mean(y_samples, axis=0)
+        std = np.std(y_samples, axis=0)
+        y_samples = (y_samples - mean) / std
+        x_samples = (x_samples - mean) / std
 
     refs = np.random.choice(len(y_samples), num_refs, replace=False)
     N = np.arange(len(y_samples))
@@ -60,6 +67,7 @@ def pqm_pvalue(
     y_samples: np.ndarray,
     num_refs: int = 100,
     bootstrap: Optional[int] = None,
+    whiten: bool = False,
 ):
     """
     Perform the PQM test of the null hypothesis that `x_samples` and `y_samples` are drawn form the same distribution.
@@ -74,6 +82,8 @@ def pqm_pvalue(
         Number of reference samples to use. Note that these will be drawn from y_samples, and then removed from the y_samples array.
     bootstrap : Optional[int]
         Number of bootstrap iterations to perform. No bootstrap if None (default).
+    whiten : bool
+        If True, whiten the samples by subtracting the mean and dividing by the standard deviation.
 
     Returns
     -------
@@ -81,8 +91,11 @@ def pqm_pvalue(
         pvalue(s). Null hypothesis that both samples are drawn from the same distribution.
     """
     if bootstrap is not None:
-        return [pqm_pvalue(x_samples, y_samples, num_refs=num_refs) for _ in range(bootstrap)]
-    _, pvalue, _, _ = _pqm_test(x_samples, y_samples, num_refs)
+        return [
+            pqm_pvalue(x_samples, y_samples, num_refs=num_refs, whiten=whiten)
+            for _ in range(bootstrap)
+        ]
+    _, pvalue, _, _ = _pqm_test(x_samples, y_samples, num_refs, whiten)
     return pvalue
 
 
@@ -91,6 +104,7 @@ def pqm_chi2(
     y_samples: np.ndarray,
     num_refs: int = 100,
     bootstrap: Optional[int] = None,
+    whiten: bool = False,
 ):
     """
     Perform the PQM test of the null hypothesis that `x_samples` and `y_samples` are drawn form the same distribution.
@@ -105,6 +119,8 @@ def pqm_chi2(
         Number of reference samples to use. Note that these will be drawn from y_samples, and then removed from the y_samples array.
     bootstrap : Optional[int]
         Number of bootstrap iterations to perform. No bootstrap if None (default).
+    whiten : bool
+        If True, whiten the samples by subtracting the mean and dividing by the standard deviation.
 
     Returns
     -------
@@ -112,8 +128,11 @@ def pqm_chi2(
         chi2 statistic(s) and degree(s) of freedom.
     """
     if bootstrap is not None:
-        return [pqm_chi2(x_samples, y_samples, num_refs=num_refs) for _ in range(bootstrap)]
-    chi2_stat, _, dof, _ = _pqm_test(x_samples, y_samples, num_refs)
+        return [
+            pqm_chi2(x_samples, y_samples, num_refs=num_refs, whiten=whiten)
+            for _ in range(bootstrap)
+        ]
+    chi2_stat, _, dof, _ = _pqm_test(x_samples, y_samples, num_refs, whiten)
     if dof != num_refs - 1:
         # Rescale chi2 to new value which has the same cumulative probability
         cp = chi2.cdf(chi2_stat, dof)