From db5cb1b9fa044898864c94a72fd439a19c8fc9d8 Mon Sep 17 00:00:00 2001 From: Connor Stone Date: Thu, 30 May 2024 09:44:10 -0400 Subject: [PATCH 1/2] Now whitens the input before performing PQM test --- src/pqm/pqm.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/pqm/pqm.py b/src/pqm/pqm.py index 242eefc..d51d7fe 100644 --- a/src/pqm/pqm.py +++ b/src/pqm/pqm.py @@ -12,6 +12,7 @@ def pqm_pvalue( y_samples: np.ndarray, num_refs: int = 100, bootstrap: Optional[int] = None, + whiten=True, ): """ Perform the PQM test of the null hypothesis that `x_samples` and `y_samples` are drawn form the same distribution. @@ -33,7 +34,10 @@ def pqm_pvalue( pvalue. Null hypothesis that both samples are drawn from the same distribution. """ if bootstrap is not None: - return list(pqm_pvalue(x_samples, y_samples, num_refs=num_refs) for _ in range(bootstrap)) + return list( + pqm_pvalue(x_samples.copy(), y_samples.copy(), num_refs=num_refs, whiten=whiten) + for _ in range(bootstrap) + ) if len(y_samples) < num_refs: raise ValueError( "Number of reference samples must be less than the number of true samples." @@ -42,6 +46,11 @@ def pqm_pvalue( print( "Warning: Number of y_samples is small (less than twice the number of reference samples). Result may have high variance." ) + if whiten: + mean = np.mean(y_samples, axis=0) + std = np.std(y_samples, axis=0) + y_samples = (y_samples - mean) / std + x_samples = (x_samples - mean) / std refs = np.random.choice(len(y_samples), num_refs, replace=False) N = np.arange(len(y_samples)) From 2b182ccce6081b309dd7b66862174b4dc29d3afb Mon Sep 17 00:00:00 2001 From: Connor Stone Date: Thu, 30 May 2024 15:20:59 -0400 Subject: [PATCH 2/2] set whiten to false by default --- src/pqm/pqm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pqm/pqm.py b/src/pqm/pqm.py index d51d7fe..d2e2c17 100644 --- a/src/pqm/pqm.py +++ b/src/pqm/pqm.py @@ -12,7 +12,7 @@ def pqm_pvalue( y_samples: np.ndarray, num_refs: int = 100, bootstrap: Optional[int] = None, - whiten=True, + whiten: bool = False, ): """ Perform the PQM test of the null hypothesis that `x_samples` and `y_samples` are drawn form the same distribution. @@ -27,6 +27,8 @@ def pqm_pvalue( Number of reference samples to use. Note that these will be drawn from y_samples, and then removed from the y_samples array. bootstrap : Optional[int] Number of bootstrap iterations to perform. No bootstrap if None (default). + whiten : bool + If True, whiten the samples by subtracting the mean and dividing by the standard deviation. Returns -------