Skip to content

Commit

Permalink
Merge pull request #3 from Ciela-Institute/white_input
Browse files Browse the repository at this point in the history
Now whitens the input before performing PQM test
  • Loading branch information
ConnorStoneAstro authored May 31, 2024
2 parents 4a12758 + 3866d7c commit d567bc7
Showing 1 changed file with 24 additions and 5 deletions.
29 changes: 24 additions & 5 deletions src/pqm/pqm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
__all__ = ("pqm_chi2", "pqm_pvalue")


def _pqm_test(x_samples: np.ndarray, y_samples: np.ndarray, num_refs: int):
def _pqm_test(x_samples: np.ndarray, y_samples: np.ndarray, num_refs: int, whiten: bool):
"""
Helper function to perform the PQM test and return the results from chi2_contingency.
Expand All @@ -19,6 +19,8 @@ def _pqm_test(x_samples: np.ndarray, y_samples: np.ndarray, num_refs: int):
Samples from the second distribution, reference samples.
num_refs : int
Number of reference samples to use.
whiten : bool
If True, whiten the samples by subtracting the mean and dividing by the standard deviation.
Returns
-------
Expand All @@ -33,6 +35,11 @@ def _pqm_test(x_samples: np.ndarray, y_samples: np.ndarray, num_refs: int):
print(
"Warning: Number of y_samples is small (less than twice the number of reference samples). Result may have high variance."
)
if whiten:
mean = np.mean(y_samples, axis=0)
std = np.std(y_samples, axis=0)
y_samples = (y_samples - mean) / std
x_samples = (x_samples - mean) / std

refs = np.random.choice(len(y_samples), num_refs, replace=False)
N = np.arange(len(y_samples))
Expand Down Expand Up @@ -60,6 +67,7 @@ def pqm_pvalue(
y_samples: np.ndarray,
num_refs: int = 100,
bootstrap: Optional[int] = None,
whiten: bool = False,
):
"""
Perform the PQM test of the null hypothesis that `x_samples` and `y_samples` are drawn form the same distribution.
Expand All @@ -74,15 +82,20 @@ def pqm_pvalue(
Number of reference samples to use. Note that these will be drawn from y_samples, and then removed from the y_samples array.
bootstrap : Optional[int]
Number of bootstrap iterations to perform. No bootstrap if None (default).
whiten : bool
If True, whiten the samples by subtracting the mean and dividing by the standard deviation.
Returns
-------
float or list
pvalue(s). Null hypothesis that both samples are drawn from the same distribution.
"""
if bootstrap is not None:
return [pqm_pvalue(x_samples, y_samples, num_refs=num_refs) for _ in range(bootstrap)]
_, pvalue, _, _ = _pqm_test(x_samples, y_samples, num_refs)
return [
pqm_pvalue(x_samples, y_samples, num_refs=num_refs, whiten=whiten)
for _ in range(bootstrap)
]
_, pvalue, _, _ = _pqm_test(x_samples, y_samples, num_refs, whiten)
return pvalue


Expand All @@ -91,6 +104,7 @@ def pqm_chi2(
y_samples: np.ndarray,
num_refs: int = 100,
bootstrap: Optional[int] = None,
whiten: bool = False,
):
"""
Perform the PQM test of the null hypothesis that `x_samples` and `y_samples` are drawn form the same distribution.
Expand All @@ -105,15 +119,20 @@ def pqm_chi2(
Number of reference samples to use. Note that these will be drawn from y_samples, and then removed from the y_samples array.
bootstrap : Optional[int]
Number of bootstrap iterations to perform. No bootstrap if None (default).
whiten : bool
If True, whiten the samples by subtracting the mean and dividing by the standard deviation.
Returns
-------
float or list
chi2 statistic(s) and degree(s) of freedom.
"""
if bootstrap is not None:
return [pqm_chi2(x_samples, y_samples, num_refs=num_refs) for _ in range(bootstrap)]
chi2_stat, _, dof, _ = _pqm_test(x_samples, y_samples, num_refs)
return [
pqm_chi2(x_samples, y_samples, num_refs=num_refs, whiten=whiten)
for _ in range(bootstrap)
]
chi2_stat, _, dof, _ = _pqm_test(x_samples, y_samples, num_refs, whiten)
if dof != num_refs - 1:
# Rescale chi2 to new value which has the same cumulative probability
cp = chi2.cdf(chi2_stat, dof)
Expand Down

0 comments on commit d567bc7

Please sign in to comment.