Ciela-Institute · ConnorStoneAstro · Jul 19, 2024 · May 31, 2024 · May 31, 2024 · May 31, 2024
diff --git a/README.md b/README.md
@@ -15,20 +15,19 @@ pip install pqm
 This is the main use case:
 
 ```python
-from pqm import pqm_pvalue
+from pqm import pqm_pvalue, pqm_chi2
 import numpy as np
 
 x_sample = np.random.normal(size = (500, 10))
 y_sample = np.random.normal(size = (400, 10))
 
 # To get pvalues from PQMass
-pvalues = pqm_pvalue(x_sample, y_sample, num_refs = 100, bootstrap = 50)
+pvalues = pqm_pvalue(x_sample, y_sample, num_refs = 100, re_tessellation = 50)
 print(np.mean(pvalues), np.std(pvalues))
 
 # To get chi^2 from PQMass
-chi2_stat, dof = pqm_chi2(x_sample, y_sample, num_refs = 100, bootstrap = 50)
+chi2_stat = pqm_chi2(x_sample, y_sample, num_refs = 100, re_tessellation = 50)
 print(np.mean(chi2_stat), np.std(chi2_stat))
-print(np.unqiue(dof)) # This should be the same as num_refs - 1, if it is not, we suggest you use pqm_pvalue
 ```
 
 If your two samples are drawn from the same distribution, then the p-value should
@@ -44,9 +43,6 @@ it suggests that the samples are out of distribution. Conversely, if the histogr
 to the left, it indicates potential duplication or memorization (particularly relevant 
 for generative models).
 
-Note that the chi^2 metric faces limitations if you have a few samples. A solution could
-be to use bootstrapping. Another such solution is to pqm_pvalue. We leave it to the user to 
-identify the best solution for their problem.
 
 ## Developing
 

diff --git a/notebooks/mnist.ipynb b/notebooks/mnist.ipynb
diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb
diff --git a/notebooks/time_series.ipynb b/notebooks/time_series.ipynb
diff --git a/src/pqm/pqm.py b/src/pqm/pqm.py
@@ -66,7 +66,7 @@ def pqm_pvalue(
     x_samples: np.ndarray,
     y_samples: np.ndarray,
     num_refs: int = 100,
-    bootstrap: Optional[int] = None,
+    re_tessellation: Optional[int] = None,
     whiten: bool = False,
 ):
     """
@@ -80,8 +80,8 @@ def pqm_pvalue(
         Samples from the second distribution, reference samples. Must have shape (M, *D) M is the number of y samples, and D is the dimensionality of the samples.
     num_refs : int
         Number of reference samples to use. Note that these will be drawn from y_samples, and then removed from the y_samples array.
-    bootstrap : Optional[int]
-        Number of bootstrap iterations to perform. No bootstrap if None (default).
+    re_tessellation : Optional[int]
+        Number of times pqm_pvalue is called, re-tessellating the space. No re_tessellation if None (default).
     whiten : bool
         If True, whiten the samples by subtracting the mean and dividing by the standard deviation.
 
@@ -90,10 +90,10 @@ def pqm_pvalue(
     float or list
         pvalue(s). Null hypothesis that both samples are drawn from the same distribution.
     """
-    if bootstrap is not None:
+    if re_tessellation is not None:
         return [
             pqm_pvalue(x_samples, y_samples, num_refs=num_refs, whiten=whiten)
-            for _ in range(bootstrap)
+            for _ in range(re_tessellation)
         ]
     _, pvalue, _, _ = _pqm_test(x_samples, y_samples, num_refs, whiten)
     return pvalue
@@ -103,7 +103,7 @@ def pqm_chi2(
     x_samples: np.ndarray,
     y_samples: np.ndarray,
     num_refs: int = 100,
-    bootstrap: Optional[int] = None,
+    re_tessellation: Optional[int] = None,
     whiten: bool = False,
 ):
     """
@@ -117,8 +117,8 @@ def pqm_chi2(
         Samples from the second distribution, reference samples. Must have shape (M, *D) M is the number of y samples, and D is the dimensionality of the samples.
     num_refs : int
         Number of reference samples to use. Note that these will be drawn from y_samples, and then removed from the y_samples array.
-    bootstrap : Optional[int]
-        Number of bootstrap iterations to perform. No bootstrap if None (default).
+    re_tessellation : Optional[int]
+        Number of times pqm_chi2 is called, re-tessellating the space. No re_tessellation if None (default).
     whiten : bool
         If True, whiten the samples by subtracting the mean and dividing by the standard deviation.
 
@@ -127,10 +127,10 @@ def pqm_chi2(
     float or list
-        chi2 statistic(s) and degree(s) of freedom.
+        chi2 statistic(s).
     """
-    if bootstrap is not None:
+    if re_tessellation is not None:
         return [
             pqm_chi2(x_samples, y_samples, num_refs=num_refs, whiten=whiten)
-            for _ in range(bootstrap)
+            for _ in range(re_tessellation)
         ]
     chi2_stat, _, dof, _ = _pqm_test(x_samples, y_samples, num_refs, whiten)
     if dof != num_refs - 1:
@@ -141,4 +141,4 @@ def pqm_chi2(
         else:
             chi2_stat = chi2_stat * (num_refs - 1) / dof
         dof = num_refs - 1
-    return chi2_stat, dof
+    return chi2_stat
diff --git a/tests/test_gaussian.py b/tests/test_gaussian.py
@@ -21,7 +21,8 @@ def test_pass_chi2():
 
         new.append(pqm_chi2(x_samples, y_samples, num_refs=100))
     new = np.array(new)
-    assert np.abs(np.mean(new[:, 0]) / 99 - 1) < 0.15
+    print("np.abs(np.mean(new) / 99 - 1) < 0.15")
+    assert np.abs(np.mean(new) / 99 - 1) < 0.15
 
 
 def test_fail_pvalue():
@@ -35,12 +36,12 @@ def test_fail_pvalue():
     assert np.mean(new) < 1e-3
 
 
-def test_fail_chi2():
+def test_fail_chi2(num_refs=50):
     new = []
     for _ in range(100):
         y_samples = np.random.normal(size=(500, 50))
         x_samples = np.random.normal(size=(250, 50)) + 0.5
 
-        new.append(pqm_chi2(x_samples, y_samples, num_refs=100))
+        new.append(pqm_chi2(x_samples, y_samples, num_refs=num_refs))
     new = np.array(new)
-    assert np.mean(new[:, 0]) / 99 > 2
+    assert np.mean(new) / (num_refs - 1) > 2  # chi2 per degree of freedom; dof is num_refs - 1