Merge pull request #92 from B612-Asteroid-Institute/jm/residuals-tests

Fix indexing and probability bug in Residuals.calculate
B612-Asteroid-Institute · Nov 8, 2023 · 0ce3ec4 · 0ce3ec4
2 parents 4a003e1 + ca124f5
commit 0ce3ec4
Show file tree

Hide file tree

Showing 2 changed files with 176 additions and 7 deletions.
diff --git a/adam_core/coordinates/residuals.py b/adam_core/coordinates/residuals.py
@@ -1,9 +1,10 @@
 from typing import List, Tuple, Union
 
 import numpy as np
+import numpy.typing as npt
 import pyarrow as pa
 import quivr as qv
-from scipy.stats import chi2
+from scipy import stats
 
 from .cartesian import CartesianCoordinates
 from .cometary import CometaryCoordinates
@@ -76,6 +77,14 @@ def calculate(
                 "Observed and predicted coordinates must be the same type, "
                 f"not {type(observed)} and {type(predicted)}."
             )
+        if (observed.origin != predicted.origin).all():
+            raise ValueError(
+                "Observed and predicted coordinates must have the same origin."
+            )
+        if observed.frame != predicted.frame:
+            raise ValueError(
+                f"Observed ({observed.frame}) and predicted ({predicted.frame}) coordinates must have the same frame."
+            )
 
         N, D = observed.values.shape
         p = np.empty(N, dtype=np.float64)
@@ -103,11 +112,25 @@ def calculate(
             batch_indices, batch_dimensions, batch_coords, batch_covariances
         ):
             if not np.all(np.isnan(covariances)):
-                # Calculate the chi2 for each coordinate
-                chi2_values = calculate_chi2(residuals[indices], covariances)
-
-                # Calculate the probability for each coordinate
-                p[indices] = 1 - chi2.cdf(np.sqrt(chi2_values), dof[indices])
+                # Filter residuals by dimensions that have values
+                residuals_i = residuals[:, dimensions]
+
+                # Then filter by rows that belong to this batch
+                residuals_i = residuals_i[indices, :]
+
+                # Calculate the chi2 for each coordinate (this is actually
+                # calculating mahalanobis distance squared -- both are equivalent
+                # when the covariance matrix is diagonal, mahalanobis distance is more
+                # general as it allows for covariates between dimensions)
+                chi2_values = calculate_chi2(residuals_i, covariances)
+
+                # For a normally distributed random variable, the mahalanobis distance
+                # squared in D dimesions follows a chi2 distribution with D degrees of freedom.
+                # So for each coordinate, calculate the probability that you would
+                # get a chi2 value greater than or equal to that coordinate's chi2 value.
+                # At a residual of zero this probability is 1.0, and at a residual of
+                # 1 sigma (for 1 degree of freedom) this probability is ~0.3173.
+                p[indices] = 1 - stats.chi2.cdf(chi2_values, dof[indices])
 
                 # Set the chi2 for each coordinate
                 chi2s[indices] = chi2_values
@@ -124,6 +147,17 @@ def calculate(
             probability=p,
         )
 
+    def to_array(self) -> npt.NDArray[np.float64]:
+        """
+        Convert the residuals to a numpy array.
+
+        Returns
+        -------
+        residuals : `~numpy.ndarray` (N, D)
+            Array of residuals.
+        """
+        return np.stack(self.values.to_numpy(zero_copy_only=False))
+
 
 def calculate_chi2(residuals: np.ndarray, covariances: np.ndarray) -> np.ndarray:
     """

diff --git a/adam_core/coordinates/tests/test_residuals.py b/adam_core/coordinates/tests/test_residuals.py
@@ -1,7 +1,10 @@
 import numpy as np
+import pytest
 from scipy.spatial.distance import mahalanobis
 
-from ..residuals import _batch_coords_and_covariances, calculate_chi2
+from ..cartesian import CartesianCoordinates, CoordinateCovariances
+from ..origin import Origin
+from ..residuals import Residuals, _batch_coords_and_covariances, calculate_chi2
 
 
 def test_calculate_chi2():
@@ -192,3 +195,135 @@ def test_batch_coords_and_covariances_multiple_batches():
     np.testing.assert_equal(batch_dimensions[1], np.array([1, 2]))
     np.testing.assert_equal(batch_coords[1], np.array([[3.0, 4.0]]))
     np.testing.assert_equal(batch_covariances[1], np.array([[[3.0, 0.0], [0.0, 4.0]]]))
+
+
+def test_Residuals_calculate():
+    # Test that Residuals.calculate correctly identifies the number of degrees of freedom,
+    # and correctly identifies the dimensions that have valid values and those that do not.
+    observed_array = np.array(
+        [
+            [0.2, np.nan, np.nan, np.nan, np.nan, np.nan],
+            [0.6, 1.0, 2.0, np.nan, np.nan, np.nan],
+            [np.nan, 3.0, np.nan, 4.0, np.nan, np.nan],
+            [0.5, 3.0, 0.5, 4.5, 0.1, 0.1],
+        ]
+    )
+    observed = CartesianCoordinates.from_kwargs(
+        x=observed_array[:, 0],
+        y=observed_array[:, 1],
+        z=observed_array[:, 2],
+        vx=observed_array[:, 3],
+        vy=observed_array[:, 4],
+        vz=observed_array[:, 5],
+        covariance=CoordinateCovariances.from_sigmas(np.full((4, 6), 0.1)),
+        origin=Origin.from_kwargs(code=np.full(4, "SUN")),
+        frame="ecliptic",
+    )
+    predicted_array = np.array(
+        [
+            [0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
+            [0.5, 1.1, 1.9, 0.2, 0.1, 0.1],
+            [0.5, 2.9, 0.2, 4.1, 0.1, 0.1],
+            [0.5, 3.0, 0.5, 4.5, 0.1, 0.1],
+        ]
+    )
+    predicted = CartesianCoordinates.from_kwargs(
+        x=predicted_array[:, 0],
+        y=predicted_array[:, 1],
+        z=predicted_array[:, 2],
+        vx=predicted_array[:, 3],
+        vy=predicted_array[:, 4],
+        vz=predicted_array[:, 5],
+        origin=Origin.from_kwargs(code=np.full(4, "SUN")),
+        frame="ecliptic",
+    )
+
+    residuals = Residuals.calculate(observed, predicted)
+
+    # Calculate the expected residuals
+    desired_residuals = observed_array - predicted_array
+    np.testing.assert_almost_equal(
+        desired_residuals[0], np.array([-0.1, np.nan, np.nan, np.nan, np.nan, np.nan])
+    )
+    np.testing.assert_almost_equal(
+        desired_residuals[1], np.array([0.1, -0.1, 0.1, np.nan, np.nan, np.nan])
+    )
+    np.testing.assert_almost_equal(
+        desired_residuals[2], np.array([np.nan, 0.1, np.nan, -0.1, np.nan, np.nan])
+    )
+    np.testing.assert_almost_equal(
+        desired_residuals[3], np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
+    )
+
+    assert len(residuals) == 4
+    assert residuals.to_array().shape == (4, 6)
+    np.testing.assert_equal(residuals.to_array(), desired_residuals)
+    assert residuals.dof.to_pylist() == [1, 3, 2, 6]
+    np.testing.assert_almost_equal(
+        residuals.chi2.to_numpy(zero_copy_only=False), np.array([1, 3, 2, 0])
+    )
+
+    # Test that the probabilities for the first and last case are correct (these are more well known examples)
+    actual_probabilities = residuals.probability.to_numpy(zero_copy_only=False)
+    np.testing.assert_almost_equal(actual_probabilities[0], 0.31731050786291415)
+    np.testing.assert_almost_equal(actual_probabilities[3], 1.0)
+
+
+def test_Residuals_calculate_raises_frames():
+    # Test that an error is raised when the frames are not equal
+    observed_array = np.random.random((10, 6))
+    observed = CartesianCoordinates.from_kwargs(
+        x=observed_array[:, 0],
+        y=observed_array[:, 1],
+        z=observed_array[:, 2],
+        vx=observed_array[:, 3],
+        vy=observed_array[:, 4],
+        vz=observed_array[:, 5],
+        origin=Origin.from_kwargs(code=np.full(10, "SUN")),
+        frame="ecliptic",
+    )
+
+    predicted_array = np.random.random((10, 6))
+    predicted = CartesianCoordinates.from_kwargs(
+        x=predicted_array[:, 0],
+        y=predicted_array[:, 1],
+        z=predicted_array[:, 2],
+        vx=predicted_array[:, 3],
+        vy=predicted_array[:, 4],
+        vz=predicted_array[:, 5],
+        origin=Origin.from_kwargs(code=np.full(10, "SUN")),
+        frame="equatorial",
+    )
+
+    with pytest.raises(ValueError, match=r"coordinates must have the same frame."):
+        Residuals.calculate(observed, predicted)
+
+
+def test_Residuals_calculate_raises_origins():
+    # Test that an error is raised when the frames are not equal
+    observed_array = np.random.random((10, 6))
+    observed = CartesianCoordinates.from_kwargs(
+        x=observed_array[:, 0],
+        y=observed_array[:, 1],
+        z=observed_array[:, 2],
+        vx=observed_array[:, 3],
+        vy=observed_array[:, 4],
+        vz=observed_array[:, 5],
+        origin=Origin.from_kwargs(code=np.full(10, "SUN")),
+        frame="equatorial",
+    )
+
+    predicted_array = np.random.random((10, 6))
+    predicted = CartesianCoordinates.from_kwargs(
+        x=predicted_array[:, 0],
+        y=predicted_array[:, 1],
+        z=predicted_array[:, 2],
+        vx=predicted_array[:, 3],
+        vy=predicted_array[:, 4],
+        vz=predicted_array[:, 5],
+        origin=Origin.from_kwargs(code=np.full(10, "EARTH")),
+        frame="equatorial",
+    )
+
+    with pytest.raises(ValueError, match=r"coordinates must have the same origin."):
+        Residuals.calculate(observed, predicted)