Skip to content

Commit

Permalink
Merge pull request #92 from B612-Asteroid-Institute/jm/residuals-tests
Browse files Browse the repository at this point in the history
Fix indexing and probability bug in Residuals.calculate
  • Loading branch information
moeyensj authored Nov 8, 2023
2 parents 4a003e1 + ca124f5 commit 0ce3ec4
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 7 deletions.
46 changes: 40 additions & 6 deletions adam_core/coordinates/residuals.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from typing import List, Tuple, Union

import numpy as np
import numpy.typing as npt
import pyarrow as pa
import quivr as qv
from scipy.stats import chi2
from scipy import stats

from .cartesian import CartesianCoordinates
from .cometary import CometaryCoordinates
Expand Down Expand Up @@ -76,6 +77,14 @@ def calculate(
"Observed and predicted coordinates must be the same type, "
f"not {type(observed)} and {type(predicted)}."
)
if (observed.origin != predicted.origin).all():
raise ValueError(
"Observed and predicted coordinates must have the same origin."
)
if observed.frame != predicted.frame:
raise ValueError(
f"Observed ({observed.frame}) and predicted ({predicted.frame}) coordinates must have the same frame."
)

N, D = observed.values.shape
p = np.empty(N, dtype=np.float64)
Expand Down Expand Up @@ -103,11 +112,25 @@ def calculate(
batch_indices, batch_dimensions, batch_coords, batch_covariances
):
if not np.all(np.isnan(covariances)):
# Calculate the chi2 for each coordinate
chi2_values = calculate_chi2(residuals[indices], covariances)

# Calculate the probability for each coordinate
p[indices] = 1 - chi2.cdf(np.sqrt(chi2_values), dof[indices])
# Filter residuals by dimensions that have values
residuals_i = residuals[:, dimensions]

# Then filter by rows that belong to this batch
residuals_i = residuals_i[indices, :]

# Calculate the chi2 for each coordinate (this is actually
# calculating mahalanobis distance squared -- both are equivalent
# when the covariance matrix is diagonal, mahalanobis distance is more
# general as it allows for covariates between dimensions)
chi2_values = calculate_chi2(residuals_i, covariances)

# For a normally distributed random variable, the mahalanobis distance
# squared in D dimesions follows a chi2 distribution with D degrees of freedom.
# So for each coordinate, calculate the probability that you would
# get a chi2 value greater than or equal to that coordinate's chi2 value.
# At a residual of zero this probability is 1.0, and at a residual of
# 1 sigma (for 1 degree of freedom) this probability is ~0.3173.
p[indices] = 1 - stats.chi2.cdf(chi2_values, dof[indices])

# Set the chi2 for each coordinate
chi2s[indices] = chi2_values
Expand All @@ -124,6 +147,17 @@ def calculate(
probability=p,
)

def to_array(self) -> npt.NDArray[np.float64]:
"""
Convert the residuals to a numpy array.
Returns
-------
residuals : `~numpy.ndarray` (N, D)
Array of residuals.
"""
return np.stack(self.values.to_numpy(zero_copy_only=False))


def calculate_chi2(residuals: np.ndarray, covariances: np.ndarray) -> np.ndarray:
"""
Expand Down
137 changes: 136 additions & 1 deletion adam_core/coordinates/tests/test_residuals.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import numpy as np
import pytest
from scipy.spatial.distance import mahalanobis

from ..residuals import _batch_coords_and_covariances, calculate_chi2
from ..cartesian import CartesianCoordinates, CoordinateCovariances
from ..origin import Origin
from ..residuals import Residuals, _batch_coords_and_covariances, calculate_chi2


def test_calculate_chi2():
Expand Down Expand Up @@ -192,3 +195,135 @@ def test_batch_coords_and_covariances_multiple_batches():
np.testing.assert_equal(batch_dimensions[1], np.array([1, 2]))
np.testing.assert_equal(batch_coords[1], np.array([[3.0, 4.0]]))
np.testing.assert_equal(batch_covariances[1], np.array([[[3.0, 0.0], [0.0, 4.0]]]))


def test_Residuals_calculate():
# Test that Residuals.calculate correctly identifies the number of degrees of freedom,
# and correctly identifies the dimensions that have valid values and those that do not.
observed_array = np.array(
[
[0.2, np.nan, np.nan, np.nan, np.nan, np.nan],
[0.6, 1.0, 2.0, np.nan, np.nan, np.nan],
[np.nan, 3.0, np.nan, 4.0, np.nan, np.nan],
[0.5, 3.0, 0.5, 4.5, 0.1, 0.1],
]
)
observed = CartesianCoordinates.from_kwargs(
x=observed_array[:, 0],
y=observed_array[:, 1],
z=observed_array[:, 2],
vx=observed_array[:, 3],
vy=observed_array[:, 4],
vz=observed_array[:, 5],
covariance=CoordinateCovariances.from_sigmas(np.full((4, 6), 0.1)),
origin=Origin.from_kwargs(code=np.full(4, "SUN")),
frame="ecliptic",
)
predicted_array = np.array(
[
[0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
[0.5, 1.1, 1.9, 0.2, 0.1, 0.1],
[0.5, 2.9, 0.2, 4.1, 0.1, 0.1],
[0.5, 3.0, 0.5, 4.5, 0.1, 0.1],
]
)
predicted = CartesianCoordinates.from_kwargs(
x=predicted_array[:, 0],
y=predicted_array[:, 1],
z=predicted_array[:, 2],
vx=predicted_array[:, 3],
vy=predicted_array[:, 4],
vz=predicted_array[:, 5],
origin=Origin.from_kwargs(code=np.full(4, "SUN")),
frame="ecliptic",
)

residuals = Residuals.calculate(observed, predicted)

# Calculate the expected residuals
desired_residuals = observed_array - predicted_array
np.testing.assert_almost_equal(
desired_residuals[0], np.array([-0.1, np.nan, np.nan, np.nan, np.nan, np.nan])
)
np.testing.assert_almost_equal(
desired_residuals[1], np.array([0.1, -0.1, 0.1, np.nan, np.nan, np.nan])
)
np.testing.assert_almost_equal(
desired_residuals[2], np.array([np.nan, 0.1, np.nan, -0.1, np.nan, np.nan])
)
np.testing.assert_almost_equal(
desired_residuals[3], np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
)

assert len(residuals) == 4
assert residuals.to_array().shape == (4, 6)
np.testing.assert_equal(residuals.to_array(), desired_residuals)
assert residuals.dof.to_pylist() == [1, 3, 2, 6]
np.testing.assert_almost_equal(
residuals.chi2.to_numpy(zero_copy_only=False), np.array([1, 3, 2, 0])
)

# Test that the probabilities for the first and last case are correct (these are more well known examples)
actual_probabilities = residuals.probability.to_numpy(zero_copy_only=False)
np.testing.assert_almost_equal(actual_probabilities[0], 0.31731050786291415)
np.testing.assert_almost_equal(actual_probabilities[3], 1.0)


def test_Residuals_calculate_raises_frames():
# Test that an error is raised when the frames are not equal
observed_array = np.random.random((10, 6))
observed = CartesianCoordinates.from_kwargs(
x=observed_array[:, 0],
y=observed_array[:, 1],
z=observed_array[:, 2],
vx=observed_array[:, 3],
vy=observed_array[:, 4],
vz=observed_array[:, 5],
origin=Origin.from_kwargs(code=np.full(10, "SUN")),
frame="ecliptic",
)

predicted_array = np.random.random((10, 6))
predicted = CartesianCoordinates.from_kwargs(
x=predicted_array[:, 0],
y=predicted_array[:, 1],
z=predicted_array[:, 2],
vx=predicted_array[:, 3],
vy=predicted_array[:, 4],
vz=predicted_array[:, 5],
origin=Origin.from_kwargs(code=np.full(10, "SUN")),
frame="equatorial",
)

with pytest.raises(ValueError, match=r"coordinates must have the same frame."):
Residuals.calculate(observed, predicted)


def test_Residuals_calculate_raises_origins():
# Test that an error is raised when the frames are not equal
observed_array = np.random.random((10, 6))
observed = CartesianCoordinates.from_kwargs(
x=observed_array[:, 0],
y=observed_array[:, 1],
z=observed_array[:, 2],
vx=observed_array[:, 3],
vy=observed_array[:, 4],
vz=observed_array[:, 5],
origin=Origin.from_kwargs(code=np.full(10, "SUN")),
frame="equatorial",
)

predicted_array = np.random.random((10, 6))
predicted = CartesianCoordinates.from_kwargs(
x=predicted_array[:, 0],
y=predicted_array[:, 1],
z=predicted_array[:, 2],
vx=predicted_array[:, 3],
vy=predicted_array[:, 4],
vz=predicted_array[:, 5],
origin=Origin.from_kwargs(code=np.full(10, "EARTH")),
frame="equatorial",
)

with pytest.raises(ValueError, match=r"coordinates must have the same origin."):
Residuals.calculate(observed, predicted)

0 comments on commit 0ce3ec4

Please sign in to comment.