Skip to content

Commit

Permalink
Change require_right_margin default and update warning message (#307)
Browse files Browse the repository at this point in the history
* Set require_right_margin to False by default

* Update margin cache message
  • Loading branch information
camposandro authored May 8, 2024
1 parent 2b102a3 commit c06454c
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 36 deletions.
4 changes: 2 additions & 2 deletions src/lsdb/core/crossmatch/bounded_kdtree_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def validate(
self,
n_neighbors: int = 1,
radius_arcsec: float = 1,
require_right_margin: bool = True,
require_right_margin: bool = False,
min_radius_arcsec: float = 0,
):
super().validate(n_neighbors, radius_arcsec, require_right_margin)
Expand All @@ -28,7 +28,7 @@ def crossmatch(
n_neighbors: int = 1,
radius_arcsec: float = 1,
# We need it here because the signature is shared with .validate()
require_right_margin: bool = True, # pylint: disable=unused-argument
require_right_margin: bool = False, # pylint: disable=unused-argument
min_radius_arcsec: float = 0,
) -> pd.DataFrame:
"""Perform a cross-match between the data from two HEALPix pixels
Expand Down
6 changes: 3 additions & 3 deletions src/lsdb/core/crossmatch/kdtree_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def validate(
self,
n_neighbors: int = 1,
radius_arcsec: float = 1,
require_right_margin=True,
require_right_margin: bool = False,
):
super().validate()
# Validate radius
Expand All @@ -32,7 +32,7 @@ def validate(
# Check that the margin exists and has a compatible radius.
if self.right_margin_hc_structure is None:
if require_right_margin:
raise ValueError("Right margin is required for cross-match")
raise ValueError("Right catalog margin cache is required for cross-match.")
else:
if self.right_margin_hc_structure.catalog_info.margin_threshold < radius_arcsec:
raise ValueError("Cross match radius is greater than margin threshold")
Expand All @@ -42,7 +42,7 @@ def crossmatch(
n_neighbors: int = 1,
radius_arcsec: float = 1,
# We need it here because the signature is shared with .validate()
require_right_margin=True, # pylint: disable=unused-argument
require_right_margin: bool = False, # pylint: disable=unused-argument
) -> pd.DataFrame:
"""Perform a cross-match between the data from two HEALPix pixels
Expand Down
5 changes: 4 additions & 1 deletion src/lsdb/dask/crossmatch_catalog_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,10 @@ def crossmatch_catalog_data(
meta_df_crossmatch.validate(**kwargs)

if right.margin is None:
warnings.warn("Right catalog does not have a margin cache. Results may be inaccurate", RuntimeWarning)
warnings.warn(
"Right catalog does not have a margin cache. Results may be incomplete and/or inaccurate.",
RuntimeWarning,
)

# perform alignment on the two catalogs
alignment = align_catalogs(left, right)
Expand Down
10 changes: 8 additions & 2 deletions src/lsdb/dask/join_catalog_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,10 @@ def join_catalog_data_on(
catalogs.
"""
if right.margin is None:
warnings.warn("Right catalog does not have a margin cache. Results may be inaccurate", RuntimeWarning)
warnings.warn(
"Right catalog does not have a margin cache. Results may be incomplete and/or inaccurate.",
RuntimeWarning,
)

alignment = align_catalogs(left, right)

Expand Down Expand Up @@ -245,7 +248,10 @@ def join_catalog_data_through(
)

if right.margin is None:
warnings.warn("Right catalog does not have a margin cache. Results may be inaccurate", RuntimeWarning)
warnings.warn(
"Right catalog does not have a margin cache. Results may be incomplete and/or inaccurate.",
RuntimeWarning,
)

alignment = align_catalogs(left, right)

Expand Down
39 changes: 15 additions & 24 deletions tests/lsdb/catalog/test_crossmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,9 @@
class TestCrossmatch:
@staticmethod
def test_kdtree_crossmatch(algo, small_sky_catalog, small_sky_xmatch_catalog, xmatch_correct):
with pytest.warns(RuntimeWarning, match="Results may be inaccurate"):
with pytest.warns(RuntimeWarning, match="Results may be incomplete and/or inaccurate"):
xmatched = small_sky_catalog.crossmatch(
small_sky_xmatch_catalog,
algorithm=algo,
radius_arcsec=0.01 * 3600,
require_right_margin=False,
small_sky_xmatch_catalog, algorithm=algo, radius_arcsec=0.01 * 3600
).compute()
assert len(xmatched) == len(xmatch_correct)
for _, correct_row in xmatch_correct.iterrows():
Expand All @@ -29,12 +26,11 @@ def test_kdtree_crossmatch(algo, small_sky_catalog, small_sky_xmatch_catalog, xm

@staticmethod
def test_kdtree_crossmatch_thresh(algo, small_sky_catalog, small_sky_xmatch_catalog, xmatch_correct_005):
with pytest.warns(RuntimeWarning, match="Results may be inaccurate"):
with pytest.warns(RuntimeWarning, match="Results may be incomplete and/or inaccurate"):
xmatched = small_sky_catalog.crossmatch(
small_sky_xmatch_catalog,
radius_arcsec=0.005 * 3600,
algorithm=algo,
require_right_margin=False,
).compute()
assert len(xmatched) == len(xmatch_correct_005)
for _, correct_row in xmatch_correct_005.iterrows():
Expand All @@ -47,13 +43,12 @@ def test_kdtree_crossmatch_thresh(algo, small_sky_catalog, small_sky_xmatch_cata
def test_kdtree_crossmatch_multiple_neighbors(
algo, small_sky_catalog, small_sky_xmatch_catalog, xmatch_correct_3n_2t_no_margin
):
with pytest.warns(RuntimeWarning, match="Results may be inaccurate"):
with pytest.warns(RuntimeWarning, match="Results may be incomplete and/or inaccurate"):
xmatched = small_sky_catalog.crossmatch(
small_sky_xmatch_catalog,
n_neighbors=3,
radius_arcsec=2 * 3600,
algorithm=algo,
require_right_margin=False,
).compute()
assert len(xmatched) == len(xmatch_correct_3n_2t_no_margin)
for _, correct_row in xmatch_correct_3n_2t_no_margin.iterrows():
Expand Down Expand Up @@ -121,13 +116,12 @@ class TestBoundedCrossmatch:
def test_kdtree_crossmatch_min_thresh(
algo, small_sky_catalog, small_sky_xmatch_catalog, xmatch_correct_002_005
):
with pytest.warns(RuntimeWarning, match="Results may be inaccurate"):
with pytest.warns(RuntimeWarning, match="Results may be incomplete and/or inaccurate"):
xmatched = small_sky_catalog.crossmatch(
small_sky_xmatch_catalog,
radius_arcsec=0.005 * 3600,
min_radius_arcsec=0.002 * 3600,
algorithm=algo,
require_right_margin=False,
).compute()
assert len(xmatched) == len(xmatch_correct_002_005)
for _, correct_row in xmatch_correct_002_005.iterrows():
Expand All @@ -153,7 +147,6 @@ def test_kdtree_crossmatch_min_thresh_multiple_neighbors_margin(
radius_arcsec=2 * 3600,
min_radius_arcsec=0.5 * 3600,
algorithm=algo,
require_right_margin=False,
).compute()
assert len(xmatched) == len(xmatch_correct_05_2_3n_margin)
for _, correct_row in xmatch_correct_05_2_3n_margin.iterrows():
Expand All @@ -171,13 +164,12 @@ def test_kdtree_crossmatch_no_close_neighbors(
):
# Set a very small minimum radius so that there is not a single point
# with a very close neighbor
with pytest.warns(RuntimeWarning, match="Results may be inaccurate"):
with pytest.warns(RuntimeWarning, match="Results may be incomplete and/or inaccurate"):
xmatched = small_sky_catalog.crossmatch(
small_sky_xmatch_catalog,
radius_arcsec=0.005 * 3600,
min_radius_arcsec=1,
algorithm=algo,
require_right_margin=False,
).compute()
assert len(xmatched) == len(xmatch_correct_005)
for _, correct_row in xmatch_correct_005.iterrows():
Expand All @@ -192,14 +184,13 @@ def test_crossmatch_more_neighbors_than_points_available(
):
# The small_sky_xmatch catalog has 3 partitions (2 of length 41 and 1 of length 29).
# Let's use n_neighbors above that to request more neighbors than there are points available.
with pytest.warns(RuntimeWarning, match="Results may be inaccurate"):
with pytest.warns(RuntimeWarning, match="Results may be incomplete and/or inaccurate"):
xmatched = small_sky_catalog.crossmatch(
small_sky_xmatch_catalog,
n_neighbors=50,
radius_arcsec=2 * 3600,
min_radius_arcsec=0.5 * 3600,
algorithm=algo,
require_right_margin=False,
).compute()
assert len(xmatched) == 72
assert all(xmatched.groupby("id_small_sky").size()) <= 50
Expand All @@ -209,13 +200,13 @@ def test_self_crossmatch(algo, small_sky_catalog, small_sky_dir):
# Read a second small sky catalog to not have duplicate labels
small_sky_catalog_2 = lsdb.read_hipscat(small_sky_dir)
small_sky_catalog_2.hc_structure.catalog_name = "small_sky_2"
xmatched = small_sky_catalog.crossmatch(
small_sky_catalog_2,
min_radius_arcsec=0,
radius_arcsec=0.005 * 3600,
algorithm=algo,
require_right_margin=False,
).compute()
with pytest.warns(RuntimeWarning, match="Results may be incomplete and/or inaccurate"):
xmatched = small_sky_catalog.crossmatch(
small_sky_catalog_2,
min_radius_arcsec=0,
radius_arcsec=0.005 * 3600,
algorithm=algo,
).compute()
assert len(xmatched) == len(small_sky_catalog.compute())
assert all(xmatched["_dist_arcsec"] == 0)

Expand Down Expand Up @@ -259,7 +250,7 @@ def crossmatch(self, mock_results: pd.DataFrame = None):


def test_custom_crossmatch_algorithm(small_sky_catalog, small_sky_xmatch_catalog, xmatch_mock):
with pytest.warns(RuntimeWarning, match="Results may be inaccurate"):
with pytest.warns(RuntimeWarning, match="Results may be incomplete and/or inaccurate"):
xmatched = small_sky_catalog.crossmatch(
small_sky_xmatch_catalog, algorithm=MockCrossmatchAlgorithm, mock_results=xmatch_mock
).compute()
Expand Down
6 changes: 2 additions & 4 deletions tests/lsdb/core/test_kdtree_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,8 @@ def test_bounded_kdtree_radius_invalid(bounded_kdtree_crossmatch):

def test_kdtree_no_margin(kdtree_crossmatch):
kdtree_crossmatch.right_margin_hc_structure = None
with pytest.raises(ValueError, match="Right margin is required"):
kdtree_crossmatch.validate()

kdtree_crossmatch.validate(require_right_margin=False)
with pytest.raises(ValueError, match="Right catalog margin cache is required for cross-match"):
kdtree_crossmatch.validate(require_right_margin=True)


def test_kdtree_left_columns(kdtree_crossmatch):
Expand Down

0 comments on commit c06454c

Please sign in to comment.