From 34974b4261d267e011583cdb0161e1e27bdc9df6 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Mon, 14 Oct 2024 17:46:52 -0400 Subject: [PATCH 1/3] use alignment moc in crossmatched/joined catalogs --- src/lsdb/catalog/catalog.py | 18 +++++++++++---- tests/lsdb/catalog/test_crossmatch.py | 5 ++++ tests/lsdb/catalog/test_join.py | 33 +++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/src/lsdb/catalog/catalog.py b/src/lsdb/catalog/catalog.py index a6f01e01..c93e5813 100644 --- a/src/lsdb/catalog/catalog.py +++ b/src/lsdb/catalog/catalog.py @@ -223,7 +223,9 @@ def crossmatch( dec_column=self.hc_structure.catalog_info.dec_column + suffixes[0], total_rows=None, ) - hc_catalog = hc.catalog.Catalog(new_catalog_info, alignment.pixel_tree, schema=get_arrow_schema(ddf)) + hc_catalog = hc.catalog.Catalog( + new_catalog_info, alignment.pixel_tree, schema=get_arrow_schema(ddf), moc=alignment.moc + ) return Catalog(ddf, ddf_map, hc_catalog) def cone_search(self, ra: float, dec: float, radius_arcsec: float, fine: bool = True) -> Catalog: @@ -441,7 +443,9 @@ def merge_asof( dec_column=self.hc_structure.catalog_info.dec_column + suffixes[0], total_rows=None, ) - hc_catalog = hc.catalog.Catalog(new_catalog_info, alignment.pixel_tree, schema=get_arrow_schema(ddf)) + hc_catalog = hc.catalog.Catalog( + new_catalog_info, alignment.pixel_tree, schema=get_arrow_schema(ddf), moc=alignment.moc + ) return Catalog(ddf, ddf_map, hc_catalog) def join( @@ -492,7 +496,7 @@ def join( total_rows=None, ) hc_catalog = hc.catalog.Catalog( - new_catalog_info, alignment.pixel_tree, schema=get_arrow_schema(ddf) + new_catalog_info, alignment.pixel_tree, schema=get_arrow_schema(ddf), moc=alignment.moc ) return Catalog(ddf, ddf_map, hc_catalog) if left_on is None or right_on is None: @@ -515,7 +519,9 @@ def join( dec_column=self.hc_structure.catalog_info.dec_column + suffixes[0], total_rows=None, ) - hc_catalog = hc.catalog.Catalog(new_catalog_info, alignment.pixel_tree, schema=get_arrow_schema(ddf)) + hc_catalog = hc.catalog.Catalog( + new_catalog_info, alignment.pixel_tree, schema=get_arrow_schema(ddf), moc=alignment.moc + ) return Catalog(ddf, ddf_map, hc_catalog) def join_nested( @@ -573,7 +579,9 @@ def join_nested( catalog_name=output_catalog_name, total_rows=None, ) - hc_catalog = hc.catalog.Catalog(new_catalog_info, alignment.pixel_tree) + hc_catalog = hc.catalog.Catalog( + new_catalog_info, alignment.pixel_tree, schema=get_arrow_schema(ddf), moc=alignment.moc + ) return Catalog(ddf, ddf_map, hc_catalog) def nest_lists( diff --git a/tests/lsdb/catalog/test_crossmatch.py b/tests/lsdb/catalog/test_crossmatch.py index c942ea84..c457d9fd 100644 --- a/tests/lsdb/catalog/test_crossmatch.py +++ b/tests/lsdb/catalog/test_crossmatch.py @@ -12,6 +12,7 @@ from lsdb.core.crossmatch.abstract_crossmatch_algorithm import AbstractCrossmatchAlgorithm from lsdb.core.crossmatch.bounded_kdtree_match import BoundedKdTreeCrossmatch from lsdb.core.crossmatch.kdtree_match import KdTreeCrossmatch +from lsdb.dask.merge_catalog_functions import align_catalogs @pytest.mark.parametrize("algo", [KdTreeCrossmatch]) @@ -24,6 +25,10 @@ def test_kdtree_crossmatch(algo, small_sky_catalog, small_sky_xmatch_catalog, xm ) assert isinstance(xmatched_cat._ddf, nd.NestedFrame) xmatched = xmatched_cat.compute() + alignment = align_catalogs(small_sky_catalog, small_sky_xmatch_catalog) + assert xmatched_cat.hc_structure.moc == alignment.moc + assert xmatched_cat.get_healpix_pixels() == alignment.pixel_tree.get_healpix_pixels() + assert isinstance(xmatched, npd.NestedFrame) assert len(xmatched) == len(xmatch_correct) for _, correct_row in xmatch_correct.iterrows(): diff --git a/tests/lsdb/catalog/test_join.py b/tests/lsdb/catalog/test_join.py index a147142e..4fe82050 100644 --- a/tests/lsdb/catalog/test_join.py +++ b/tests/lsdb/catalog/test_join.py @@ -1,3 +1,4 @@ +import hipscat as hc import nested_dask as nd import nested_pandas as npd import numpy as np @@ -5,6 +6,8 @@ import pytest from hipscat.pixel_math.hipscat_id import HIPSCAT_ID_COLUMN, hipscat_id_to_healpix +from lsdb.dask.merge_catalog_functions import align_catalogs + def test_small_sky_join_small_sky_order1( small_sky_catalog, small_sky_order1_catalog, assert_divisions_are_correct @@ -21,6 +24,9 @@ def test_small_sky_join_small_sky_order1( assert (col_name + suffixes[1], dtype) in joined.dtypes.items() assert joined._ddf.index.name == HIPSCAT_ID_COLUMN assert joined._ddf.index.dtype == np.uint64 + alignment = align_catalogs(small_sky_catalog, small_sky_order1_catalog) + assert joined.hc_structure.moc == alignment.moc + assert joined.get_healpix_pixels() == alignment.pixel_tree.get_healpix_pixels() joined_compute = joined.compute() assert isinstance(joined_compute, npd.NestedFrame) @@ -46,6 +52,11 @@ def test_small_sky_join_small_sky_order1_source( assert (col_name + suffixes[0], dtype) in joined.dtypes.items() for col_name, dtype in small_sky_order1_source_with_margin.dtypes.items(): assert (col_name + suffixes[1], dtype) in joined.dtypes.items() + + alignment = align_catalogs(small_sky_catalog, small_sky_order1_source_with_margin) + assert joined.hc_structure.moc == alignment.moc + assert joined.get_healpix_pixels() == alignment.pixel_tree.get_healpix_pixels() + joined_compute = joined.compute() small_sky_order1_compute = small_sky_order1_source_with_margin.compute() assert len(joined_compute) == len(small_sky_order1_compute) @@ -74,6 +85,10 @@ def test_join_association(small_sky_catalog, small_sky_xmatch_catalog, small_sky ) assert isinstance(joined._ddf, nd.NestedFrame) assert joined._ddf.npartitions == len(small_sky_to_xmatch_catalog.hc_structure.join_info.data_frame) + alignment = align_catalogs(small_sky_catalog, small_sky_xmatch_catalog) + assert joined.hc_structure.moc == alignment.moc + assert joined.get_healpix_pixels() == alignment.pixel_tree.get_healpix_pixels() + joined_data = joined.compute() assert isinstance(joined_data, npd.NestedFrame) association_data = small_sky_to_xmatch_catalog.compute() @@ -120,6 +135,10 @@ def test_join_association_source_margin( small_sky_order1_source_with_margin, through=small_sky_to_o1source_catalog, suffixes=suffixes ) assert joined._ddf.npartitions == len(small_sky_to_o1source_catalog.hc_structure.join_info.data_frame) + alignment = align_catalogs(small_sky_catalog, small_sky_order1_source_with_margin) + assert joined.hc_structure.moc == alignment.moc + assert joined.get_healpix_pixels() == alignment.pixel_tree.get_healpix_pixels() + joined_data = joined.compute() association_data = small_sky_to_o1source_catalog.compute() assert len(joined_data) == 17161 @@ -156,6 +175,9 @@ def test_join_association_soft(small_sky_catalog, small_sky_xmatch_catalog, smal small_sky_xmatch_catalog, through=small_sky_to_xmatch_soft_catalog, suffixes=suffixes ) assert joined._ddf.npartitions == len(small_sky_to_xmatch_soft_catalog.hc_structure.join_info.data_frame) + alignment = align_catalogs(small_sky_catalog, small_sky_xmatch_catalog) + assert joined.hc_structure.moc == alignment.moc + assert joined.get_healpix_pixels() == alignment.pixel_tree.get_healpix_pixels() with pytest.warns(match="margin"): joined_on = small_sky_catalog.join( @@ -177,6 +199,9 @@ def test_join_source_margin_soft( assert joined._ddf.npartitions == len( small_sky_to_o1source_soft_catalog.hc_structure.join_info.data_frame ) + alignment = align_catalogs(small_sky_catalog, small_sky_order1_source_with_margin) + assert joined.hc_structure.moc == alignment.moc + assert joined.get_healpix_pixels() == alignment.pixel_tree.get_healpix_pixels() joined_on = small_sky_catalog.join( small_sky_order1_source_with_margin, @@ -200,6 +225,10 @@ def test_join_nested(small_sky_catalog, small_sky_order1_source_with_margin, ass if col_name != "object_id": assert (col_name, dtype.pyarrow_dtype) in joined["sources"].dtypes.fields.items() assert_divisions_are_correct(joined) + alignment = align_catalogs(small_sky_catalog, small_sky_order1_source_with_margin) + assert joined.hc_structure.moc == alignment.moc + assert joined.get_healpix_pixels() == alignment.pixel_tree.get_healpix_pixels() + joined_compute = joined.compute() source_compute = small_sky_order1_source_with_margin.compute() assert isinstance(joined_compute, npd.NestedFrame) @@ -224,6 +253,10 @@ def test_merge_asof(small_sky_catalog, small_sky_xmatch_catalog, assert_division ) assert isinstance(joined._ddf, nd.NestedFrame) assert_divisions_are_correct(joined) + alignment = align_catalogs(small_sky_catalog, small_sky_xmatch_catalog) + assert joined.hc_structure.moc == alignment.moc + assert joined.get_healpix_pixels() == alignment.pixel_tree.get_healpix_pixels() + joined_compute = joined.compute() assert isinstance(joined_compute, npd.NestedFrame) small_sky_compute = small_sky_catalog.compute().rename( From fef8f8b854bebdde89891ecee35de3d5c2493318 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Mon, 14 Oct 2024 17:57:30 -0400 Subject: [PATCH 2/3] remove unused import --- tests/lsdb/catalog/test_join.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/lsdb/catalog/test_join.py b/tests/lsdb/catalog/test_join.py index 4fe82050..07348966 100644 --- a/tests/lsdb/catalog/test_join.py +++ b/tests/lsdb/catalog/test_join.py @@ -1,4 +1,3 @@ -import hipscat as hc import nested_dask as nd import nested_pandas as npd import numpy as np From c7dd949d7ccc3168fcd0e57b0217d556c46feee4 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Tue, 15 Oct 2024 15:51:44 -0400 Subject: [PATCH 3/3] update docs hipscat branch --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 260d801a..05d01c45 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -10,4 +10,4 @@ sphinx-autoapi sphinx-copybutton sphinx-book-theme sphinx-design -git+https://github.com/astronomy-commons/hipscat.git@main +git+https://github.com/astronomy-commons/hipscat.git@development