Skip to content

Commit

Permalink
Merge pull request #139 from astronomy-commons/sean/add-margin-catalog
Browse files Browse the repository at this point in the history
Add Margin Catalog
  • Loading branch information
smcguire-cmu authored Feb 1, 2024
2 parents 4c7e9fe + 0e1d841 commit 1ca91c3
Show file tree
Hide file tree
Showing 15 changed files with 128 additions and 0 deletions.
25 changes: 25 additions & 0 deletions src/lsdb/catalog/margin_catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import dask.dataframe as dd
import hipscat as hc

from lsdb.catalog.dataset.healpix_dataset import HealpixDataset
from lsdb.types import DaskDFPixelMap


class MarginCatalog(HealpixDataset):
    """LSDB Catalog DataFrame to contain the "margin" of another HiPSCat catalog.

    Attributes:
        hc_structure: `hipscat.MarginCatalog` object representing the structure
            and metadata of the HiPSCat catalog
    """

    hc_structure: hc.catalog.MarginCatalog

    def __init__(
        self,
        ddf: dd.DataFrame,
        ddf_pixel_map: DaskDFPixelMap,
        hc_structure: hc.catalog.MarginCatalog,
    ):
        """Create a margin catalog from a Dask DataFrame and its HiPSCat structure.

        Args:
            ddf: Dask DataFrame backing this catalog.
            ddf_pixel_map: `DaskDFPixelMap` forwarded to `HealpixDataset`
                (presumably maps HEALPix pixels to partitions of `ddf` -
                confirm against the `lsdb.types` definition).
            hc_structure: `hipscat.MarginCatalog` object with the structure and
                metadata of the HiPSCat catalog.
        """
        # All three arguments are forwarded unchanged; the only thing this
        # override adds is the narrower `hc_structure` annotation.
        super().__init__(ddf, ddf_pixel_map, hc_structure)
3 changes: 3 additions & 0 deletions src/lsdb/loaders/hipscat/hipscat_loader_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@
from lsdb.catalog.association_catalog import AssociationCatalog
from lsdb.catalog.catalog import Catalog
from lsdb.catalog.dataset.dataset import Dataset
from lsdb.catalog.margin_catalog import MarginCatalog
from lsdb.loaders.hipscat.abstract_catalog_loader import AbstractCatalogLoader
from lsdb.loaders.hipscat.association_catalog_loader import AssociationCatalogLoader
from lsdb.loaders.hipscat.hipscat_catalog_loader import HipscatCatalogLoader
from lsdb.loaders.hipscat.hipscat_loading_config import HipscatLoadingConfig
from lsdb.loaders.hipscat.margin_catalog_loader import MarginCatalogLoader

# Maps each LSDB dataset class to the loader class that builds it.
loader_class_for_catalog_type: Dict[Type[Dataset], Type[AbstractCatalogLoader]] = {
    Catalog: HipscatCatalogLoader,
    AssociationCatalog: AssociationCatalogLoader,
    MarginCatalog: MarginCatalogLoader,
}


Expand Down
19 changes: 19 additions & 0 deletions src/lsdb/loaders/hipscat/margin_catalog_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import hipscat as hc

from lsdb.catalog.margin_catalog import MarginCatalog
from lsdb.loaders.hipscat.abstract_catalog_loader import AbstractCatalogLoader


class MarginCatalogLoader(AbstractCatalogLoader[MarginCatalog]):
    """Loader that builds an LSDB `MarginCatalog` from a HiPSCat margin catalog."""

    def load_catalog(self) -> MarginCatalog:
        """Read the HiPSCat margin catalog and wrap it as an LSDB `MarginCatalog`.

        Returns:
            A `MarginCatalog` built from the loaded Dask DataFrame, its pixel
            map and the `hipscat` catalog structure.
        """
        structure = self.load_hipscat_catalog()
        frame, pixel_map = self._load_dask_df_and_map(structure)
        return MarginCatalog(frame, pixel_map, structure)

    def load_hipscat_catalog(self) -> hc.catalog.MarginCatalog:
        """Load `hipscat` library catalog object with catalog metadata and partition data"""
        margin_cls = hc.catalog.MarginCatalog
        return margin_cls.read_from_hipscat(self.path, storage_options=self.storage_options)
2 changes: 2 additions & 0 deletions src/lsdb/loaders/hipscat/read_hipscat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
from lsdb.catalog.association_catalog import AssociationCatalog
from lsdb.catalog.catalog import Catalog
from lsdb.catalog.dataset.dataset import Dataset
from lsdb.catalog.margin_catalog import MarginCatalog
from lsdb.loaders.hipscat.hipscat_loader_factory import get_loader_for_type
from lsdb.loaders.hipscat.hipscat_loading_config import HipscatLoadingConfig

# Maps each HiPSCat `CatalogType` to the LSDB dataset class that represents it.
# OBJECT and SOURCE catalogs both map to the same `Catalog` class.
dataset_class_for_catalog_type: Dict[CatalogType, Type[Dataset]] = {
    CatalogType.OBJECT: Catalog,
    CatalogType.SOURCE: Catalog,
    CatalogType.ASSOCIATION: AssociationCatalog,
    CatalogType.MARGIN: MarginCatalog,
}


Expand Down
12 changes: 12 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
# Names of the directories and files that make up the test data set; the
# fixtures in this module join them onto a base directory to build paths.
DATA_DIR_NAME = "data"
SMALL_SKY_DIR_NAME = "small_sky"
SMALL_SKY_XMATCH_NAME = "small_sky_xmatch"
SMALL_SKY_XMATCH_MARGIN_NAME = "small_sky_xmatch_margin"
SMALL_SKY_TO_XMATCH_NAME = "small_sky_to_xmatch"
SMALL_SKY_TO_XMATCH_SOFT_NAME = "small_sky_to_xmatch_soft"
SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1"
SMALL_SKY_ORDER1_CSV = "small_sky_order1.csv"
# CSV files with crossmatch reference results.
XMATCH_CORRECT_FILE = "xmatch_correct.csv"
XMATCH_CORRECT_005_FILE = "xmatch_correct_0_005.csv"
XMATCH_CORRECT_3N_2T_FILE = "xmatch_correct_3n_2t.csv"
XMATCH_CORRECT_3N_2T_NO_MARGIN_FILE = "xmatch_correct_3n_2t_no_margin.csv"
XMATCH_MOCK_FILE = "xmatch_mock.csv"
# Directory that contains this conftest module.
TEST_DIR = os.path.dirname(__file__)
Expand All @@ -37,6 +39,11 @@ def small_sky_xmatch_dir(test_data_dir):
return os.path.join(test_data_dir, SMALL_SKY_XMATCH_NAME)


@pytest.fixture
def small_sky_xmatch_margin_dir(test_data_dir):
    """Path of the `small_sky_xmatch_margin` catalog inside the test data directory."""
    margin_dir = os.path.join(test_data_dir, SMALL_SKY_XMATCH_MARGIN_NAME)
    return margin_dir


@pytest.fixture
def small_sky_to_xmatch_dir(test_data_dir):
    """Path of the `small_sky_to_xmatch` catalog inside the test data directory."""
    to_xmatch_dir = os.path.join(test_data_dir, SMALL_SKY_TO_XMATCH_NAME)
    return to_xmatch_dir
Expand Down Expand Up @@ -112,6 +119,11 @@ def xmatch_correct_005(small_sky_xmatch_dir):
return pd.read_csv(os.path.join(small_sky_xmatch_dir, XMATCH_CORRECT_005_FILE))


@pytest.fixture
def xmatch_correct_3n_2t(small_sky_xmatch_dir):
    """Load `xmatch_correct_3n_2t.csv` from the xmatch directory as a pandas DataFrame."""
    csv_path = os.path.join(small_sky_xmatch_dir, XMATCH_CORRECT_3N_2T_FILE)
    return pd.read_csv(csv_path)


@pytest.fixture
def xmatch_correct_3n_2t_no_margin(small_sky_xmatch_dir):
    """Load `xmatch_correct_3n_2t_no_margin.csv` from the xmatch directory as a pandas DataFrame."""
    csv_path = os.path.join(small_sky_xmatch_dir, XMATCH_CORRECT_3N_2T_NO_MARGIN_FILE)
    return pd.read_csv(csv_path)
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added tests/data/small_sky_xmatch_margin/_metadata
Binary file not shown.
7 changes: 7 additions & 0 deletions tests/data/small_sky_xmatch_margin/catalog_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"catalog_name": "small_sky_xmatch_margin",
"catalog_type": "margin",
"total_rows": 26,
"primary_catalog": "data/small_sky_xmatch",
"margin_threshold": 7200
}
28 changes: 28 additions & 0 deletions tests/data/small_sky_xmatch_margin/provenance_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"catalog_name": "small_sky_xmatch_margin",
"catalog_type": "margin",
"total_rows": 26,
"primary_catalog": "data/small_sky_xmatch",
"margin_threshold": 7200,
"version": "0.1.8",
"generation_date": "2024.01.11",
"tool_args": {
"tool_name": "hipscat_import",
"version": "0.1.3.dev12+gf50adee",
"runtime_args": {
"catalog_name": "small_sky_xmatch_margin",
"output_path": "data/small_sky_xmatch_margin",
"output_artifact_name": "small_sky_xmatch_margin",
"tmp_dir": "",
"overwrite": false,
"dask_tmp": "",
"dask_n_workers": 1,
"dask_threads_per_worker": 1,
"catalog_path": "data/small_sky_xmatch_margin/small_sky_xmatch_margin",
"tmp_path": "data/small_sky_xmatch_margin/small_sky_xmatch_margin/intermediate",
"input_catalog_path": "data/small_sky_xmatch",
"margin_threshold": 7200,
"margin_order": 2
}
}
}
32 changes: 32 additions & 0 deletions tests/lsdb/catalog/test_margin_catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import hipscat as hc
import pandas as pd

import lsdb
from lsdb.catalog.margin_catalog import MarginCatalog


def test_read_margin_catalog(small_sky_xmatch_margin_dir):
    """`lsdb.read_hipscat` on a margin catalog returns a `MarginCatalog` that agrees with hipscat."""
    loaded = lsdb.read_hipscat(small_sky_xmatch_margin_dir)
    assert isinstance(loaded, MarginCatalog)

    # Reference structure read directly through the hipscat library.
    expected = hc.catalog.MarginCatalog.read_from_hipscat(small_sky_xmatch_margin_dir)
    assert loaded.hc_structure.catalog_info == expected.catalog_info
    assert loaded.hc_structure.get_healpix_pixels() == expected.get_healpix_pixels()
    assert loaded.get_healpix_pixels() == loaded.hc_structure.get_healpix_pixels()

    # The catalog's repr and computed data come from its underlying Dask DataFrame.
    underlying = loaded._ddf
    assert repr(loaded) == repr(underlying)
    pd.testing.assert_frame_equal(loaded.compute(), underlying.compute())


def test_margin_catalog_partitions_correct(small_sky_xmatch_margin_dir):
    """Every partition of the loaded margin catalog equals its parquet file on disk."""
    catalog = lsdb.read_hipscat(small_sky_xmatch_margin_dir)
    assert isinstance(catalog, MarginCatalog)

    for pixel in catalog.get_healpix_pixels():
        order, number = pixel.order, pixel.pixel
        parquet_path = hc.io.paths.pixel_catalog_file(
            catalog_base_dir=small_sky_xmatch_margin_dir,
            pixel_order=order,
            pixel_number=number,
        )
        on_disk = pd.read_parquet(parquet_path)
        loaded_partition = catalog.get_partition(order, number)
        pd.testing.assert_frame_equal(loaded_partition.compute(), on_disk)

0 comments on commit 1ca91c3

Please sign in to comment.