-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #139 from astronomy-commons/sean/add-margin-catalog
Add Margin Catalog
- Loading branch information
Showing
15 changed files
with
128 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import dask.dataframe as dd | ||
import hipscat as hc | ||
|
||
from lsdb.catalog.dataset.healpix_dataset import HealpixDataset | ||
from lsdb.types import DaskDFPixelMap | ||
|
||
|
||
class MarginCatalog(HealpixDataset):
    """LSDB catalog that holds the "margin" of another HiPSCat catalog.

    Attributes:
        hc_structure: `hipscat.MarginCatalog` object representing the structure
            and metadata of the HiPSCat catalog
    """

    # Narrows the attribute type to the margin-specific hipscat catalog class.
    hc_structure: hc.catalog.MarginCatalog

    def __init__(
        self,
        ddf: dd.DataFrame,
        ddf_pixel_map: DaskDFPixelMap,
        hc_structure: hc.catalog.MarginCatalog,
    ):
        """Build the margin catalog by delegating to the `HealpixDataset` constructor.

        Args:
            ddf: Dask DataFrame forwarded unchanged to the base class.
            ddf_pixel_map: pixel map forwarded unchanged to the base class
                (presumably relates HEALPix pixels to partitions of `ddf`;
                confirm against `lsdb.types.DaskDFPixelMap`).
            hc_structure: `hipscat.MarginCatalog` describing the catalog's
                structure and metadata.
        """
        # All three arguments are passed positionally, in the declared order.
        super().__init__(ddf, ddf_pixel_map, hc_structure)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import hipscat as hc | ||
|
||
from lsdb.catalog.margin_catalog import MarginCatalog | ||
from lsdb.loaders.hipscat.abstract_catalog_loader import AbstractCatalogLoader | ||
|
||
|
||
class MarginCatalogLoader(AbstractCatalogLoader[MarginCatalog]):
    """Loads a HiPSCat MarginCatalog"""

    def load_catalog(self) -> MarginCatalog:
        """Load the margin catalog as an LSDB `MarginCatalog`.

        Reads the `hipscat` catalog object first, then uses it to obtain the
        Dask DataFrame and its pixel map, and wraps all three together.

        Returns:
            The `MarginCatalog` built from the loaded DataFrame, pixel map
            and `hipscat` catalog object.
        """
        hc_structure = self.load_hipscat_catalog()
        ddf, ddf_pixel_map = self._load_dask_df_and_map(hc_structure)
        return MarginCatalog(ddf, ddf_pixel_map, hc_structure)

    def load_hipscat_catalog(self) -> hc.catalog.MarginCatalog:
        """Load `hipscat` library catalog object with catalog metadata and partition data"""
        read_margin = hc.catalog.MarginCatalog.read_from_hipscat
        return read_margin(self.path, storage_options=self.storage_options)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"catalog_name": "small_sky_xmatch_margin", | ||
"catalog_type": "margin", | ||
"total_rows": 26, | ||
"primary_catalog": "data/small_sky_xmatch", | ||
"margin_threshold": 7200 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
{ | ||
"catalog_name": "small_sky_xmatch_margin", | ||
"catalog_type": "margin", | ||
"total_rows": 26, | ||
"primary_catalog": "data/small_sky_xmatch", | ||
"margin_threshold": 7200, | ||
"version": "0.1.8", | ||
"generation_date": "2024.01.11", | ||
"tool_args": { | ||
"tool_name": "hipscat_import", | ||
"version": "0.1.3.dev12+gf50adee", | ||
"runtime_args": { | ||
"catalog_name": "small_sky_xmatch_margin", | ||
"output_path": "data/small_sky_xmatch_margin", | ||
"output_artifact_name": "small_sky_xmatch_margin", | ||
"tmp_dir": "", | ||
"overwrite": false, | ||
"dask_tmp": "", | ||
"dask_n_workers": 1, | ||
"dask_threads_per_worker": 1, | ||
"catalog_path": "data/small_sky_xmatch_margin/small_sky_xmatch_margin", | ||
"tmp_path": "data/small_sky_xmatch_margin/small_sky_xmatch_margin/intermediate", | ||
"input_catalog_path": "data/small_sky_xmatch", | ||
"margin_threshold": 7200, | ||
"margin_order": 2 | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import hipscat as hc | ||
import pandas as pd | ||
|
||
import lsdb | ||
from lsdb.catalog.margin_catalog import MarginCatalog | ||
|
||
|
||
def test_read_margin_catalog(small_sky_xmatch_margin_dir):
    """`lsdb.read_hipscat` on a margin directory returns a `MarginCatalog`
    whose structure matches what `hipscat` reads from the same directory."""
    margin = lsdb.read_hipscat(small_sky_xmatch_margin_dir)
    assert isinstance(margin, MarginCatalog)

    # Reference object read directly through the hipscat library.
    expected_structure = hc.catalog.MarginCatalog.read_from_hipscat(small_sky_xmatch_margin_dir)
    assert margin.hc_structure.catalog_info == expected_structure.catalog_info
    assert margin.hc_structure.get_healpix_pixels() == expected_structure.get_healpix_pixels()

    # The LSDB wrapper must expose the same pixels and data as its internals.
    assert margin.get_healpix_pixels() == margin.hc_structure.get_healpix_pixels()
    assert repr(margin) == repr(margin._ddf)
    pd.testing.assert_frame_equal(margin.compute(), margin._ddf.compute())
|
||
|
||
def test_margin_catalog_partitions_correct(small_sky_xmatch_margin_dir):
    """Each partition of the loaded margin catalog equals the parquet file
    stored on disk for the corresponding HEALPix pixel."""
    margin = lsdb.read_hipscat(small_sky_xmatch_margin_dir)
    assert isinstance(margin, MarginCatalog)

    for pixel in margin.get_healpix_pixels():
        order, number = pixel.order, pixel.pixel
        # Locate the on-disk file for this pixel via hipscat's path helper.
        parquet_path = hc.io.paths.pixel_catalog_file(
            catalog_base_dir=small_sky_xmatch_margin_dir,
            pixel_order=order,
            pixel_number=number,
        )
        partition = margin.get_partition(order, number)
        expected = pd.read_parquet(parquet_path)
        pd.testing.assert_frame_equal(partition.compute(), expected)