-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Index search and unit test data (#123)
- Loading branch information
1 parent 2ec29db · commit 537f2ea
Showing 10 changed files with 113 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
from .cone_search import ConeSearch | ||
from .index_search import IndexSearch | ||
from .polygon_search import PolygonSearch |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from typing import List | ||
|
||
import pandas as pd | ||
from hipscat.catalog.index.index_catalog import IndexCatalog | ||
from hipscat.pixel_math import HealpixPixel | ||
|
||
from lsdb.core.search.abstract_search import AbstractSearch | ||
|
||
|
||
class IndexSearch(AbstractSearch):
    """Look up rows by id (or by any other value covered by a catalog index).

    The search runs in two stages: the catalog's partitions are narrowed to
    those that could contain the requested ids, and then the rows inside a
    partition are narrowed to those whose indexed field matches one of them.

    NB: This requires a previously-computed catalog index table.
    """

    def __init__(self, ids, catalog_index: IndexCatalog):
        # Keep both the index and the requested values; each search stage reads them.
        self.catalog_index = catalog_index
        self.ids = ids

    def search_partitions(self, _: List[HealpixPixel]) -> List[HealpixPixel]:
        """Determine the target partitions for further filtering.

        The incoming pixel list is not consulted; the result comes solely from
        asking the catalog index for the partitions of the requested ids.
        """
        index = self.catalog_index
        return index.loc_partitions(self.ids)

    def search_points(self, frame: pd.DataFrame) -> pd.DataFrame:
        """Determine the search results within a data frame.

        Keeps the rows of ``frame`` whose value in the index's indexing column
        is one of the requested ids.
        """
        column_name = self.catalog_index.catalog_info.indexing_column
        is_requested = frame[column_name].isin(self.ids)
        return frame[is_requested]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
{ | ||
"catalog_name": "small_sky_order1_id_index", | ||
"catalog_type": "index", | ||
"total_rows": 131, | ||
"primary_catalog": "/home/delucchi/git/hipscat/tests/data/small_sky_order1", | ||
"indexing_column": "id", | ||
"extra_columns": [] | ||
} |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
{ | ||
"catalog_name": "small_sky_order1_id_index", | ||
"catalog_type": "index", | ||
"total_rows": 131, | ||
"primary_catalog": "/home/delucchi/git/hipscat/tests/data/small_sky_order1", | ||
"indexing_column": "id", | ||
"extra_columns": [], | ||
"version": "0.2.1", | ||
"generation_date": "2024.01.09", | ||
"tool_args": { | ||
"tool_name": "hipscat_import", | ||
"version": "0.2.1", | ||
"runtime_args": { | ||
"catalog_name": "small_sky_order1_id_index", | ||
"output_path": "/home/delucchi/git/hipscat/tests/data/", | ||
"output_artifact_name": "small_sky_order1_id_index", | ||
"tmp_dir": "", | ||
"overwrite": true, | ||
"dask_tmp": "", | ||
"dask_n_workers": 1, | ||
"dask_threads_per_worker": 1, | ||
"catalog_path": "/home/delucchi/git/hipscat/tests/data/small_sky_order1_id_index", | ||
"tmp_path": "/home/delucchi/git/hipscat/tests/data/small_sky_order1_id_index/intermediate", | ||
"input_catalog_path": "/home/delucchi/git/hipscat/tests/data/small_sky_order1", | ||
"indexing_column": "id", | ||
"extra_columns": [], | ||
"include_hipscat_index": "False", | ||
"include_order_pixel": true | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from hipscat.catalog.index.index_catalog import IndexCatalog | ||
|
||
|
||
def test_index_search(small_sky_order1_catalog, small_sky_order1_id_index_dir, assert_divisions_are_correct):
    """Index search yields the expected row counts and keeps divisions correct."""
    catalog_index = IndexCatalog.read_from_hipscat(small_sky_order1_id_index_dir)

    # (requested ids, expected number of rows), exercised in this order:
    # the integer 900 and the string "700" are expected to match nothing,
    # while the integer 700 is expected to match exactly one row.
    expectations = (
        ([900], 0),
        (["700"], 0),
        ([700], 1),
    )

    for requested_ids, expected_rows in expectations:
        result_catalog = small_sky_order1_catalog.index_search(requested_ids, catalog_index)
        result_df = result_catalog.compute()
        assert len(result_df) == expected_rows
        assert_divisions_are_correct(result_catalog)