Skip to content

Commit

Permalink
Remove margin fine filtering. (#435)
Browse files Browse the repository at this point in the history
  • Loading branch information
delucchi-cmu authored Nov 18, 2024
1 parent 9aa49c4 commit 1c5acb5
Show file tree
Hide file tree
Showing 8 changed files with 20 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
push:
branches: [ main, development ]
pull_request:
branches: [ main, development ]
branches: [ main, development, margin ]

jobs:
pre-commit-ci:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/testing-and-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
push:
branches: [ main, development ]
pull_request:
branches: [ main, development ]
branches: [ main, development, margin ]

jobs:
build:
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
git+https://github.com/astronomy-commons/hats.git@main
git+https://github.com/astronomy-commons/hats.git@margin
5 changes: 4 additions & 1 deletion src/hats_import/margin_cache/margin_cache_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class MarginCacheArguments(RuntimeArguments):
order of healpix partitioning in the source catalog. if `margin_order` is left
default or set to -1, then the `margin_order` will be set dynamically to the
highest partition order plus 1."""
fine_filtering: bool = True
fine_filtering: bool = False
"""Should we perform the precise boundary checking? If false, some results may be
more than `margin_threshold` away from the border (but still within `margin_order`)."""

Expand All @@ -54,6 +54,9 @@ def _check_arguments(self):
if len(self.catalog.get_healpix_pixels()) == 0:
raise ValueError("debug_filter_pixel_list has created empty catalog")

if self.fine_filtering:
raise NotImplementedError("Fine filtering temporarily removed.")

highest_order = int(self.catalog.partition_info.get_highest_order())

if self.margin_order < 0:
Expand Down
20 changes: 6 additions & 14 deletions src/hats_import/margin_cache/margin_cache_map_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
import pandas as pd
import pyarrow as pa
import pyarrow.dataset as ds
from hats import pixel_math
from hats.io import file_io, paths
from hats.pixel_math.healpix_pixel import HealpixPixel

from hats_import.margin_cache.margin_cache_resume_plan import MarginCachePlan
from hats_import.pipeline_resume_plan import get_pixel_cache_directory, print_task_failure


# pylint: disable=too-many-arguments
# pylint: disable=too-many-arguments, unused-argument
def map_pixel_shards(
partition_file,
mapping_key,
Expand All @@ -26,6 +25,9 @@ def map_pixel_shards(
):
"""Creates margin cache shards from a source partition file."""
try:
if fine_filtering:
raise NotImplementedError("Fine filtering temporarily removed.")

schema = file_io.read_parquet_metadata(original_catalog_metadata).schema.to_arrow_schema()
data = file_io.read_parquet_file_to_pandas(partition_file, schema=schema)
source_pixel = HealpixPixel(data["Norder"].iloc[0], data["Npix"].iloc[0])
Expand Down Expand Up @@ -78,6 +80,7 @@ def map_pixel_shards(
raise exception


# pylint: disable=too-many-arguments, unused-argument
def _to_pixel_shard(
filtered_data,
pixel,
Expand All @@ -89,18 +92,7 @@ def _to_pixel_shard(
fine_filtering,
):
"""Do boundary checking for the cached partition and then output remaining data."""
if fine_filtering:
margin_check = pixel_math.check_margin_bounds(
filtered_data[ra_column].values,
filtered_data[dec_column].values,
pixel.order,
pixel.pixel,
margin_threshold,
)

margin_data = filtered_data.iloc[margin_check]
else:
margin_data = filtered_data
margin_data = filtered_data

num_rows = len(margin_data)
if num_rows:
Expand Down
2 changes: 1 addition & 1 deletion tests/hats_import/margin_cache/test_margin_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_margin_cache_gen(small_sky_source_catalog, tmp_path, dask_client):

data = pd.read_parquet(test_file)

assert len(data) == 13
assert len(data) == 88

assert all(data[paths.PARTITION_ORDER] == norder)
assert all(data[paths.PARTITION_PIXEL] == npix)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def test_to_pixel_shard_equator(tmp_path, basic_data_shard_df):

assert os.path.exists(path)

validate_result_dataframe(path, 2)
validate_result_dataframe(path, 360)


@pytest.mark.timeout(5)
Expand All @@ -79,7 +79,7 @@ def test_to_pixel_shard_polar(tmp_path, polar_data_shard_df):
def test_map_pixel_shards_error(tmp_path, capsys):
"""Test error behavior on the map stage, e.g. by not creating the original
catalog parquet files."""
with pytest.raises(FileNotFoundError):
with pytest.raises(NotImplementedError):
margin_cache_map_reduce.map_pixel_shards(
paths.pixel_catalog_file(tmp_path, HealpixPixel(1, 0)),
mapping_key="1_21",
Expand All @@ -94,9 +94,10 @@ def test_map_pixel_shards_error(tmp_path, capsys):
)

captured = capsys.readouterr()
assert "Parquet file does not exist" in captured.out
assert "Fine filtering temporarily removed" in captured.out


@pytest.mark.skip()
@pytest.mark.timeout(30)
def test_map_pixel_shards_fine(tmp_path, test_data_dir, small_sky_source_catalog):
"""Test basic mapping behavior, with fine filtering enabled."""
Expand Down
4 changes: 2 additions & 2 deletions tests/hats_import/margin_cache/test_margin_round_trip.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def test_margin_import_gaia_minimum(

data = pd.read_parquet(test_file)

assert len(data) == 1
assert len(data) == 4


@pytest.mark.dask(timeout=180)
Expand Down Expand Up @@ -117,7 +117,7 @@ def test_margin_import_mixed_schema_csv(
catalog = read_hats(args.catalog_path)
assert catalog.on_disk
assert catalog.catalog_path == args.catalog_path
assert len(catalog.get_healpix_pixels()) == 5
assert len(catalog.get_healpix_pixels()) == 19

norder = 2
npix = 187
Expand Down

0 comments on commit 1c5acb5

Please sign in to comment.