Skip to content

Commit

Permalink
Use the same convention to access column names
Browse files Browse the repository at this point in the history
  • Loading branch information
camposandro committed Feb 26, 2024
1 parent 6f61f5b commit a41ee2e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 14 deletions.
30 changes: 19 additions & 11 deletions src/lsdb/loaders/dataframe/from_dataframe_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,17 @@ def _append_partition_information_to_dataframe(dataframe: pd.DataFrame, pixel: H
Returns:
The dataframe for a HEALPix pixel, with its data points and the respective partition information.
"""
dataframe["Norder"] = pixel.order
dataframe["Dir"] = pixel.dir
dataframe["Npix"] = pixel.pixel
dataframe = dataframe.astype(
{
PartitionInfo.METADATA_ORDER_COLUMN_NAME: np.uint8,
PartitionInfo.METADATA_DIR_COLUMN_NAME: np.uint64,
PartitionInfo.METADATA_PIXEL_COLUMN_NAME: np.uint64,
}
)
columns_to_assign = {
PartitionInfo.METADATA_ORDER_COLUMN_NAME: pixel.order,
PartitionInfo.METADATA_DIR_COLUMN_NAME: pixel.dir,
PartitionInfo.METADATA_PIXEL_COLUMN_NAME: pixel.pixel,
}
column_types = {
PartitionInfo.METADATA_ORDER_COLUMN_NAME: np.uint8,
PartitionInfo.METADATA_DIR_COLUMN_NAME: np.uint64,
PartitionInfo.METADATA_PIXEL_COLUMN_NAME: np.uint64,
}
dataframe = dataframe.assign(**columns_to_assign).astype(column_types)
return _order_partition_dataframe_columns(dataframe)


Expand Down Expand Up @@ -95,7 +96,14 @@ def _order_partition_dataframe_columns(dataframe: pd.DataFrame) -> pd.DataFrame:
Returns:
The partition dataframe with the columns in the correct order.
"""
order_of_columns = ["margin_Norder", "margin_Dir", "margin_Npix", "Norder", "Dir", "Npix"]
order_of_columns = [
f"margin_{PartitionInfo.METADATA_ORDER_COLUMN_NAME}",
f"margin_{PartitionInfo.METADATA_DIR_COLUMN_NAME}",
f"margin_{PartitionInfo.METADATA_PIXEL_COLUMN_NAME}",
PartitionInfo.METADATA_ORDER_COLUMN_NAME,
PartitionInfo.METADATA_DIR_COLUMN_NAME,
PartitionInfo.METADATA_PIXEL_COLUMN_NAME,
]
unordered_columns = [col for col in dataframe.columns if col not in order_of_columns]
ordered_columns = [col for col in order_of_columns if col in dataframe.columns]
return dataframe[unordered_columns + ordered_columns]
6 changes: 3 additions & 3 deletions tests/lsdb/loaders/dataframe/test_from_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_from_dataframe_catalog_of_invalid_type(small_sky_order1_df, small_sky_o


def test_from_dataframe_when_threshold_and_partition_size_specified(
small_sky_order1_df, small_sky_order1_catalog
small_sky_order1_df, small_sky_order1_catalog
):
"""Tests that specifying simultaneously threshold and partition_size is invalid"""
kwargs = get_catalog_kwargs(small_sky_order1_catalog, partition_size=10, threshold=10_000)
Expand All @@ -79,7 +79,7 @@ def test_partitions_on_map_equal_partitions_in_df(small_sky_order1_df, small_sky
partition_df = catalog._ddf.partitions[partition_index].compute()
assert isinstance(partition_df, pd.DataFrame)
for _, row in partition_df.iterrows():
ipix = hp.ang2pix(2 ** hp_pixel.order, row["ra"], row["dec"], nest=True, lonlat=True)
ipix = hp.ang2pix(2**hp_pixel.order, row["ra"], row["dec"], nest=True, lonlat=True)
assert ipix == hp_pixel.pixel


Expand Down Expand Up @@ -138,7 +138,7 @@ def test_partitions_obey_threshold(small_sky_order1_df, small_sky_order1_catalog


def test_partitions_obey_default_threshold_when_no_arguments_specified(
small_sky_order1_df, small_sky_order1_catalog
small_sky_order1_df, small_sky_order1_catalog
):
"""Tests that partitions are limited by the default threshold
when no partition size or threshold is specified"""
Expand Down

0 comments on commit a41ee2e

Please sign in to comment.