diff --git a/src/lsdb/loaders/dataframe/from_dataframe_utils.py b/src/lsdb/loaders/dataframe/from_dataframe_utils.py index a68cedad..b079e24f 100644 --- a/src/lsdb/loaders/dataframe/from_dataframe_utils.py +++ b/src/lsdb/loaders/dataframe/from_dataframe_utils.py @@ -40,16 +40,17 @@ def _append_partition_information_to_dataframe(dataframe: pd.DataFrame, pixel: H Returns: The dataframe for a HEALPix, with data points and respective partition information. """ - dataframe["Norder"] = pixel.order - dataframe["Dir"] = pixel.dir - dataframe["Npix"] = pixel.pixel - dataframe = dataframe.astype( - { - PartitionInfo.METADATA_ORDER_COLUMN_NAME: np.uint8, - PartitionInfo.METADATA_DIR_COLUMN_NAME: np.uint64, - PartitionInfo.METADATA_PIXEL_COLUMN_NAME: np.uint64, - } - ) + columns_to_assign = { + PartitionInfo.METADATA_ORDER_COLUMN_NAME: pixel.order, + PartitionInfo.METADATA_DIR_COLUMN_NAME: pixel.dir, + PartitionInfo.METADATA_PIXEL_COLUMN_NAME: pixel.pixel, + } + column_types = { + PartitionInfo.METADATA_ORDER_COLUMN_NAME: np.uint8, + PartitionInfo.METADATA_DIR_COLUMN_NAME: np.uint64, + PartitionInfo.METADATA_PIXEL_COLUMN_NAME: np.uint64, + } + dataframe = dataframe.assign(**columns_to_assign).astype(column_types) return _order_partition_dataframe_columns(dataframe) @@ -95,7 +96,14 @@ def _order_partition_dataframe_columns(dataframe: pd.DataFrame) -> pd.DataFrame: Returns: The partition dataframe with the columns in the correct order. """ - order_of_columns = ["margin_Norder", "margin_Dir", "margin_Npix", "Norder", "Dir", "Npix"] + order_of_columns = [ + f"margin_{PartitionInfo.METADATA_ORDER_COLUMN_NAME}", + f"margin_{PartitionInfo.METADATA_DIR_COLUMN_NAME}", + f"margin_{PartitionInfo.METADATA_PIXEL_COLUMN_NAME}", + PartitionInfo.METADATA_ORDER_COLUMN_NAME, + PartitionInfo.METADATA_DIR_COLUMN_NAME, + PartitionInfo.METADATA_PIXEL_COLUMN_NAME, + ] unordered_columns = [col for col in dataframe.columns if col not in order_of_columns] ordered_columns = [col for col in order_of_columns if col in dataframe.columns] return dataframe[unordered_columns + ordered_columns] diff --git a/tests/lsdb/loaders/dataframe/test_from_dataframe.py b/tests/lsdb/loaders/dataframe/test_from_dataframe.py index 951ad841..87cb4783 100644 --- a/tests/lsdb/loaders/dataframe/test_from_dataframe.py +++ b/tests/lsdb/loaders/dataframe/test_from_dataframe.py @@ -63,7 +63,7 @@ def test_from_dataframe_catalog_of_invalid_type(small_sky_order1_df, small_sky_o def test_from_dataframe_when_threshold_and_partition_size_specified( - small_sky_order1_df, small_sky_order1_catalog + small_sky_order1_df, small_sky_order1_catalog ): """Tests that specifying simultaneously threshold and partition_size is invalid""" kwargs = get_catalog_kwargs(small_sky_order1_catalog, partition_size=10, threshold=10_000) @@ -79,7 +79,7 @@ def test_partitions_on_map_equal_partitions_in_df(small_sky_order1_df, small_sky partition_df = catalog._ddf.partitions[partition_index].compute() assert isinstance(partition_df, pd.DataFrame) for _, row in partition_df.iterrows(): - ipix = hp.ang2pix(2 ** hp_pixel.order, row["ra"], row["dec"], nest=True, lonlat=True) + ipix = hp.ang2pix(2**hp_pixel.order, row["ra"], row["dec"], nest=True, lonlat=True) assert ipix == hp_pixel.pixel @@ -138,7 +138,7 @@ def test_partitions_obey_threshold(small_sky_order1_df, small_sky_order1_catalog def test_partitions_obey_default_threshold_when_no_arguments_specified( - small_sky_order1_df, small_sky_order1_catalog + small_sky_order1_df, small_sky_order1_catalog ): """Tests that partitions are limited by the default threshold when no partition size or threshold is specified"""