Skip to content

Commit

Permalink
Add tests for hints for astropy tables to pandas indexing.
Browse files Browse the repository at this point in the history
  • Loading branch information
erykoff committed Dec 18, 2024
1 parent e054b7c commit 86b5993
Showing 1 changed file with 44 additions and 0 deletions.
44 changes: 44 additions & 0 deletions tests/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@

try:
from lsst.daf.butler.formatters.parquet import (
ASTROPY_PANDAS_INDEX_KEY,
ArrowAstropySchema,
ArrowNumpySchema,
DataFrameSchema,
Expand All @@ -105,6 +106,7 @@
_numpy_dtype_to_arrow_types,
_numpy_style_arrays_to_arrow_arrays,
_numpy_to_numpy_dict,
add_pandas_index_to_astropy,
arrow_to_astropy,
arrow_to_numpy,
arrow_to_numpy_dict,
Expand Down Expand Up @@ -1192,6 +1194,44 @@ def testBadAstropyColumnParquet(self):
with self.assertRaises(RuntimeError):
self.butler.put(bad_tab, self.datasetType, dataId={})

@unittest.skipUnless(pd is not None, "Cannot test ParquetFormatterDataFrame without pandas.")
def testWriteAstropyTableWithPandasIndexHint(self, testStrip=True):
    """Round-trip an astropy table that carries a pandas index hint.

    The table is written with the ``index`` column marked as the pandas
    index, then read back as an astropy table and as a DataFrame, both
    in full and with a column subset that leaves out the index column.

    Parameters
    ----------
    testStrip : `bool`, optional
        If `True`, additionally check that reading an astropy table with
        a column subset omitting the index column yields metadata
        without the index hint.
    """
    hinted_table = _makeSimpleAstropyTable()
    add_pandas_index_to_astropy(hinted_table, "index")
    self.butler.put(hinted_table, self.datasetType, dataId={})

    # Full astropy read: the hint must be present in the metadata and
    # must still name the same column.
    astropy_full = self.butler.get(self.datasetType, dataId={})
    self.assertIn(ASTROPY_PANDAS_INDEX_KEY, astropy_full.meta)
    self.assertEqual(astropy_full.meta[ASTROPY_PANDAS_INDEX_KEY], "index")

    # Full DataFrame read: the hinted column becomes the index.
    frame_full = self.butler.get(self.datasetType, dataId={}, storageClass="DataFrame")
    self.assertEqual(frame_full.index.name, "index")

    # DataFrame read of a column subset that leaves out the index
    # column: a warning mentioning the index column is expected. The
    # returned frame itself is not inspected.
    with self.assertLogs(level="WARNING") as captured:
        self.butler.get(
            self.datasetType,
            dataId={},
            storageClass="DataFrame",
            parameters={"columns": ["a", "b"]},
        )
    first_message = captured.output[0]
    self.assertIn("Index column ``index``", first_message)

    if testStrip:
        # Astropy read of the same column subset: the hint should no
        # longer be in the metadata.
        astropy_subset = self.butler.get(
            self.datasetType,
            dataId={},
            parameters={"columns": ["a", "b"]},
        )
        self.assertNotIn(ASTROPY_PANDAS_INDEX_KEY, astropy_subset.meta)

    # Hinting at a column that is not in the table must be rejected.
    with self.assertRaises(ValueError):
        add_pandas_index_to_astropy(hinted_table, "not_a_column")


@unittest.skipUnless(atable is not None, "Cannot test InMemoryDatastore with AstropyTable without astropy.")
class InMemoryArrowAstropyDelegateTestCase(ParquetFormatterArrowAstropyTestCase):
Expand Down Expand Up @@ -1222,6 +1262,10 @@ def testBadInput(self):
with self.assertRaises(AttributeError):
delegate.getComponent(composite=tab1, componentName="nothing")

@unittest.skipUnless(pd is not None, "Cannot test ParquetFormatterDataFrame without pandas.")
def testWriteAstropyTableWithPandasIndexHint(self):
    """Run the inherited index-hint test with the strip check disabled."""
    # NOTE(review): presumably the hint is not stripped from the metadata
    # for this test case's datastore when columns are subset -- confirm
    # against the delegate implementation.
    inherited_test = super().testWriteAstropyTableWithPandasIndexHint
    inherited_test(testStrip=False)


@unittest.skipUnless(np is not None, "Cannot test ParquetFormatterArrowNumpy without numpy.")
@unittest.skipUnless(pa is not None, "Cannot test ParquetFormatterArrowNumpy without pyarrow.")
Expand Down

0 comments on commit 86b5993

Please sign in to comment.