Merge pull request #18 from Point72/pit/pandas_test_warnings

Clean up test warnings in pandas extension type and accessor tests
Point72 · Feb 6, 2024 · 7fb3bc6 · 7fb3bc6
2 parents 286e365 + 243fcab
commit 7fb3bc6
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 20 deletions.
diff --git a/csp/impl/pandas_accessor.py b/csp/impl/pandas_accessor.py
@@ -70,7 +70,7 @@ def find_valid_index(values: pd.Series, *, how: str):
             elif how == "last":
                 idxpos = len(values) - 1 - is_valid[::-1].argmax()
 
-            chk_notna = is_valid[idxpos]
+            chk_notna = is_valid.iloc[idxpos]
 
             if not chk_notna:
                 return None
@@ -263,7 +263,7 @@ def _flatten(series, columns, prepend_name, delim, recursive):
                 col_name = f"{series.name}{delim}{col}"
             else:
                 col_name = col
-            col_series = series.apply(getattr, convert_dtype=False, args=(col,)).astype(TsDtype(meta[col]))
+            col_series = series.astype(object).apply(getattr, args=(col,)).astype(TsDtype(meta[col]))
             col_series.name = col_name
             if recursive and issubclass(meta[col], csp.Struct):
                 data.update(CspSeriesAccessor._flatten(col_series, None, prepend_name, delim, recursive))
@@ -571,28 +571,24 @@ def sample(self, trigger: Union[timedelta, np.timedelta64, pd.Timedelta, ts[obje
         if not inplace:
             return df
 
-    def _collect(self, data, struct_type, padded):
+    def _collect(self, data, struct_type):
         data = data.copy()
         for field, typ in struct_type.metadata().items():
             if issubclass(typ, csp.Struct):
-                data[field] = self._collect(data[field], typ, padded)
+                data[field] = self._collect(data[field], typ)
         # Now apply collect to data
         df = pd.DataFrame(data)
 
         def row_collect(row):
             # Need to convert the "row" (ndarray of objects) into a dict (while dropping missing values)
             data = {k: v for k, v in row.items() if isinstance(v, Edge)}
-            if padded:
-                # FIXME
-                # data = _cast_to_obj(data)
-                ...
             if not data:
                 return csp.null_ts(struct_type)
             return struct_type.collectts(**data)
 
         return df.apply(row_collect, axis=1).astype(TsDtype(struct_type))
 
-    def collect(self, columns=None, struct_type=None, delim=" ", padded=False):
+    def collect(self, columns=None, struct_type=None, delim=" "):
         """Collects multiple ts columns of a frame into a series of ts or structs.
 
         :param columns: An optional subset of columns to map to the struct. If not provided, all columns
@@ -614,9 +610,6 @@ def collect(self, columns=None, struct_type=None, delim=" ", padded=False):
                 metatree = metatree.setdefault(part, {})
                 datatree = datatree.setdefault(part, {})
                 defaultstree = defaultstree.setdefault(part, {})
-            if padded:
-                metatree[parts[-1]] = object
-                defaultstree[parts[-1]] = np.nan
             else:
                 metatree[parts[-1]] = self._obj[col].dtype.subtype
             datatree[parts[-1]] = self._obj[col]
@@ -627,7 +620,7 @@ def collect(self, columns=None, struct_type=None, delim=" ", padded=False):
         if not data:
             return csp.null_ts(struct_type)
 
-        return self._collect(data, struct_type, padded)
+        return self._collect(data, struct_type)
 
     def show_graph(self):
         """Show the graph corresponding to the evaluation of all the edges.

diff --git a/csp/tests/impl/test_pandas_accessor.py b/csp/tests/impl/test_pandas_accessor.py
@@ -391,11 +391,19 @@ def test_run_repeat(self):
         # Test that running with repeat timestamps fails (because two different series have different indices, and one
         # of those indices has duplicates)
         t = datetime(2020, 1, 1)
-        bid = csp.curve(float, [(t, 99.0), (t, 98.0)])
-        ask = csp.curve(float, [(t, 100.0)])
-        self.df["bid"]["ABC"] = bid
-        self.df["ask"]["ABC"] = ask
-        self.df["bid"]["DEF"] = bid + 4
+        bid = pd.Series(
+            [csp.curve(float, [(t, 99.0), (t, 98.0)]), csp.curve(float, [(t, 103.0), (t, 102.0)]), np.nan],
+            dtype=TsDtype(float),
+            index=self.idx,
+        )
+        ask = pd.Series(
+            [csp.const(100.0), csp.timer(timedelta(seconds=2), 104.0), csp.const(100.0)],
+            dtype=TsDtype(float),
+            index=self.idx,
+        )
+        self.df["bid"] = bid
+        self.df["ask"] = ask
+
         out = self.df.csp.run(starttime=datetime(2020, 1, 1), endtime=timedelta(seconds=0))
         idx = pd.MultiIndex.from_tuples(
             [

diff --git a/csp/tests/impl/test_pandas_ext_type.py b/csp/tests/impl/test_pandas_ext_type.py
@@ -385,8 +385,8 @@ def test_groupby_agg_extension(self, data_for_grouping):
 
     def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
         df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
-        df.groupby("A").apply(groupby_apply_op)
-        df.groupby("A").B.apply(groupby_apply_op)
+        df.groupby("A", group_keys=False).apply(groupby_apply_op)
+        df.groupby("A", group_keys=False).B.apply(groupby_apply_op)
 
 
 class TestInterface(base.BaseInterfaceTests):