diff --git a/csp/impl/pandas_accessor.py b/csp/impl/pandas_accessor.py index ba59a15e..ec029913 100644 --- a/csp/impl/pandas_accessor.py +++ b/csp/impl/pandas_accessor.py @@ -70,7 +70,7 @@ def find_valid_index(values: pd.Series, *, how: str): elif how == "last": idxpos = len(values) - 1 - is_valid[::-1].argmax() - chk_notna = is_valid[idxpos] + chk_notna = is_valid.iloc[idxpos] if not chk_notna: return None @@ -263,7 +263,7 @@ def _flatten(series, columns, prepend_name, delim, recursive): col_name = f"{series.name}{delim}{col}" else: col_name = col - col_series = series.apply(getattr, convert_dtype=False, args=(col,)).astype(TsDtype(meta[col])) + col_series = series.astype(object).apply(getattr, args=(col,)).astype(TsDtype(meta[col])) col_series.name = col_name if recursive and issubclass(meta[col], csp.Struct): data.update(CspSeriesAccessor._flatten(col_series, None, prepend_name, delim, recursive)) @@ -571,28 +571,24 @@ def sample(self, trigger: Union[timedelta, np.timedelta64, pd.Timedelta, ts[obje if not inplace: return df - def _collect(self, data, struct_type, padded): + def _collect(self, data, struct_type): data = data.copy() for field, typ in struct_type.metadata().items(): if issubclass(typ, csp.Struct): - data[field] = self._collect(data[field], typ, padded) + data[field] = self._collect(data[field], typ) # Now apply collect to data df = pd.DataFrame(data) def row_collect(row): # Need to convert the "row" (ndarray of objects) into a dict (while dropping missing values) data = {k: v for k, v in row.items() if isinstance(v, Edge)} - if padded: - # FIXME - # data = _cast_to_obj(data) - ... if not data: return csp.null_ts(struct_type) return struct_type.collectts(**data) return df.apply(row_collect, axis=1).astype(TsDtype(struct_type)) - def collect(self, columns=None, struct_type=None, delim=" ", padded=False): + def collect(self, columns=None, struct_type=None, delim=" "): """Collects multiple ts columns of a frame into a series of ts or structs. :param columns: An optional subset of columns to map to the struct. If not provided, all columns @@ -614,9 +610,6 @@ def collect(self, columns=None, struct_type=None, delim=" ", padded=False): metatree = metatree.setdefault(part, {}) datatree = datatree.setdefault(part, {}) defaultstree = defaultstree.setdefault(part, {}) - if padded: - metatree[parts[-1]] = object - defaultstree[parts[-1]] = np.nan else: metatree[parts[-1]] = self._obj[col].dtype.subtype datatree[parts[-1]] = self._obj[col] @@ -627,7 +620,7 @@ def collect(self, columns=None, struct_type=None, delim=" ", padded=False): if not data: return csp.null_ts(struct_type) - return self._collect(data, struct_type, padded) + return self._collect(data, struct_type) def show_graph(self): """Show the graph corresponding to the evaluation of all the edges. diff --git a/csp/tests/impl/test_pandas_accessor.py b/csp/tests/impl/test_pandas_accessor.py index 6ad6b769..dc17a926 100644 --- a/csp/tests/impl/test_pandas_accessor.py +++ b/csp/tests/impl/test_pandas_accessor.py @@ -391,11 +391,19 @@ def test_run_repeat(self): # Test that running with repeat timestamps fails (because two different series have different indices, and one # of those indices has duplicates) t = datetime(2020, 1, 1) - bid = csp.curve(float, [(t, 99.0), (t, 98.0)]) - ask = csp.curve(float, [(t, 100.0)]) - self.df["bid"]["ABC"] = bid - self.df["ask"]["ABC"] = ask - self.df["bid"]["DEF"] = bid + 4 + bid = pd.Series( + [csp.curve(float, [(t, 99.0), (t, 98.0)]), csp.curve(float, [(t, 103.0), (t, 102.0)]), np.nan], + dtype=TsDtype(float), + index=self.idx, + ) + ask = pd.Series( + [csp.const(100.0), csp.timer(timedelta(seconds=2), 104.0), csp.const(100.0)], + dtype=TsDtype(float), + index=self.idx, + ) + self.df["bid"] = bid + self.df["ask"] = ask + out = self.df.csp.run(starttime=datetime(2020, 1, 1), endtime=timedelta(seconds=0)) idx = pd.MultiIndex.from_tuples( [ diff --git a/csp/tests/impl/test_pandas_ext_type.py b/csp/tests/impl/test_pandas_ext_type.py index 2d500ea2..453e67f4 100644 --- a/csp/tests/impl/test_pandas_ext_type.py +++ b/csp/tests/impl/test_pandas_ext_type.py @@ -385,8 +385,8 @@ def test_groupby_agg_extension(self, data_for_grouping): def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) - df.groupby("A").apply(groupby_apply_op) - df.groupby("A").B.apply(groupby_apply_op) + df.groupby("A", group_keys=False).apply(groupby_apply_op) + df.groupby("A", group_keys=False).B.apply(groupby_apply_op) class TestInterface(base.BaseInterfaceTests):