Skip to content

Commit

Permalink
Merge pull request #18 from Point72/pit/pandas_test_warnings
Browse files Browse the repository at this point in the history
Clean up test warnings in pandas extension type and accessor tests
  • Loading branch information
timkpaine authored Feb 6, 2024
2 parents 286e365 + 243fcab commit 7fb3bc6
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 20 deletions.
19 changes: 6 additions & 13 deletions csp/impl/pandas_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def find_valid_index(values: pd.Series, *, how: str):
elif how == "last":
idxpos = len(values) - 1 - is_valid[::-1].argmax()

chk_notna = is_valid[idxpos]
chk_notna = is_valid.iloc[idxpos]

if not chk_notna:
return None
Expand Down Expand Up @@ -263,7 +263,7 @@ def _flatten(series, columns, prepend_name, delim, recursive):
col_name = f"{series.name}{delim}{col}"
else:
col_name = col
col_series = series.apply(getattr, convert_dtype=False, args=(col,)).astype(TsDtype(meta[col]))
col_series = series.astype(object).apply(getattr, args=(col,)).astype(TsDtype(meta[col]))
col_series.name = col_name
if recursive and issubclass(meta[col], csp.Struct):
data.update(CspSeriesAccessor._flatten(col_series, None, prepend_name, delim, recursive))
Expand Down Expand Up @@ -571,28 +571,24 @@ def sample(self, trigger: Union[timedelta, np.timedelta64, pd.Timedelta, ts[obje
if not inplace:
return df

def _collect(self, data, struct_type, padded):
def _collect(self, data, struct_type):
data = data.copy()
for field, typ in struct_type.metadata().items():
if issubclass(typ, csp.Struct):
data[field] = self._collect(data[field], typ, padded)
data[field] = self._collect(data[field], typ)
# Now apply collect to data
df = pd.DataFrame(data)

def row_collect(row):
# Need to convert the "row" (ndarray of objects) into a dict (while dropping missing values)
data = {k: v for k, v in row.items() if isinstance(v, Edge)}
if padded:
# FIXME
# data = _cast_to_obj(data)
...
if not data:
return csp.null_ts(struct_type)
return struct_type.collectts(**data)

return df.apply(row_collect, axis=1).astype(TsDtype(struct_type))

def collect(self, columns=None, struct_type=None, delim=" ", padded=False):
def collect(self, columns=None, struct_type=None, delim=" "):
"""Collects multiple ts columns of a frame into a series of ts or structs.
:param columns: An optional subset of columns to map to the struct. If not provided, all columns
Expand All @@ -614,9 +610,6 @@ def collect(self, columns=None, struct_type=None, delim=" ", padded=False):
metatree = metatree.setdefault(part, {})
datatree = datatree.setdefault(part, {})
defaultstree = defaultstree.setdefault(part, {})
if padded:
metatree[parts[-1]] = object
defaultstree[parts[-1]] = np.nan
else:
metatree[parts[-1]] = self._obj[col].dtype.subtype
datatree[parts[-1]] = self._obj[col]
Expand All @@ -627,7 +620,7 @@ def collect(self, columns=None, struct_type=None, delim=" ", padded=False):
if not data:
return csp.null_ts(struct_type)

return self._collect(data, struct_type, padded)
return self._collect(data, struct_type)

def show_graph(self):
"""Show the graph corresponding to the evaluation of all the edges.
Expand Down
18 changes: 13 additions & 5 deletions csp/tests/impl/test_pandas_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,11 +391,19 @@ def test_run_repeat(self):
# Test that running with repeat timestamps fails (because two different series have different indices, and one
# of those indices has duplicates)
t = datetime(2020, 1, 1)
bid = csp.curve(float, [(t, 99.0), (t, 98.0)])
ask = csp.curve(float, [(t, 100.0)])
self.df["bid"]["ABC"] = bid
self.df["ask"]["ABC"] = ask
self.df["bid"]["DEF"] = bid + 4
bid = pd.Series(
[csp.curve(float, [(t, 99.0), (t, 98.0)]), csp.curve(float, [(t, 103.0), (t, 102.0)]), np.nan],
dtype=TsDtype(float),
index=self.idx,
)
ask = pd.Series(
[csp.const(100.0), csp.timer(timedelta(seconds=2), 104.0), csp.const(100.0)],
dtype=TsDtype(float),
index=self.idx,
)
self.df["bid"] = bid
self.df["ask"] = ask

out = self.df.csp.run(starttime=datetime(2020, 1, 1), endtime=timedelta(seconds=0))
idx = pd.MultiIndex.from_tuples(
[
Expand Down
4 changes: 2 additions & 2 deletions csp/tests/impl/test_pandas_ext_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,8 @@ def test_groupby_agg_extension(self, data_for_grouping):

def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
df.groupby("A").apply(groupby_apply_op)
df.groupby("A").B.apply(groupby_apply_op)
df.groupby("A", group_keys=False).apply(groupby_apply_op)
df.groupby("A", group_keys=False).B.apply(groupby_apply_op)


class TestInterface(base.BaseInterfaceTests):
Expand Down

0 comments on commit 7fb3bc6

Please sign in to comment.