Skip to content

Commit

Permalink
Anticipate all aliases for non-identifier fields.
Browse files Browse the repository at this point in the history
  • Loading branch information
gitosaurus committed Nov 22, 2024
1 parent ea95adc commit 0d7ca7c
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
28 changes: 25 additions & 3 deletions src/nested_pandas/nestedframe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ def __init__(self, outer: NestedFrame):
super().__init__()
# Pre-load the field resolvers for all columns which are known at present.
for column in outer.nested_columns:
self._initialize_field_resolver(column, outer)
self._initialize_column_resolver(column, outer)

def _initialize_field_resolver(self, column: str, outer: NestedFrame):
def _initialize_column_resolver(self, column: str, outer: NestedFrame):
"""
Initialize a resolver for the given nested column, and also an alias
for it, in the case of column names that have spaces or are otherwise
Expand All @@ -108,7 +108,7 @@ def __getitem__(self, item):
if not super().__contains__(top_nest):
if top_nest not in self._outer.nested_columns:
raise KeyError(f"Unknown nest {top_nest}")
self._initialize_field_resolver(top_nest, self._outer)
self._initialize_column_resolver(top_nest, self._outer)
return super().__getitem__(top_nest)

def __setitem__(self, item, _):
Expand Down Expand Up @@ -232,6 +232,8 @@ class NestedFrame(pd.DataFrame):
# Series produce instances of this class, preserving the type and origin.
__pandas_priority__ = 4500

_metadata = ["_aliases"]

@property
def _constructor(self) -> Self: # type: ignore[name-defined] # noqa: F821
return NestedFrame
Expand Down Expand Up @@ -309,6 +311,9 @@ def __getitem__(self, item):
def __setitem__(self, key, value):
"""Adds custom __setitem__ behavior for nested columns"""
components = parse_hierarchical_components(key)
aliases = getattr(self, "_aliases", None)
if aliases:
components = [aliases.get(x, x) for x in components]
# Replacing or adding columns to a nested structure
# Allows statements like ndf["nested.t"] = ndf["nested.t"] - 5
# Or ndf["nested.base_t"] = ndf["nested.t"] - 5
Expand Down Expand Up @@ -553,6 +558,23 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
--------
https://pandas.pydata.org/docs/reference/api/pandas.eval.html
"""
pattern = re.compile(r"`[^`]+`", re.MULTILINE)
# Gather all aliases for backtick-quoted names, and give them to the _NestResolver.
# We must scan the expression text itself, rather than merely look up column names
# that presently exist, because both column and field names may be created by
# separate lines of the expression.
self._aliases: dict[str, str] = {}

def sub_and_alias(match):
original = match.group(0)[1:-1] # remove backticks
alias = clean_column_name(original)
if alias != original:
self._aliases[alias] = original
return alias

# The substituted string is discarded; this pass runs only for its side effect of
# populating self._aliases. A similar substitution will be done again later.
_ = pattern.sub(sub_and_alias, expr)

kwargs["resolvers"] = tuple(kwargs.get("resolvers", ())) + (_NestResolver(self),)
kwargs["inplace"] = inplace
kwargs["parser"] = "nested-pandas"
Expand Down
14 changes: 14 additions & 0 deletions tests/nested_pandas/nestedframe/test_nestedframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1045,6 +1045,20 @@ def test_eval_assignment():
assert (nf["p2.e"] == nf["packed.d"] * 2 + nf.c).all()
assert (nf["p2.f"] == nf["p2.e"] + nf.b).all()

# Verify that assignment can be done to nested columns and fields
# whose names are not valid Python identifiers and so must be
# quoted with backticks.
nf = NestedFrame(data={"dog": [1, 2, 3], "good dog": [2, 4, 6]}, index=[0, 1, 2])
nested = pd.DataFrame(
data={"n/a": [0, 2, 4, 1, 4, 3, 1, 4, 1], "n/b": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
index=[0, 0, 0, 1, 1, 1, 2, 2, 2],
)
nf = nf.add_nested(nested, "bad dog")
nfx = nf.eval("`bad dog`.`n/c` = `bad dog`.`n/b` + 2.5")
# The number of top-level columns should not have changed
assert len(nfx.columns) == len(nf.columns)
assert (nfx["bad dog"].nest["n/c"] == nf["bad dog"].nest["n/b"] + 2.5).all()


def test_access_non_existing_column():
"""Test that accessing a non-existing column raises a KeyError"""
Expand Down

0 comments on commit 0d7ca7c

Please sign in to comment.