Skip to content

Commit

Permalink
ENH: propagating attrs always uses deepcopy
Browse files Browse the repository at this point in the history
Always using a deepcopy prevents shared state and thus unintentional
modification of the attrs of other objects. IMHO this safety has a
higher priority than the slight performance cost of the deepcopy.
The implementation now skips the copying if *attrs* are not used (i.e.
an empty dict). This check takes only ~20ns. Thus, the attrs propagation
mechanism has no performance impact if attrs are not used.

Closes pandas-dev#54134.
  • Loading branch information
Tim Hoffmann committed Sep 28, 2023
1 parent 824a273 commit 99d67a5
Showing 1 changed file with 19 additions and 7 deletions.
26 changes: 19 additions & 7 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from __future__ import annotations

import collections
import copy
import datetime as dt
from functools import partial
import gc
Expand Down Expand Up @@ -364,6 +365,11 @@ def attrs(self) -> dict[Hashable, Any]:
attrs is experimental and may change without warning.
Many operations that create new datasets will copy *attrs*. Copies are
always deep so that changing *attrs* will only affect the present
dataset. `pandas.concatenate` copies *attrs* only if all input
datasets have the same *attrs*.
See Also
--------
DataFrame.flags : Global flags applying to this object.
Expand Down Expand Up @@ -6191,8 +6197,12 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
stable across pandas releases.
"""
if isinstance(other, NDFrame):
for name in other.attrs:
self.attrs[name] = other.attrs[name]
if other.attrs:
# One could make the deepcopy unconditionally, however a deepcopy
# of an empty dict is 50x more expensive than the empty check.
# However, we want attrs propagation to have minimal performance
# impact it attrs are not used; i.e. attrs is an empty dict.
self.attrs = copy.deepcopy(other.attrs)

self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels
# For subclasses using _metadata.
Expand All @@ -6201,11 +6211,13 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
object.__setattr__(self, name, getattr(other, name, None))

if method == "concat":
attrs = other.objs[0].attrs
check_attrs = all(objs.attrs == attrs for objs in other.objs[1:])
if check_attrs:
for name in attrs:
self.attrs[name] = attrs[name]
# propagate attrs only if all concat arguments have the same attrs
if all(bool(obj.attrs) for obj in other.objs):
# all concatenate arguments have non-empty attrs
attrs = other.objs[0].attrs
have_same_attrs = all(obj.attrs == attrs for obj in other.objs[1:])
if have_same_attrs:
self.attrs = copy.deepcopy(attrs)

allows_duplicate_labels = all(
x.flags.allows_duplicate_labels for x in other.objs
Expand Down

0 comments on commit 99d67a5

Please sign in to comment.