Skip to content

Commit

Permalink
CoW: Avoid warnings in stata code (pandas-dev#56392)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Dec 15, 2023
1 parent bb14870 commit d77d5e5
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 27 deletions.
23 changes: 9 additions & 14 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,10 +342,7 @@ def convert_delta_safe(base, deltas, unit) -> Series:
has_bad_values = False
if bad_locs.any():
has_bad_values = True
# reset cache to avoid SettingWithCopy checks (we own the DataFrame and the
# `dates` Series is used to overwrite itself in the DataFrame)
dates._reset_cacher()
dates[bad_locs] = 1.0 # Replace with NaT
dates._values[bad_locs] = 1.0 # Replace with NaT
dates = dates.astype(np.int64)

if fmt.startswith(("%tc", "tc")): # Delta ms relative to base
Expand Down Expand Up @@ -462,11 +459,10 @@ def g(x: datetime) -> int:
bad_loc = isna(dates)
index = dates.index
if bad_loc.any():
dates = Series(dates)
if lib.is_np_dtype(dates.dtype, "M"):
dates[bad_loc] = to_datetime(stata_epoch)
dates._values[bad_loc] = to_datetime(stata_epoch)
else:
dates[bad_loc] = stata_epoch
dates._values[bad_loc] = stata_epoch

if fmt in ["%tc", "tc"]:
d = parse_dates_safe(dates, delta=True)
Expand Down Expand Up @@ -596,9 +592,8 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame:
for col in data:
# Cast from unsupported types to supported types
is_nullable_int = isinstance(data[col].dtype, (IntegerDtype, BooleanDtype))
orig = data[col]
# We need to find orig_missing before altering data below
orig_missing = orig.isna()
orig_missing = data[col].isna()
if is_nullable_int:
missing_loc = data[col].isna()
if missing_loc.any():
Expand Down Expand Up @@ -1780,15 +1775,15 @@ def read(
for idx in valid_dtypes:
dtype = data.iloc[:, idx].dtype
if dtype not in (object_type, self._dtyplist[idx]):
data.iloc[:, idx] = data.iloc[:, idx].astype(dtype)
data.isetitem(idx, data.iloc[:, idx].astype(dtype))

data = self._do_convert_missing(data, convert_missing)

if convert_dates:
for i, fmt in enumerate(self._fmtlist):
if any(fmt.startswith(date_fmt) for date_fmt in _date_formats):
data.iloc[:, i] = _stata_elapsed_date_to_datetime_vec(
data.iloc[:, i], fmt
data.isetitem(
i, _stata_elapsed_date_to_datetime_vec(data.iloc[:, i], fmt)
)

if convert_categoricals and self._format_version > 108:
Expand Down Expand Up @@ -1863,7 +1858,7 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra
replacements[i] = replacement
if replacements:
for idx, value in replacements.items():
data.iloc[:, idx] = value
data.isetitem(idx, value)
return data

def _insert_strls(self, data: DataFrame) -> DataFrame:
Expand All @@ -1873,7 +1868,7 @@ def _insert_strls(self, data: DataFrame) -> DataFrame:
if typ != "Q":
continue
# Wrap v_o in a string to allow uint64 values as keys on 32bit OS
data.iloc[:, i] = [self.GSO[str(k)] for k in data.iloc[:, i]]
data.isetitem(i, [self.GSO[str(k)] for k in data.iloc[:, i]])
return data

def _do_select_columns(self, data: DataFrame, columns: Sequence[str]) -> DataFrame:
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,6 @@ def test_read_expands_user_home_dir(
):
reader(path)

# TODO(CoW-warn) avoid warnings in the stata reader code
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
@pytest.mark.parametrize(
"reader, module, path",
[
Expand Down
14 changes: 3 additions & 11 deletions pandas/tests/io/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@
read_stata,
)

# TODO(CoW-warn) avoid warnings in the stata reader code
pytestmark = pytest.mark.filterwarnings(
"ignore:Setting a value on a view:FutureWarning"
)


@pytest.fixture
def mixed_frame():
Expand Down Expand Up @@ -140,7 +135,6 @@ def test_read_dta1(self, file, datapath):

tm.assert_frame_equal(parsed, expected)

@pytest.mark.filterwarnings("always")
def test_read_dta2(self, datapath):
expected = DataFrame.from_records(
[
Expand Down Expand Up @@ -183,13 +177,11 @@ def test_read_dta2(self, datapath):
path2 = datapath("io", "data", "stata", "stata2_115.dta")
path3 = datapath("io", "data", "stata", "stata2_117.dta")

# TODO(CoW-warn) avoid warnings in the stata reader code
# once fixed -> remove `raise_on_extra_warnings=False` again
with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False):
with tm.assert_produces_warning(UserWarning):
parsed_114 = self.read_dta(path1)
with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False):
with tm.assert_produces_warning(UserWarning):
parsed_115 = self.read_dta(path2)
with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False):
with tm.assert_produces_warning(UserWarning):
parsed_117 = self.read_dta(path3)
# FIXME: don't leave commented-out
# 113 is buggy due to limits of date format support in Stata
Expand Down

0 comments on commit d77d5e5

Please sign in to comment.