CoW: Avoid warnings in stata code (pandas-dev#56392)

MichaelTiemannOSC · Dec 15, 2023 · d77d5e5
1 parent bb14870
commit d77d5e5
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 27 deletions.
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -342,10 +342,7 @@ def convert_delta_safe(base, deltas, unit) -> Series:
     has_bad_values = False
     if bad_locs.any():
         has_bad_values = True
-        # reset cache to avoid SettingWithCopy checks (we own the DataFrame and the
-        # `dates` Series is used to overwrite itself in the DataFramae)
-        dates._reset_cacher()
-        dates[bad_locs] = 1.0  # Replace with NaT
+        dates._values[bad_locs] = 1.0  # Replace with NaT
     dates = dates.astype(np.int64)
 
     if fmt.startswith(("%tc", "tc")):  # Delta ms relative to base
@@ -462,11 +459,10 @@ def g(x: datetime) -> int:
     bad_loc = isna(dates)
     index = dates.index
     if bad_loc.any():
-        dates = Series(dates)
         if lib.is_np_dtype(dates.dtype, "M"):
-            dates[bad_loc] = to_datetime(stata_epoch)
+            dates._values[bad_loc] = to_datetime(stata_epoch)
         else:
-            dates[bad_loc] = stata_epoch
+            dates._values[bad_loc] = stata_epoch
 
     if fmt in ["%tc", "tc"]:
         d = parse_dates_safe(dates, delta=True)
@@ -596,9 +592,8 @@ def _cast_to_stata_types(data: DataFrame) -> DataFrame:
     for col in data:
         # Cast from unsupported types to supported types
         is_nullable_int = isinstance(data[col].dtype, (IntegerDtype, BooleanDtype))
-        orig = data[col]
         # We need to find orig_missing before altering data below
-        orig_missing = orig.isna()
+        orig_missing = data[col].isna()
         if is_nullable_int:
             missing_loc = data[col].isna()
             if missing_loc.any():
@@ -1780,15 +1775,15 @@ def read(
         for idx in valid_dtypes:
             dtype = data.iloc[:, idx].dtype
             if dtype not in (object_type, self._dtyplist[idx]):
-                data.iloc[:, idx] = data.iloc[:, idx].astype(dtype)
+                data.isetitem(idx, data.iloc[:, idx].astype(dtype))
 
         data = self._do_convert_missing(data, convert_missing)
 
         if convert_dates:
             for i, fmt in enumerate(self._fmtlist):
                 if any(fmt.startswith(date_fmt) for date_fmt in _date_formats):
-                    data.iloc[:, i] = _stata_elapsed_date_to_datetime_vec(
-                        data.iloc[:, i], fmt
+                    data.isetitem(
+                        i, _stata_elapsed_date_to_datetime_vec(data.iloc[:, i], fmt)
                     )
 
         if convert_categoricals and self._format_version > 108:
@@ -1863,7 +1858,7 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra
             replacements[i] = replacement
         if replacements:
             for idx, value in replacements.items():
-                data.iloc[:, idx] = value
+                data.isetitem(idx, value)
         return data
 
     def _insert_strls(self, data: DataFrame) -> DataFrame:
@@ -1873,7 +1868,7 @@ def _insert_strls(self, data: DataFrame) -> DataFrame:
             if typ != "Q":
                 continue
             # Wrap v_o in a string to allow uint64 values as keys on 32bit OS
-            data.iloc[:, i] = [self.GSO[str(k)] for k in data.iloc[:, i]]
+            data.isetitem(i, [self.GSO[str(k)] for k in data.iloc[:, i]])
         return data
 
     def _do_select_columns(self, data: DataFrame, columns: Sequence[str]) -> DataFrame:

diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
@@ -289,8 +289,6 @@ def test_read_expands_user_home_dir(
         ):
             reader(path)
 
-    # TODO(CoW-warn) avoid warnings in the stata reader code
-    @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
     @pytest.mark.parametrize(
         "reader, module, path",
         [

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -32,11 +32,6 @@
     read_stata,
 )
 
-# TODO(CoW-warn) avoid warnings in the stata reader code
-pytestmark = pytest.mark.filterwarnings(
-    "ignore:Setting a value on a view:FutureWarning"
-)
-
 
 @pytest.fixture
 def mixed_frame():
@@ -140,7 +135,6 @@ def test_read_dta1(self, file, datapath):
 
         tm.assert_frame_equal(parsed, expected)
 
-    @pytest.mark.filterwarnings("always")
     def test_read_dta2(self, datapath):
         expected = DataFrame.from_records(
             [
@@ -183,13 +177,11 @@ def test_read_dta2(self, datapath):
         path2 = datapath("io", "data", "stata", "stata2_115.dta")
         path3 = datapath("io", "data", "stata", "stata2_117.dta")
 
-        # TODO(CoW-warn) avoid warnings in the stata reader code
-        # once fixed -> remove `raise_on_extra_warnings=False` again
-        with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False):
+        with tm.assert_produces_warning(UserWarning):
             parsed_114 = self.read_dta(path1)
-        with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False):
+        with tm.assert_produces_warning(UserWarning):
             parsed_115 = self.read_dta(path2)
-        with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False):
+        with tm.assert_produces_warning(UserWarning):
             parsed_117 = self.read_dta(path3)
             # FIXME: don't leave commented-out
             # 113 is buggy due to limits of date format support in Stata