Skip to content

Commit

Permalink
TEST-#6708: Create test files using 'tmp_path' fixture
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev committed Nov 5, 2023
1 parent 7a9415e commit e5169b2
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 278 deletions.
55 changes: 18 additions & 37 deletions modin/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,63 +306,46 @@ def pytest_runtest_call(item):


@pytest.fixture(scope="class")
def TestReadCSVFixture():
def TestReadCSVFixture(tmp_path_factory):
tmp_path = tmp_path_factory.mktemp("TestReadCSVFixture")
filenames = []
files_ids = [
"test_read_csv_regular",
"test_read_csv_blank_lines",
"test_read_csv_yes_no",
"test_read_csv_nans",
"test_read_csv_bad_lines",
]
# each xdist worker spawned in separate process with separate namespace and dataset
pytest.csvs_names = {file_id: get_unique_filename() for file_id in files_ids}
pytest.csvs_names = {}
# test_read_csv_col_handling, test_read_csv_parsing
_make_csv_file(filenames)(
filename=pytest.csvs_names["test_read_csv_regular"],
)
pytest.csvs_names["test_read_csv_regular"] = _make_csv_file(data_dir=tmp_path)()
# test_read_csv_parsing
_make_csv_file(filenames)(
filename=pytest.csvs_names["test_read_csv_yes_no"],
pytest.csvs_names["test_read_csv_yes_no"] = _make_csv_file(data_dir=tmp_path)(
additional_col_values=["Yes", "true", "No", "false"],
)
# test_read_csv_col_handling
_make_csv_file(filenames)(
filename=pytest.csvs_names["test_read_csv_blank_lines"],
pytest.csvs_names["test_read_csv_blank_lines"] = _make_csv_file(data_dir=tmp_path)(
add_blank_lines=True,
)
# test_read_csv_nans_handling
_make_csv_file(filenames)(
filename=pytest.csvs_names["test_read_csv_nans"],
pytest.csvs_names["test_read_csv_nans"] = _make_csv_file(data_dir=tmp_path)(
add_blank_lines=True,
additional_col_values=["<NA>", "N/A", "NA", "NULL", "custom_nan", "73"],
)
# test_read_csv_error_handling
_make_csv_file(filenames)(
filename=pytest.csvs_names["test_read_csv_bad_lines"],
pytest.csvs_names["test_read_csv_bad_lines"] = _make_csv_file(data_dir=tmp_path)(
add_bad_lines=True,
)

filenames.extend(pytest.csvs_names.values())
yield
# Delete csv files that were created
teardown_test_files(filenames)


@pytest.fixture
@doc(_doc_pytest_fixture, file_type="csv")
def make_csv_file():
filenames = []

yield _make_csv_file(filenames)

# Delete csv files that were created
teardown_test_files(filenames)
def make_csv_file(tmp_path):
yield _make_csv_file(data_dir=tmp_path)


def create_fixture(file_type):
@doc(_doc_pytest_fixture, file_type=file_type)
def fixture():
func, filenames = make_default_file(file_type=file_type)
def fixture(tmp_path):
func, filenames = make_default_file(file_type=file_type, data_dir=tmp_path)
yield func
teardown_test_files(filenames)

Expand Down Expand Up @@ -476,20 +459,18 @@ def _sql_connection(filename, table=""):


@pytest.fixture(scope="class")
def TestReadGlobCSVFixture():
filenames = []
def TestReadGlobCSVFixture(tmp_path_factory):
tmp_path = tmp_path_factory.mktemp("TestReadGlobCSVFixture")

[Codecov / codecov/patch check warning on modin/conftest.py, line 463: added line #L463 was not covered by tests.]

base_name = get_unique_filename(extension="")
pytest.glob_path = "{}_*.csv".format(base_name)
pytest.files = ["{}_{}.csv".format(base_name, i) for i in range(11)]
pytest.glob_path = str(tmp_path / "{}_*.csv".format(base_name))
pytest.files = [str(tmp_path / "{}_{}.csv".format(base_name, i)) for i in range(11)]

[Codecov / codecov/patch check warning on modin/conftest.py, line 467: added lines #L466–#L467 were not covered by tests.]
for fname in pytest.files:
# Glob does not guarantee ordering so we have to remove the randomness in the generated csvs.
_make_csv_file(filenames)(fname, row_size=11, remove_randomness=True)
_make_csv_file(data_dir=tmp_path)(fname, row_size=11, remove_randomness=True)

[Codecov / codecov/patch check warning on modin/conftest.py, line 470: added line #L470 was not covered by tests.]

yield

teardown_test_files(filenames)


@pytest.fixture
def get_generated_doc_urls():
Expand Down
23 changes: 11 additions & 12 deletions modin/experimental/pandas/test/test_io_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
def test_from_sql_distributed(tmp_path, make_sql_connection):
filename = "test_from_sql_distributed.db"
table = "test_from_sql_distributed"
conn = make_sql_connection(tmp_path / filename, table)
conn = make_sql_connection(str(tmp_path / filename), table)
query = "select * from {0}".format(table)

pandas_df = pandas.read_sql(query, conn)
Expand Down Expand Up @@ -73,7 +73,7 @@ def test_from_sql_distributed(tmp_path, make_sql_connection):
def test_from_sql_defaults(tmp_path, make_sql_connection):
filename = "test_from_sql_distributed.db"
table = "test_from_sql_distributed"
conn = make_sql_connection(tmp_path / filename, table)
conn = make_sql_connection(str(tmp_path / filename), table)
query = "select * from {0}".format(table)

pandas_df = pandas.read_sql(query, conn)
Expand Down Expand Up @@ -134,15 +134,15 @@ def test_read_csv_without_glob(self):
storage_options={"anon": True},
)

def test_read_csv_glob_4373(self):
def test_read_csv_glob_4373(self, tmp_path):
columns, filename = ["col0"], "1x1.csv"
df = pd.DataFrame([[1]], columns=columns)
with (
warns_that_defaulting_to_pandas()
if Engine.get() == "Dask"
else contextlib.nullcontext()
):
df.to_csv(filename)
df.to_csv(str(tmp_path / filename))

kwargs = {"filepath_or_buffer": filename, "usecols": columns}
modin_df = pd.read_csv_glob(**kwargs)
Expand Down Expand Up @@ -203,9 +203,6 @@ def _pandas_read_csv_glob(path, storage_options):
)


test_default_to_pickle_filename = "test_default_to_pickle.pkl"


@pytest.mark.skipif(
Engine.get() not in ("Ray", "Unidist", "Dask"),
reason=f"{Engine.get()} does not have experimental API",
Expand Down Expand Up @@ -245,9 +242,9 @@ def _pandas_read_csv_glob(path, storage_options):
)
@pytest.mark.parametrize("compression", [None, "gzip"])
@pytest.mark.parametrize(
"filename", [test_default_to_pickle_filename, "test_to_pickle*.pkl"]
"filename", ["test_default_to_pickle.pkl", "test_to_pickle*.pkl"]
)
def test_distributed_pickling(filename, compression):
def test_distributed_pickling(tmp_path, filename, compression):
data = test_data["int_data"]
df = pd.DataFrame(data)

Expand All @@ -257,11 +254,13 @@ def test_distributed_pickling(filename, compression):

with (
warns_that_defaulting_to_pandas()
if filename_param == test_default_to_pickle_filename
if filename_param == "test_default_to_pickle.pkl"
else contextlib.nullcontext()
):
df.to_pickle_distributed(filename, compression=compression)
pickled_df = pd.read_pickle_distributed(filename, compression=compression)
df.to_pickle_distributed(str(tmp_path / filename), compression=compression)
pickled_df = pd.read_pickle_distributed(
str(tmp_path / filename), compression=compression
)
df_equals(pickled_df, df)

pickle_files = glob.glob(filename)
Expand Down
Loading

0 comments on commit e5169b2

Please sign in to comment.