Skip to content

Commit

Permalink
TEST-#6708: Create test files using 'tmp_path' fixture
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev committed Nov 5, 2023
1 parent 7a9415e commit e5169b2
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 278 deletions.
55 changes: 18 additions & 37 deletions modin/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,63 +306,46 @@ def pytest_runtest_call(item):


@pytest.fixture(scope="class")
def TestReadCSVFixture():
def TestReadCSVFixture(tmp_path_factory):
tmp_path = tmp_path_factory.mktemp("TestReadCSVFixture")
filenames = []
files_ids = [
"test_read_csv_regular",
"test_read_csv_blank_lines",
"test_read_csv_yes_no",
"test_read_csv_nans",
"test_read_csv_bad_lines",
]
# each xdist worker spawned in separate process with separate namespace and dataset
pytest.csvs_names = {file_id: get_unique_filename() for file_id in files_ids}
pytest.csvs_names = {}
# test_read_csv_col_handling, test_read_csv_parsing
_make_csv_file(filenames)(
filename=pytest.csvs_names["test_read_csv_regular"],
)
pytest.csvs_names["test_read_csv_regular"] = _make_csv_file(data_dir=tmp_path)()
# test_read_csv_parsing
_make_csv_file(filenames)(
filename=pytest.csvs_names["test_read_csv_yes_no"],
pytest.csvs_names["test_read_csv_yes_no"] = _make_csv_file(data_dir=tmp_path)(
additional_col_values=["Yes", "true", "No", "false"],
)
# test_read_csv_col_handling
_make_csv_file(filenames)(
filename=pytest.csvs_names["test_read_csv_blank_lines"],
pytest.csvs_names["test_read_csv_blank_lines"] = _make_csv_file(data_dir=tmp_path)(
add_blank_lines=True,
)
# test_read_csv_nans_handling
_make_csv_file(filenames)(
filename=pytest.csvs_names["test_read_csv_nans"],
pytest.csvs_names["test_read_csv_nans"] = _make_csv_file(data_dir=tmp_path)(
add_blank_lines=True,
additional_col_values=["<NA>", "N/A", "NA", "NULL", "custom_nan", "73"],
)
# test_read_csv_error_handling
_make_csv_file(filenames)(
filename=pytest.csvs_names["test_read_csv_bad_lines"],
pytest.csvs_names["test_read_csv_bad_lines"] = _make_csv_file(data_dir=tmp_path)(
add_bad_lines=True,
)

filenames.extend(pytest.csvs_names.values())
yield
# Delete csv files that were created
teardown_test_files(filenames)


@pytest.fixture
@doc(_doc_pytest_fixture, file_type="csv")
def make_csv_file():
filenames = []

yield _make_csv_file(filenames)

# Delete csv files that were created
teardown_test_files(filenames)
def make_csv_file(tmp_path):
yield _make_csv_file(data_dir=tmp_path)


def create_fixture(file_type):
@doc(_doc_pytest_fixture, file_type=file_type)
def fixture():
func, filenames = make_default_file(file_type=file_type)
def fixture(tmp_path):
func, filenames = make_default_file(file_type=file_type, data_dir=tmp_path)
yield func
teardown_test_files(filenames)

Expand Down Expand Up @@ -476,20 +459,18 @@ def _sql_connection(filename, table=""):


@pytest.fixture(scope="class")
def TestReadGlobCSVFixture():
filenames = []
def TestReadGlobCSVFixture(tmp_path_factory):
tmp_path = tmp_path_factory.mktemp("TestReadGlobCSVFixture")

[Codecov / codecov/patch check warning on modin/conftest.py, line 463: added line #L463 was not covered by tests.]

base_name = get_unique_filename(extension="")
pytest.glob_path = "{}_*.csv".format(base_name)
pytest.files = ["{}_{}.csv".format(base_name, i) for i in range(11)]
pytest.glob_path = str(tmp_path / "{}_*.csv".format(base_name))
pytest.files = [str(tmp_path / "{}_{}.csv".format(base_name, i)) for i in range(11)]

[Codecov / codecov/patch check warning on modin/conftest.py, line 467: added lines #L466–#L467 were not covered by tests.]
for fname in pytest.files:
# Glob does not guarantee ordering so we have to remove the randomness in the generated csvs.
_make_csv_file(filenames)(fname, row_size=11, remove_randomness=True)
_make_csv_file(data_dir=tmp_path)(fname, row_size=11, remove_randomness=True)

[Codecov / codecov/patch check warning on modin/conftest.py, line 470: added line #L470 was not covered by tests.]

yield

teardown_test_files(filenames)


@pytest.fixture
def get_generated_doc_urls():
Expand Down
23 changes: 11 additions & 12 deletions modin/experimental/pandas/test/test_io_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
def test_from_sql_distributed(tmp_path, make_sql_connection):
filename = "test_from_sql_distributed.db"
table = "test_from_sql_distributed"
conn = make_sql_connection(tmp_path / filename, table)
conn = make_sql_connection(str(tmp_path / filename), table)
query = "select * from {0}".format(table)

pandas_df = pandas.read_sql(query, conn)
Expand Down Expand Up @@ -73,7 +73,7 @@ def test_from_sql_distributed(tmp_path, make_sql_connection):
def test_from_sql_defaults(tmp_path, make_sql_connection):
filename = "test_from_sql_distributed.db"
table = "test_from_sql_distributed"
conn = make_sql_connection(tmp_path / filename, table)
conn = make_sql_connection(str(tmp_path / filename), table)
query = "select * from {0}".format(table)

pandas_df = pandas.read_sql(query, conn)
Expand Down Expand Up @@ -134,15 +134,15 @@ def test_read_csv_without_glob(self):
storage_options={"anon": True},
)

def test_read_csv_glob_4373(self):
def test_read_csv_glob_4373(self, tmp_path):
columns, filename = ["col0"], "1x1.csv"
df = pd.DataFrame([[1]], columns=columns)
with (
warns_that_defaulting_to_pandas()
if Engine.get() == "Dask"
else contextlib.nullcontext()
):
df.to_csv(filename)
df.to_csv(str(tmp_path / filename))

kwargs = {"filepath_or_buffer": filename, "usecols": columns}
modin_df = pd.read_csv_glob(**kwargs)
Expand Down Expand Up @@ -203,9 +203,6 @@ def _pandas_read_csv_glob(path, storage_options):
)


test_default_to_pickle_filename = "test_default_to_pickle.pkl"


@pytest.mark.skipif(
Engine.get() not in ("Ray", "Unidist", "Dask"),
reason=f"{Engine.get()} does not have experimental API",
Expand Down Expand Up @@ -245,9 +242,9 @@ def _pandas_read_csv_glob(path, storage_options):
)
@pytest.mark.parametrize("compression", [None, "gzip"])
@pytest.mark.parametrize(
"filename", [test_default_to_pickle_filename, "test_to_pickle*.pkl"]
"filename", ["test_default_to_pickle.pkl", "test_to_pickle*.pkl"]
)
def test_distributed_pickling(filename, compression):
def test_distributed_pickling(tmp_path, filename, compression):
data = test_data["int_data"]
df = pd.DataFrame(data)

Expand All @@ -257,11 +254,13 @@ def test_distributed_pickling(filename, compression):

with (
warns_that_defaulting_to_pandas()
if filename_param == test_default_to_pickle_filename
if filename_param == "test_default_to_pickle.pkl"
else contextlib.nullcontext()
):
df.to_pickle_distributed(filename, compression=compression)
pickled_df = pd.read_pickle_distributed(filename, compression=compression)
df.to_pickle_distributed(str(tmp_path / filename), compression=compression)
pickled_df = pd.read_pickle_distributed(
str(tmp_path / filename), compression=compression
)
df_equals(pickled_df, df)

pickle_files = glob.glob(filename)
Expand Down
Loading

0 comments on commit e5169b2

Please sign in to comment.