From 81d708f7fc32b3cacd4fc7a547a512cbff81f450 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Bel=C3=A1k?= <michal.belak@datamole.cz>
Date: Thu, 29 Feb 2024 16:08:48 +0100
Subject: [PATCH 1/4] test: use fixtures

---
 tests/test_bivariate_analysis.py          | 27 ++++----
 tests/test_group_analysis.py              | 84 +++++++++++------------
 tests/test_multivariate_analysis.py       | 42 ++++++------
 tests/test_overview_section.py            | 27 ++++----
 tests/test_report.py                      | 22 +++---
 tests/test_timeseries_analysis.py         | 33 ++++-----
 tests/test_univariate_analysis_section.py | 15 ++--
 7 files changed, 119 insertions(+), 131 deletions(-)

diff --git a/tests/test_bivariate_analysis.py b/tests/test_bivariate_analysis.py
index 0f52775..6b1460e 100644
--- a/tests/test_bivariate_analysis.py
+++ b/tests/test_bivariate_analysis.py
@@ -14,7 +14,8 @@
 from .pyarrow_utils import pyarrow_parameterize
 
 
-def get_test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame:
+@pytest.fixture
+def test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame:
     test_df = pd.DataFrame(data=[[1.1, "a"], [2.2, "b"], [3.3, "c"]], columns=["A", "B"])
     if pyarrow_dtypes:
         test_df = test_df.convert_dtypes(dtype_backend="pyarrow")
@@ -125,7 +126,7 @@ def test_section_adding():
     ), "Subsection should be ContingencyTable"
 
 
-def test_code_export_verbosity_low():
+def test_code_export_verbosity_low(test_df: pd.DataFrame):
     bivariate_section = bivariate_analysis.BivariateAnalysis(verbosity=Verbosity.LOW)
     # Export code
     exported_cells = []
@@ -138,10 +139,10 @@ def test_code_export_verbosity_low():
     assert len(exported_code) == 1
     assert exported_code[0] == expected_code[0], "Exported code mismatch"
 
-    check_section_executes(bivariate_section, df=get_test_df())
+    check_section_executes(bivariate_section, df=test_df)
 
 
-def test_code_export_verbosity_low_with_subsections():
+def test_code_export_verbosity_low_with_subsections(test_df: pd.DataFrame):
     bivariate_section = bivariate_analysis.BivariateAnalysis(
         subsections=[
             BivariateAnalysisSubsection.ContingencyTable,
@@ -164,7 +165,7 @@ def test_code_export_verbosity_low_with_subsections():
     assert len(exported_code) == 1
     assert exported_code[0] == expected_code[0], "Exported code mismatch"
 
-    check_section_executes(bivariate_section, df=get_test_df())
+    check_section_executes(bivariate_section, df=test_df)
 
 
 def test_generated_code_verbosity_low_columns():
@@ -209,7 +210,7 @@ def test_generated_code_verbosity_low_columns():
     check_section_executes(bivariate_section, df=test_df)
 
 
-def test_generated_code_verbosity_medium():
+def test_generated_code_verbosity_medium(test_df: pd.DataFrame):
     bivariate_section = bivariate_analysis.BivariateAnalysis(
         verbosity=Verbosity.MEDIUM,
         subsections=[
@@ -233,7 +234,7 @@ def test_generated_code_verbosity_medium():
     for expected_line, exported_line in zip(expected_code, exported_code):
         assert expected_line == exported_line, "Exported code mismatch"
 
-    check_section_executes(bivariate_section, df=get_test_df())
+    check_section_executes(bivariate_section, df=test_df)
 
 
 def test_generated_code_verbosity_medium_columns_x_y():
@@ -307,7 +308,7 @@ def test_generated_code_verbosity_medium_columns_pairs():
     check_section_executes(bivariate_section, df=test_df)
 
 
-def test_generated_code_verbosity_high():
+def test_generated_code_verbosity_high(test_df: pd.DataFrame):
     bivariate_section = bivariate_analysis.BivariateAnalysis(
         verbosity=Verbosity.HIGH,
         subsections=[
@@ -345,10 +346,10 @@ def test_generated_code_verbosity_high():
     for expected_line, exported_line in zip(expected_code, exported_code):
         assert expected_line == exported_line, "Exported code mismatch"
 
-    check_section_executes(bivariate_section, df=get_test_df())
+    check_section_executes(bivariate_section, df=test_df)
 
 
-def test_verbosity_low_different_subsection_verbosities():
+def test_verbosity_low_different_subsection_verbosities(test_df: pd.DataFrame):
     bivariate_section = BivariateAnalysis(
         verbosity=Verbosity.LOW,
         subsections=[
@@ -377,7 +378,7 @@ def test_verbosity_low_different_subsection_verbosities():
     for expected_line, exported_line in zip(expected_code, exported_code):
         assert expected_line == exported_line, "Exported code mismatch"
 
-    check_section_executes(bivariate_section, df=get_test_df())
+    check_section_executes(bivariate_section, df=test_df)
 
 
 def test_imports_verbosity_low():
@@ -450,9 +451,9 @@ def test_imports_verbosity_low_different_subsection_verbosities():
 
 
 @pyarrow_parameterize
-def test_show(pyarrow_dtypes: bool):
+def test_show(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     bivariate_section = BivariateAnalysis()
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)
         with redirect_stdout(None):
-            bivariate_section.show(get_test_df(pyarrow_dtypes=pyarrow_dtypes))
+            bivariate_section.show(test_df)
diff --git a/tests/test_group_analysis.py b/tests/test_group_analysis.py
index eeb6c67..6a431df 100644
--- a/tests/test_group_analysis.py
+++ b/tests/test_group_analysis.py
@@ -28,7 +28,8 @@
 plotly.io.renderers.default = "json"
 
 
-def get_test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame:
+@pytest.fixture
+def test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame:
     test_df = pd.DataFrame(
         data=[
             ["P" if np.random.uniform() < 0.4 else "N", 1.5 * i, "X" if i % 2 == 0 else "Y"]
@@ -54,50 +55,46 @@ def test_invalid_verbosities():
 
 
 @pyarrow_parameterize
-def test_groupby_nonexistent_col(pyarrow_dtypes: bool):
+def test_groupby_nonexistent_col(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     with pytest.raises(ValueError):
-        show_group_analysis(df=get_test_df(pyarrow_dtypes=pyarrow_dtypes), groupby=["non-existent"])
+        show_group_analysis(df=test_df, groupby=["non-existent"])
     with pytest.raises(ValueError):
-        group_missing_values(
-            df=get_test_df(pyarrow_dtypes=pyarrow_dtypes), groupby=["non-existent"]
-        )
+        group_missing_values(df=test_df, groupby=["non-existent"])
 
 
 @pyarrow_parameterize
-def test_static_methods(pyarrow_dtypes: bool):
-    df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_static_methods(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     with redirect_stdout(None):
-        show_group_analysis(df=df, groupby="C")
-        show_group_analysis(df=df, groupby=["C"], columns=["A"])
-        show_group_analysis(df=df, groupby=["C"], columns=["A", "B"])
-        show_group_analysis(df=df, groupby="C", columns=["A", "B", "C"])
-        show_group_analysis(df=df, groupby="C", columns=["C"])
+        show_group_analysis(df=test_df, groupby="C")
+        show_group_analysis(df=test_df, groupby=["C"], columns=["A"])
+        show_group_analysis(df=test_df, groupby=["C"], columns=["A", "B"])
+        show_group_analysis(df=test_df, groupby="C", columns=["A", "B", "C"])
+        show_group_analysis(df=test_df, groupby="C", columns=["C"])
 
-        group_barplot(df, groupby=["A"], column="B")
-        group_barplot(df, groupby=["A"], column="A")
-        group_barplot(df, groupby=["A", "C"], column="B")
-        group_barplot(df, groupby=["A"], column="C")
-        group_barplot(df, groupby=["A"], column="C")
+        group_barplot(test_df, groupby=["A"], column="B")
+        group_barplot(test_df, groupby=["A"], column="A")
+        group_barplot(test_df, groupby=["A", "C"], column="B")
+        group_barplot(test_df, groupby=["A"], column="C")
+        group_barplot(test_df, groupby=["A"], column="C")
 
-        group_missing_values(df, groupby=["C"])
-        group_missing_values(df, groupby=["C"], columns=["A", "B"])
-        group_missing_values(df, groupby=["C"], columns=["A", "B", "C"])
-        group_missing_values(df, groupby=["C"], columns=["C"])
+        group_missing_values(test_df, groupby=["C"])
+        group_missing_values(test_df, groupby=["C"], columns=["A", "B"])
+        group_missing_values(test_df, groupby=["C"], columns=["A", "B", "C"])
+        group_missing_values(test_df, groupby=["C"], columns=["C"])
 
-        overlaid_histograms(df, groupby=["A"], column="B")
-        overlaid_histograms(df, groupby=["A", "C"], column="B")
-        overlaid_histograms(df, groupby=["A", "C"], column="B")
-        overlaid_histograms(df, groupby=["B"], column="B")
+        overlaid_histograms(test_df, groupby=["A"], column="B")
+        overlaid_histograms(test_df, groupby=["A", "C"], column="B")
+        overlaid_histograms(test_df, groupby=["A", "C"], column="B")
+        overlaid_histograms(test_df, groupby=["B"], column="B")
 
 
 @pyarrow_parameterize
-def test_code_export_verbosity_low(pyarrow_dtypes: bool):
-    df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_code_export_verbosity_low(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     group_section = GroupAnalysis(groupby="B", verbosity=Verbosity.LOW)
 
     # Export code
     exported_cells = []
-    group_section.add_cells(exported_cells, df=df)
+    group_section.add_cells(exported_cells, df=test_df)
     # Remove markdown and other cells and get code strings
     exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"]
     # Define expected code
@@ -106,17 +103,16 @@ def test_code_export_verbosity_low(pyarrow_dtypes: bool):
     assert len(exported_code) == 1
     assert exported_code[0] == expected_code[0], "Exported code mismatch"
 
-    check_section_executes(group_section, df)
+    check_section_executes(group_section, test_df)
 
 
 @pyarrow_parameterize
-def test_code_export_verbosity_medium(pyarrow_dtypes: bool):
-    df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_code_export_verbosity_medium(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     group_section = GroupAnalysis(groupby="A", verbosity=Verbosity.MEDIUM)
 
     # Export code
     exported_cells = []
-    group_section.add_cells(exported_cells, df=df)
+    group_section.add_cells(exported_cells, df=test_df)
     # Remove markdown and other cells and get code strings
     exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"]
     # Define expected code
@@ -135,17 +131,16 @@ def test_code_export_verbosity_medium(pyarrow_dtypes: bool):
     for expected_line, exported_line in zip(expected_code, exported_code):
         assert expected_line == exported_line, "Exported code mismatch"
 
-    check_section_executes(group_section, df)
+    check_section_executes(group_section, test_df)
 
 
 @pyarrow_parameterize
-def test_code_export_verbosity_high(pyarrow_dtypes: bool):
-    df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_code_export_verbosity_high(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     group_section = GroupAnalysis(groupby="A", verbosity=Verbosity.HIGH)
 
     # Export code
     exported_cells = []
-    group_section.add_cells(exported_cells, df=df)
+    group_section.add_cells(exported_cells, df=test_df)
     # Remove markdown and other cells and get code strings
     exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"]
     # Define expected code
@@ -192,12 +187,11 @@ def test_code_export_verbosity_high(pyarrow_dtypes: bool):
     for expected_line, exported_line in zip(expected_code, exported_code):
         assert expected_line == exported_line, "Exported code mismatch"
 
-    check_section_executes(group_section, df)
+    check_section_executes(group_section, test_df)
 
 
 @pyarrow_parameterize
-def test_columns_parameter(pyarrow_dtypes: bool):
-    df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_columns_parameter(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     ga = GroupAnalysis(groupby="A", columns=["B"])
     assert ga.groupby == ["A"]
     assert ga.columns == ["B"]
@@ -205,8 +199,8 @@ def test_columns_parameter(pyarrow_dtypes: bool):
     ga = GroupAnalysis(groupby="A")
     assert ga.groupby == ["A"]
     assert ga.columns is None
-    ga.show(df)
-    ga.add_cells([], df=df)
+    ga.show(test_df)
+    ga.add_cells([], df=test_df)
     assert ga.groupby == ["A"]
     assert ga.columns is None
 
@@ -218,10 +212,10 @@ def test_column_list_not_modified():
 
 
 @pyarrow_parameterize
-def test_show(pyarrow_dtypes: bool):
-    df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_show(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+    df = test_df
     group_section = GroupAnalysis(groupby="A")
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)
         with redirect_stdout(None):
-            group_section.show(df)
+            group_section.show(test_df)
diff --git a/tests/test_multivariate_analysis.py b/tests/test_multivariate_analysis.py
index 8ccdc55..48299ca 100644
--- a/tests/test_multivariate_analysis.py
+++ b/tests/test_multivariate_analysis.py
@@ -27,7 +27,8 @@
 from .pyarrow_utils import pyarrow_parameterize
 
 
-def get_test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame:
+@pytest.fixture
+def test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame:
     test_df = pd.DataFrame(
         data=[
             [1.1, "a", 3.7, 3.9],
@@ -138,8 +139,8 @@ def test_section_adding():
 
 
 @pyarrow_parameterize
-def test_code_export_verbosity_low(pyarrow_dtypes: bool):
-    df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_code_export_verbosity_low(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+    df = test_df
     multivariate_section = MultivariateAnalysis(verbosity=Verbosity.LOW)
     # Export code
     exported_cells = []
@@ -156,12 +157,12 @@ def test_code_export_verbosity_low(pyarrow_dtypes: bool):
 
 
 @pyarrow_parameterize
-def test_code_export_verbosity_low_with_subsections(pyarrow_dtypes: bool):
+def test_code_export_verbosity_low_with_subsections(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     subsec = MultivariateAnalysisSubsection
     subsections = [subsec.ParallelCategories, subsec.PCA, subsec.ParallelCoordinates, subsec.PCA]
     if UMAP_AVAILABLE:
         subsections.append(subsec.UMAP)
-    df = get_test_df()
+    df = test_df
     multivariate_section = multivariate_analysis.MultivariateAnalysis(
         subsections=subsections, verbosity=Verbosity.LOW
     )
@@ -196,8 +197,7 @@ def test_code_export_verbosity_low_with_subsections(pyarrow_dtypes: bool):
     check_section_executes(multivariate_section, df)
 
 
-@pyarrow_parameterize
-def test_code_export_verbosity_medium_all_cols_valid(pyarrow_dtypes: bool):
+def test_code_export_verbosity_medium_all_cols_valid():
     all_numeric_df = pd.DataFrame(
         data=[[1.1, 1, -2], [2.2, 2, -5.3], [3.3, 3, 4]], columns=["col1", "col2", "col3"]
     )
@@ -228,12 +228,11 @@ def test_code_export_verbosity_medium_all_cols_valid(pyarrow_dtypes: bool):
 
 
 @pyarrow_parameterize
-def test_generated_code_verbosity_1(pyarrow_dtypes: bool):
+def test_generated_code_verbosity_1(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     multivariate_section = MultivariateAnalysis(verbosity=Verbosity.MEDIUM)
-    df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
 
     exported_cells = []
-    multivariate_section.add_cells(exported_cells, df=df)
+    multivariate_section.add_cells(exported_cells, df=test_df)
     exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"]
     if UMAP_AVAILABLE:
         expected_code = [
@@ -263,16 +262,15 @@ def test_generated_code_verbosity_1(pyarrow_dtypes: bool):
     for expected_line, exported_line in zip(expected_code, exported_code):
         assert expected_line == exported_line, "Exported code mismatch"
 
-    check_section_executes(multivariate_section, df)
+    check_section_executes(multivariate_section, test_df)
 
 
 @pyarrow_parameterize
-def test_generated_code_verbosity_2(pyarrow_dtypes: bool):
-    df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_generated_code_verbosity_2(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     multivariate_section = MultivariateAnalysis(verbosity=Verbosity.HIGH)
 
     multivariate_cells = []
-    multivariate_section.add_cells(multivariate_cells, df=df)
+    multivariate_section.add_cells(multivariate_cells, df=test_df)
     exported_code = [cell["source"] for cell in multivariate_cells if cell["cell_type"] == "code"]
     expected_code = [
         get_code(select_numeric_columns),
@@ -331,7 +329,7 @@ def test_generated_code_verbosity_2(pyarrow_dtypes: bool):
     for expected_line, exported_line in zip(expected_code, exported_code):
         assert expected_line == exported_line, "Exported code mismatch"
 
-    check_section_executes(multivariate_section, df)
+    check_section_executes(multivariate_section, test_df)
 
 
 @pyarrow_parameterize
@@ -360,7 +358,9 @@ def test_verbosity_medium_non_categorical_col(pyarrow_dtypes: bool):
 
 
 @pyarrow_parameterize
-def test_verbosity_low_different_subsection_verbosities(pyarrow_dtypes: bool):
+def test_verbosity_low_different_subsection_verbosities(
+    pyarrow_dtypes: bool, test_df: pd.DataFrame
+):
     subsections = [
         MultivariateAnalysisSubsection.PCA,
         MultivariateAnalysisSubsection.PCA,
@@ -369,7 +369,6 @@ def test_verbosity_low_different_subsection_verbosities(pyarrow_dtypes: bool):
     ]
     if UMAP_AVAILABLE:
         subsections.insert(2, MultivariateAnalysisSubsection.UMAP)
-    df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
     multivariate_section = MultivariateAnalysis(
         verbosity=Verbosity.LOW,
         subsections=subsections,
@@ -378,7 +377,7 @@ def test_verbosity_low_different_subsection_verbosities(pyarrow_dtypes: bool):
     )
 
     multivariate_cells = []
-    multivariate_section.add_cells(multivariate_cells, df=df)
+    multivariate_section.add_cells(multivariate_cells, df=test_df)
     exported_code = [cell["source"] for cell in multivariate_cells if cell["cell_type"] == "code"]
     expected_subsections = [
         "MultivariateAnalysisSubsection.PCA",
@@ -405,7 +404,7 @@ def test_verbosity_low_different_subsection_verbosities(pyarrow_dtypes: bool):
     for expected_line, exported_line in zip(expected_code, exported_code):
         assert expected_line == exported_line, "Exported code mismatch"
 
-    check_section_executes(multivariate_section, df)
+    check_section_executes(multivariate_section, test_df)
 
 
 def test_imports_verbosity_low():
@@ -481,10 +480,9 @@ def test_imports_verbosity_low_different_subsection_verbosities():
 
 
 @pyarrow_parameterize
-def test_show(pyarrow_dtypes: bool):
-    df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_show(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     multivariate_section = MultivariateAnalysis()
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)
         with redirect_stdout(None):
-            multivariate_section.show(df)
+            multivariate_section.show(test_df)
diff --git a/tests/test_overview_section.py b/tests/test_overview_section.py
index a1ae9bf..34338f6 100644
--- a/tests/test_overview_section.py
+++ b/tests/test_overview_section.py
@@ -23,7 +23,8 @@
 from .execution_utils import check_section_executes
 
 
-def get_test_df() -> pd.DataFrame:
+@pytest.fixture
+def test_df() -> pd.DataFrame:
     test_df = pd.DataFrame(data=[[1.1, "a"], [2.2, "b"], [3.3, "c"]], columns=["A", "B"])
 
     return test_df
@@ -126,7 +127,7 @@ def test_section_adding():
     ), "Subsection should be DuplicateRows"
 
 
-def test_code_export_verbosity_low():
+def test_code_export_verbosity_low(test_df: pd.DataFrame):
     overview_section = Overview(verbosity=Verbosity.LOW)
     # Export code
     exported_cells = []
@@ -138,10 +139,10 @@ def test_code_export_verbosity_low():
     # Test code equivalence
     assert exported_code[0] == expected_code[0], "Exported code mismatch"
 
-    check_section_executes(overview_section, df=get_test_df())
+    check_section_executes(overview_section, df=test_df)
 
 
-def test_code_export_verbosity_low_with_subsections():
+def test_code_export_verbosity_low_with_subsections(test_df: pd.DataFrame):
     overview_section = Overview(
         subsections=[
             OverviewSubsection.QuickInfo,
@@ -162,10 +163,10 @@ def test_code_export_verbosity_low_with_subsections():
     # Test code equivalence
     assert exported_code[0] == expected_code[0], "Exported code mismatch"
 
-    check_section_executes(overview_section, df=get_test_df())
+    check_section_executes(overview_section, df=test_df)
 
 
-def test_code_export_verbosity_medium():
+def test_code_export_verbosity_medium(test_df: pd.DataFrame):
     # Construct overview section
     overview_section = Overview(
         subsections=[
@@ -198,10 +199,10 @@ def test_code_export_verbosity_medium():
     for i in range(len(exported_code)):
         assert exported_code[i] == expected_code[i], "Exported code mismatch"
 
-    check_section_executes(overview_section, df=get_test_df())
+    check_section_executes(overview_section, df=test_df)
 
 
-def test_code_export_verbosity_high():
+def test_code_export_verbosity_high(test_df: pd.DataFrame):
     # Construct overview section
     overview_section = Overview(
         subsections=[
@@ -278,10 +279,10 @@ def test_code_export_verbosity_high():
     for i in range(len(exported_code)):
         assert exported_code[i] == expected_code[i], "Exported code mismatch"
 
-    check_section_executes(overview_section, df=get_test_df())
+    check_section_executes(overview_section, df=test_df)
 
 
-def test_verbosity_low_different_subsection_verbosities():
+def test_verbosity_low_different_subsection_verbosities(test_df: pd.DataFrame):
     overview_section = Overview(
         verbosity=Verbosity.LOW,
         verbosity_quick_info=Verbosity.MEDIUM,
@@ -313,7 +314,7 @@ def test_verbosity_low_different_subsection_verbosities():
     for expected_line, exported_line in zip(expected_code, exported_code):
         assert expected_line == exported_line, "Exported code mismatch"
 
-    check_section_executes(overview_section, df=get_test_df())
+    check_section_executes(overview_section, df=test_df)
 
 
 def test_imports_verbosity_low():
@@ -377,9 +378,9 @@ def test_imports_verbosity_low_different_subsection_verbosities():
     assert set(exported_imports) == set(expected_imports)
 
 
-def test_show():
+def test_show(test_df: pd.DataFrame):
     overview_section = Overview()
-    df = get_test_df()
+    df = test_df
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)
         with redirect_stdout(None):
diff --git a/tests/test_report.py b/tests/test_report.py
index 808f4b2..8c1137d 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 import pandas as pd
+import pytest
 
 from edvart.report import DefaultReport, Report
 from edvart.report_sections.bivariate_analysis import BivariateAnalysis
@@ -10,14 +11,15 @@
 from edvart.report_sections.univariate_analysis import UnivariateAnalysis
 
 
-def _get_test_df() -> pd.DataFrame:
+@pytest.fixture
+def test_df() -> pd.DataFrame:
     return pd.DataFrame(
         data=np.random.random_sample((50, 20)), columns=[f"Col{i}" for i in range(20)]
     )
 
 
-def test_report():
-    report = Report(dataframe=_get_test_df())
+def test_report(test_df: pd.DataFrame):
+    report = Report(dataframe=test_df)
     assert len(report.sections) == 0, "Report should be empty"
 
     report.add_overview(verbosity=Verbosity.MEDIUM)
@@ -33,11 +35,11 @@ def test_report():
     assert report.sections[1].columns == ["Col1", "Col2", "Col3"], "Wrong columns"
 
 
-def test_add_section():
+def test_add_section(test_df: pd.DataFrame):
     bivariate_analysis_section = BivariateAnalysis()
     univariate_analysis_section = UnivariateAnalysis()
     report = (
-        Report(dataframe=_get_test_df())
+        Report(dataframe=test_df)
         .add_section(bivariate_analysis_section)
         .add_section(univariate_analysis_section)
     )
@@ -45,9 +47,9 @@ def test_add_section():
     assert report.sections == [bivariate_analysis_section, univariate_analysis_section]
 
 
-def test_default_report():
+def test_default_report(test_df: pd.DataFrame):
     report = DefaultReport(
-        dataframe=_get_test_df(),
+        dataframe=test_df,
         verbosity_overview=Verbosity.MEDIUM,
         verbosity_univariate_analysis=Verbosity.HIGH,
         columns_bivariate_analysis=["Col1", "Col2", "Col3"],
@@ -64,8 +66,7 @@ def test_default_report():
     assert report.sections[2].columns == ["Col1", "Col2", "Col3"], "Wrong columns"
 
 
-def test_column_selection():
-    test_df = _get_test_df()
+def test_column_selection(test_df: pd.DataFrame):
     report = Report(dataframe=test_df)
 
     # Default column selection
@@ -82,8 +83,7 @@ def test_column_selection():
     assert set(report.sections[2].columns) == {"Col5", "Col7", "Col13"}, "Wrong column selection"
 
 
-def test_show():
-    test_df = _get_test_df()
+def test_show(test_df: pd.DataFrame):
     report = Report(dataframe=test_df)
 
     with warnings.catch_warnings():
diff --git a/tests/test_timeseries_analysis.py b/tests/test_timeseries_analysis.py
index f536309..fce8fa8 100644
--- a/tests/test_timeseries_analysis.py
+++ b/tests/test_timeseries_analysis.py
@@ -22,7 +22,8 @@
 pio.renderers.default = "json"
 
 
-def get_test_df() -> pd.DataFrame:
+@pytest.fixture
+def test_df() -> pd.DataFrame:
     n_rows = 20
     columns = ["a", "b", "c"]
     return pd.DataFrame(
@@ -185,9 +186,9 @@ def test_ft_no_sampling_rate_error():
         )
 
 
-def test_code_export_verbosity_low():
+def test_code_export_verbosity_low(test_df: pd.DataFrame):
     ts_section = TimeseriesAnalysis(verbosity=Verbosity.LOW)
-    test_df = get_test_df()
+    test_df = test_df
     # Export code
     exported_cells = []
     ts_section.add_cells(exported_cells, df=test_df)
@@ -202,7 +203,7 @@ def test_code_export_verbosity_low():
     check_section_executes(ts_section, test_df)
 
 
-def test_code_export_verbosity_low_with_subsections():
+def test_code_export_verbosity_low_with_subsections(test_df: pd.DataFrame):
     ts_section = TimeseriesAnalysis(
         subsections=[
             TimeseriesAnalysisSubsection.RollingStatistics,
@@ -210,10 +211,10 @@ def test_code_export_verbosity_low_with_subsections():
         ],
         verbosity=Verbosity.LOW,
     )
-    test_df = get_test_df()
+    test_df = test_df
     # Export code
     exported_cells = []
-    ts_section.add_cells(exported_cells, df=get_test_df())
+    ts_section.add_cells(exported_cells, df=test_df)
     # Remove markdown and other cells and get code strings
     exported_code = [cell["source"] for cell in exported_cells if cell["cell_type"] == "code"]
     # Define expected code
@@ -229,7 +230,7 @@ def test_code_export_verbosity_low_with_subsections():
     check_section_executes(ts_section, test_df)
 
 
-def test_code_export_verbosity_low_with_fft_stft():
+def test_code_export_verbosity_low_with_fft_stft(test_df: pd.DataFrame):
     ts_section = TimeseriesAnalysis(
         subsections=[
             TimeseriesAnalysisSubsection.FourierTransform,
@@ -239,7 +240,6 @@ def test_code_export_verbosity_low_with_fft_stft():
         sampling_rate=1,
         stft_window_size=1,
     )
-    test_df = get_test_df()
     # Export code
     exported_cells = []
     ts_section.add_cells(exported_cells, df=test_df)
@@ -259,9 +259,8 @@ def test_code_export_verbosity_low_with_fft_stft():
     check_section_executes(ts_section, test_df)
 
 
-def test_generated_code_verbosity_medium():
+def test_generated_code_verbosity_medium(test_df: pd.DataFrame):
     ts_section = TimeseriesAnalysis(verbosity=Verbosity.MEDIUM)
-    test_df = get_test_df()
 
     exported_cells = []
     ts_section.add_cells(exported_cells, df=test_df)
@@ -283,8 +282,7 @@ def test_generated_code_verbosity_medium():
     check_section_executes(ts_section, test_df)
 
 
-def test_generated_code_verbosity_high():
-    test_df = get_test_df()
+def test_generated_code_verbosity_high(test_df: pd.DataFrame):
     ts_section = TimeseriesAnalysis(verbosity=Verbosity.HIGH, sampling_rate=1, stft_window_size=1)
 
     pairplot_cells = []
@@ -354,8 +352,7 @@ def test_generated_code_verbosity_high():
     check_section_executes(ts_section, test_df)
 
 
-def test_verbosity_low_different_subsection_verbosities():
-    test_df = get_test_df()
+def test_verbosity_low_different_subsection_verbosities(test_df: pd.DataFrame):
     ts_section = TimeseriesAnalysis(
         verbosity=Verbosity.LOW,
         subsections=[
@@ -396,7 +393,7 @@ def test_verbosity_low_different_subsection_verbosities():
         assert expected_line == exported_line, "Exported code mismatch"
 
 
-def test_boxplots_over_time_def():
+def test_boxplots_over_time_def(test_df: pd.DataFrame):
     def month_func(x: datetime) -> str:
         return str(x.month)
 
@@ -420,10 +417,10 @@ def month_func(x: datetime) -> str:
     for expected_line, exported_line in zip(expected_code, exported_code):
         assert expected_line == exported_line, "Exported code mismatch"
 
-    check_section_executes(boxplots_sub, get_test_df())
+    check_section_executes(boxplots_sub, test_df)
 
 
-def test_boxplots_over_time_lambda():
+def test_boxplots_over_time_lambda(test_df: pd.DataFrame):
     month_lambda = lambda x: x.month
 
     boxplots_sub = BoxplotsOverTime(grouping_name="Month", grouping_function=month_lambda)
@@ -443,7 +440,7 @@ def test_boxplots_over_time_lambda():
     for expected_line, exported_line in zip(expected_code, exported_code):
         assert expected_line == exported_line, "Exported code mismatch"
 
-    check_section_executes(boxplots_sub, get_test_df())
+    check_section_executes(boxplots_sub, test_df)
 
 
 def test_imports_verbosity_low():
diff --git a/tests/test_univariate_analysis_section.py b/tests/test_univariate_analysis_section.py
index f69a4d9..1c9df3f 100644
--- a/tests/test_univariate_analysis_section.py
+++ b/tests/test_univariate_analysis_section.py
@@ -13,7 +13,8 @@
 from .pyarrow_utils import pyarrow_parameterize
 
 
-def get_test_df(pyarrow_dtypes: bool) -> pd.DataFrame:
+@pytest.fixture
+def test_df(pyarrow_dtypes: bool) -> pd.DataFrame:
     test_df = pd.DataFrame(data=[[1.9, "a"], [2.1, "b"], [3.3, "c"]], columns=["A", "B"])
     if pyarrow_dtypes:
         test_df = test_df.convert_dtypes(dtype_backend="pyarrow")
@@ -33,8 +34,7 @@ def test_invalid_verbosity():
 
 
 @pyarrow_parameterize
-def test_code_export_verbosity_low(pyarrow_dtypes: bool):
-    test_df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_code_export_verbosity_low(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     # Construct univariate analysis section
     univariate_section = univariate_analysis.UnivariateAnalysis(verbosity=Verbosity.LOW)
     # Export code
@@ -51,8 +51,7 @@ def test_code_export_verbosity_low(pyarrow_dtypes: bool):
 
 
 @pyarrow_parameterize
-def test_code_export_verbosity_medium(pyarrow_dtypes: bool):
-    test_df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_code_export_verbosity_medium(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     # Construct univariate analysis section
     univariate_section = univariate_analysis.UnivariateAnalysis(verbosity=Verbosity.MEDIUM)
     # Export code
@@ -73,8 +72,7 @@ def test_code_export_verbosity_medium(pyarrow_dtypes: bool):
 
 
 @pyarrow_parameterize
-def test_code_export_verbosity_high(pyarrow_dtypes: bool):
-    test_df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_code_export_verbosity_high(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     # Construct univariate analysis section
     univariate_section = univariate_analysis.UnivariateAnalysis(verbosity=Verbosity.HIGH)
     # Export code
@@ -126,8 +124,7 @@ def test_code_export_verbosity_high(pyarrow_dtypes: bool):
 
 
 @pyarrow_parameterize
-def test_show(pyarrow_dtypes: bool):
-    test_df = get_test_df(pyarrow_dtypes=pyarrow_dtypes)
+def test_show(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     univariate_section = univariate_analysis.UnivariateAnalysis()
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)

From dfa281d6be703c4dd45f0608779c1827bdc9f371 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Bel=C3=A1k?= <michal.belak@datamole.cz>
Date: Fri, 1 Mar 2024 15:33:38 +0100
Subject: [PATCH 2/4] test: parametrize test_df fixtures over pyarrow dtypes

---
 tests/pyarrow_utils.py                    |  6 ++--
 tests/test_bivariate_analysis.py          | 11 ++++---
 tests/test_group_analysis.py              | 30 +++++++------------
 tests/test_multivariate_analysis.py       | 36 ++++++++---------------
 tests/test_univariate_analysis_section.py | 20 +++++--------
 tests/test_utils.py                       |  5 ++--
 6 files changed, 41 insertions(+), 67 deletions(-)

diff --git a/tests/pyarrow_utils.py b/tests/pyarrow_utils.py
index 6bbc8fe..c31372c 100644
--- a/tests/pyarrow_utils.py
+++ b/tests/pyarrow_utils.py
@@ -1,8 +1,6 @@
-import pytest
-
 from edvart.data_types import PYARROW_PANDAS_BACKEND_AVAILABLE
 
 if PYARROW_PANDAS_BACKEND_AVAILABLE:
-    pyarrow_parameterize = pytest.mark.parametrize("pyarrow_dtypes", [False, True])
+    pyarrow_params = [True, False]
 else:
-    pyarrow_parameterize = pytest.mark.parametrize("pyarrow_dtypes", [False])
+    pyarrow_params = [False]
diff --git a/tests/test_bivariate_analysis.py b/tests/test_bivariate_analysis.py
index 6b1460e..31a833f 100644
--- a/tests/test_bivariate_analysis.py
+++ b/tests/test_bivariate_analysis.py
@@ -11,13 +11,13 @@
 from edvart.report_sections.section_base import Verbosity
 
 from .execution_utils import check_section_executes
-from .pyarrow_utils import pyarrow_parameterize
+from .pyarrow_utils import pyarrow_params
 
 
-@pytest.fixture
-def test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame:
+@pytest.fixture(params=pyarrow_params)
+def test_df(request) -> pd.DataFrame:
     test_df = pd.DataFrame(data=[[1.1, "a"], [2.2, "b"], [3.3, "c"]], columns=["A", "B"])
-    if pyarrow_dtypes:
+    if request.param:
         test_df = test_df.convert_dtypes(dtype_backend="pyarrow")
 
     return test_df
@@ -450,8 +450,7 @@ def test_imports_verbosity_low_different_subsection_verbosities():
     assert set(exported_imports) == set(expected_imports)
 
 
-@pyarrow_parameterize
-def test_show(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_show(test_df: pd.DataFrame):
     bivariate_section = BivariateAnalysis()
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)
diff --git a/tests/test_group_analysis.py b/tests/test_group_analysis.py
index 6a431df..ddb98ba 100644
--- a/tests/test_group_analysis.py
+++ b/tests/test_group_analysis.py
@@ -22,14 +22,14 @@
 from edvart.report_sections.section_base import Verbosity
 
 from .execution_utils import check_section_executes
-from .pyarrow_utils import pyarrow_parameterize
+from .pyarrow_utils import pyarrow_params
 
 # Workaround to prevent multiple browser tabs opening with figures
 plotly.io.renderers.default = "json"
 
 
-@pytest.fixture
-def test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame:
+@pytest.fixture(params=pyarrow_params)
+def test_df(request) -> pd.DataFrame:
     test_df = pd.DataFrame(
         data=[
             ["P" if np.random.uniform() < 0.4 else "N", 1.5 * i, "X" if i % 2 == 0 else "Y"]
@@ -37,7 +37,7 @@ def test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame:
         ],
         columns=["A", "B", "C"],
     )
-    if pyarrow_dtypes:
+    if request.param:
         test_df = test_df.convert_dtypes(dtype_backend="pyarrow")
     return test_df
 
@@ -54,16 +54,14 @@ def test_invalid_verbosities():
         GroupAnalysis(groupby=[], verbosity=-1)
 
 
-@pyarrow_parameterize
-def test_groupby_nonexistent_col(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_groupby_nonexistent_col(test_df: pd.DataFrame):
     with pytest.raises(ValueError):
         show_group_analysis(df=test_df, groupby=["non-existent"])
     with pytest.raises(ValueError):
         group_missing_values(df=test_df, groupby=["non-existent"])
 
 
-@pyarrow_parameterize
-def test_static_methods(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_static_methods(test_df: pd.DataFrame):
     with redirect_stdout(None):
         show_group_analysis(df=test_df, groupby="C")
         show_group_analysis(df=test_df, groupby=["C"], columns=["A"])
@@ -88,8 +86,7 @@ def test_static_methods(pyarrow_dtypes: bool, test_df: pd.DataFrame):
         overlaid_histograms(test_df, groupby=["B"], column="B")
 
 
-@pyarrow_parameterize
-def test_code_export_verbosity_low(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_code_export_verbosity_low(test_df: pd.DataFrame):
     group_section = GroupAnalysis(groupby="B", verbosity=Verbosity.LOW)
 
     # Export code
@@ -106,8 +103,7 @@ def test_code_export_verbosity_low(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     check_section_executes(group_section, test_df)
 
 
-@pyarrow_parameterize
-def test_code_export_verbosity_medium(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_code_export_verbosity_medium(test_df: pd.DataFrame):
     group_section = GroupAnalysis(groupby="A", verbosity=Verbosity.MEDIUM)
 
     # Export code
@@ -134,8 +130,7 @@ def test_code_export_verbosity_medium(pyarrow_dtypes: bool, test_df: pd.DataFram
     check_section_executes(group_section, test_df)
 
 
-@pyarrow_parameterize
-def test_code_export_verbosity_high(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_code_export_verbosity_high(test_df: pd.DataFrame):
     group_section = GroupAnalysis(groupby="A", verbosity=Verbosity.HIGH)
 
     # Export code
@@ -190,8 +185,7 @@ def test_code_export_verbosity_high(pyarrow_dtypes: bool, test_df: pd.DataFrame)
     check_section_executes(group_section, test_df)
 
 
-@pyarrow_parameterize
-def test_columns_parameter(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_columns_parameter(test_df: pd.DataFrame):
     ga = GroupAnalysis(groupby="A", columns=["B"])
     assert ga.groupby == ["A"]
     assert ga.columns == ["B"]
@@ -211,9 +205,7 @@ def test_column_list_not_modified():
     assert columns == ["C"], "Column list modified"
 
 
-@pyarrow_parameterize
-def test_show(pyarrow_dtypes: bool, test_df: pd.DataFrame):
-    df = test_df
+def test_show(test_df: pd.DataFrame):
     group_section = GroupAnalysis(groupby="A")
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)
diff --git a/tests/test_multivariate_analysis.py b/tests/test_multivariate_analysis.py
index 48299ca..8b91f42 100644
--- a/tests/test_multivariate_analysis.py
+++ b/tests/test_multivariate_analysis.py
@@ -17,18 +17,14 @@
     MultivariateAnalysisSubsection,
 )
 from edvart.report_sections.section_base import Verbosity
-from edvart.utils import (
-    get_default_discrete_colorscale,
-    make_discrete_colorscale,
-    select_numeric_columns,
-)
+from edvart.utils import select_numeric_columns
 
 from .execution_utils import check_section_executes
-from .pyarrow_utils import pyarrow_parameterize
+from .pyarrow_utils import pyarrow_params
 
 
-@pytest.fixture
-def test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame:
+@pytest.fixture(params=pyarrow_params)
+def test_df(request) -> pd.DataFrame:
     test_df = pd.DataFrame(
         data=[
             [1.1, "a", 3.7, 3.9],
@@ -39,7 +35,7 @@ def test_df(pyarrow_dtypes: bool = False) -> pd.DataFrame:
         ],
         columns=["A", "B", "C", "D"],
     )
-    if pyarrow_dtypes:
+    if request.param:
         test_df = test_df.convert_dtypes(dtype_backend="pyarrow")
 
     return test_df
@@ -138,8 +134,7 @@ def test_section_adding():
         ), "Subsection should be UMAP"
 
 
-@pyarrow_parameterize
-def test_code_export_verbosity_low(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_code_export_verbosity_low(test_df: pd.DataFrame):
     df = test_df
     multivariate_section = MultivariateAnalysis(verbosity=Verbosity.LOW)
     # Export code
@@ -156,8 +151,7 @@ def test_code_export_verbosity_low(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     check_section_executes(multivariate_section, df)
 
 
-@pyarrow_parameterize
-def test_code_export_verbosity_low_with_subsections(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_code_export_verbosity_low_with_subsections(test_df: pd.DataFrame):
     subsec = MultivariateAnalysisSubsection
     subsections = [subsec.ParallelCategories, subsec.PCA, subsec.ParallelCoordinates, subsec.PCA]
     if UMAP_AVAILABLE:
@@ -227,8 +221,7 @@ def test_code_export_verbosity_medium_all_cols_valid():
     check_section_executes(multivariate_section, all_numeric_df)
 
 
-@pyarrow_parameterize
-def test_generated_code_verbosity_1(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_generated_code_verbosity_1(test_df: pd.DataFrame):
     multivariate_section = MultivariateAnalysis(verbosity=Verbosity.MEDIUM)
 
     exported_cells = []
@@ -265,8 +258,7 @@ def test_generated_code_verbosity_1(pyarrow_dtypes: bool, test_df: pd.DataFrame)
     check_section_executes(multivariate_section, test_df)
 
 
-@pyarrow_parameterize
-def test_generated_code_verbosity_2(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_generated_code_verbosity_2(test_df: pd.DataFrame):
     multivariate_section = MultivariateAnalysis(verbosity=Verbosity.HIGH)
 
     multivariate_cells = []
@@ -332,7 +324,7 @@ def test_generated_code_verbosity_2(pyarrow_dtypes: bool, test_df: pd.DataFrame)
     check_section_executes(multivariate_section, test_df)
 
 
-@pyarrow_parameterize
+@pytest.mark.parametrize("pyarrow_dtypes", pyarrow_params)
 def test_verbosity_medium_non_categorical_col(pyarrow_dtypes: bool):
     random_array = np.random.randint(low=1, high=40, size=(100, 3))
     random_df = pd.DataFrame(data=random_array, columns=["integral", "floating", "cat"])
@@ -357,10 +349,7 @@ def test_verbosity_medium_non_categorical_col(pyarrow_dtypes: bool):
     check_section_executes(multivariate_section, random_df)
 
 
-@pyarrow_parameterize
-def test_verbosity_low_different_subsection_verbosities(
-    pyarrow_dtypes: bool, test_df: pd.DataFrame
-):
+def test_verbosity_low_different_subsection_verbosities(test_df: pd.DataFrame):
     subsections = [
         MultivariateAnalysisSubsection.PCA,
         MultivariateAnalysisSubsection.PCA,
@@ -479,8 +468,7 @@ def test_imports_verbosity_low_different_subsection_verbosities():
     assert set(exported_imports) == set(expected_imports)
 
 
-@pyarrow_parameterize
-def test_show(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_show(test_df: pd.DataFrame):
     multivariate_section = MultivariateAnalysis()
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)
diff --git a/tests/test_univariate_analysis_section.py b/tests/test_univariate_analysis_section.py
index 1c9df3f..d5cf6ea 100644
--- a/tests/test_univariate_analysis_section.py
+++ b/tests/test_univariate_analysis_section.py
@@ -10,13 +10,13 @@
 from edvart.report_sections.section_base import Verbosity
 
 from .execution_utils import check_section_executes
-from .pyarrow_utils import pyarrow_parameterize
+from .pyarrow_utils import pyarrow_params
 
 
-@pytest.fixture
-def test_df(pyarrow_dtypes: bool) -> pd.DataFrame:
+@pytest.fixture(params=pyarrow_params)
+def test_df(request) -> pd.DataFrame:
     test_df = pd.DataFrame(data=[[1.9, "a"], [2.1, "b"], [3.3, "c"]], columns=["A", "B"])
-    if pyarrow_dtypes:
+    if request.param:
         test_df = test_df.convert_dtypes(dtype_backend="pyarrow")
 
     return test_df
@@ -33,8 +33,7 @@ def test_invalid_verbosity():
         univariate_analysis.UnivariateAnalysis(verbosity="1")
 
 
-@pyarrow_parameterize
-def test_code_export_verbosity_low(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_code_export_verbosity_low(test_df: pd.DataFrame):
     # Construct univariate analysis section
     univariate_section = univariate_analysis.UnivariateAnalysis(verbosity=Verbosity.LOW)
     # Export code
@@ -50,8 +49,7 @@ def test_code_export_verbosity_low(pyarrow_dtypes: bool, test_df: pd.DataFrame):
     check_section_executes(univariate_section, test_df)
 
 
-@pyarrow_parameterize
-def test_code_export_verbosity_medium(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_code_export_verbosity_medium(test_df: pd.DataFrame):
     # Construct univariate analysis section
     univariate_section = univariate_analysis.UnivariateAnalysis(verbosity=Verbosity.MEDIUM)
     # Export code
@@ -71,8 +69,7 @@ def test_code_export_verbosity_medium(pyarrow_dtypes: bool, test_df: pd.DataFram
     check_section_executes(univariate_section, test_df)
 
 
-@pyarrow_parameterize
-def test_code_export_verbosity_high(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_code_export_verbosity_high(test_df: pd.DataFrame):
     # Construct univariate analysis section
     univariate_section = univariate_analysis.UnivariateAnalysis(verbosity=Verbosity.HIGH)
     # Export code
@@ -123,8 +120,7 @@ def test_code_export_verbosity_high(pyarrow_dtypes: bool, test_df: pd.DataFrame)
     check_section_executes(univariate_section, test_df)
 
 
-@pyarrow_parameterize
-def test_show(pyarrow_dtypes: bool, test_df: pd.DataFrame):
+def test_show(test_df: pd.DataFrame):
     univariate_section = univariate_analysis.UnivariateAnalysis()
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index aad6be5..d097993 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -4,13 +4,14 @@
 
 import numpy as np
 import pandas as pd
+import pytest
 
 from edvart import utils
 
-from .pyarrow_utils import pyarrow_parameterize
+from .pyarrow_utils import pyarrow_params
 
 
-@pyarrow_parameterize
+@pytest.mark.parametrize("pyarrow_dtypes", pyarrow_params)
 def test_full_na_series(pyarrow_dtypes: bool):
     series = pd.Series([None, np.nan, None])
     if pyarrow_dtypes:

From 00b255c28d6cef05c9b3528b8d858953e3244bec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Bel=C3=A1k?= <michal.belak@datamole.cz>
Date: Mon, 4 Mar 2024 13:35:10 +0100
Subject: [PATCH 3/4] test: test data type inference with pyarrow

---
 tests/test_data_type_inference.py | 211 ++++++++++++++++--------------
 1 file changed, 112 insertions(+), 99 deletions(-)

diff --git a/tests/test_data_type_inference.py b/tests/test_data_type_inference.py
index aedc9f2..4095fe9 100644
--- a/tests/test_data_type_inference.py
+++ b/tests/test_data_type_inference.py
@@ -1,115 +1,128 @@
 import numpy as np
 import pandas as pd
+import pytest
 
 from edvart import data_types
 
+from .pyarrow_utils import pyarrow_params
 
-def test_inference():
-    assert (
-        data_types.infer_data_type(pd.Series([0.12, 3565.3, 234, 1, -49, 14, 5, 88, 12312]))
-        == data_types.DataType.NUMERIC
-    ), "Should be numeric type"
-    assert (
-        data_types.infer_data_type(
-            pd.Series(["2014-01-01 12:05:02", "2014-01-02 13:05:02", "2014-12-03 14:05:02"])
-        )
-        == data_types.DataType.DATE
-    ), "Should be date type"
-    assert (
-        data_types.infer_data_type(pd.Series(["A", "B", "C", "C", "A", "B"]))
-        == data_types.DataType.CATEGORICAL
-    ), "Should be categorical type"
-    assert (
-        data_types.infer_data_type(pd.Series([True, False, False, True, True]))
-        == data_types.DataType.BOOLEAN
-    ), "Should be boolean type"
-    assert data_types.infer_data_type(
-        pd.Series([None, None, np.nan, float("nan")]) == data_types.DataType.MISSING
-    ), "Should be missing"
-    assert (
-        data_types.infer_data_type(pd.Series(list(range(10)))) == data_types.DataType.UNIQUE
-    ), "Should be unique"
-    assert (
-        data_types.infer_data_type(pd.Series([1] + list(range(100)))) == data_types.DataType.NUMERIC
-    ), "Should be numeric"
-    assert (
-        data_types.infer_data_type(pd.Series(dtype=pd.Float64Dtype)) == data_types.DataType.UNKNOWN
-    ), "Should be unknown"
-    assert data_types.infer_data_type(
-        pd.Series([True, False]) == data_types.DataType.BOOLEAN
-    ), "Should be boolean"
 
+@pytest.mark.parametrize("pyarrow_dtypes", pyarrow_params)
+@pytest.mark.parametrize(
+    "data, expected",
+    [
+        (pd.Series([0.12, 3565.3, 234, 1, -49, 14, 5, 88, 12312]), data_types.DataType.NUMERIC),
+        (
+            pd.Series(["2014-01-01 12:05:02", "2014-01-02 13:05:02", "2014-12-03 14:05:02"]),
+            data_types.DataType.DATE,
+        ),
+        (pd.Series(["A", "B", "C", "C", "A", "B"]), data_types.DataType.CATEGORICAL),
+        (pd.Series([True, False, False, True, True]), data_types.DataType.BOOLEAN),
+        (pd.Series([None, None, np.nan, float("nan")]), data_types.DataType.MISSING),
+        (pd.Series(list(range(10))), data_types.DataType.UNIQUE),
+        (pd.Series([1] + list(range(100))), data_types.DataType.NUMERIC),
+        (pd.Series(dtype=pd.Float64Dtype), data_types.DataType.UNKNOWN),
+        (pd.Series([True, False]), data_types.DataType.BOOLEAN),
+    ],
+)
+def test_inference(data, expected, pyarrow_dtypes):
+    if pyarrow_dtypes:
+        data = data.convert_dtypes(dtype_backend="pyarrow")
+    assert data_types.infer_data_type(data) == expected
 
-def test_missing_series():
-    assert data_types.is_missing(pd.Series([None, None, np.nan, float("nan")])), "Should be missing"
-    assert data_types.is_missing(pd.Series([pd.NA])), "Should be missing"
-    assert not data_types.is_missing(pd.Series([1, np.nan, None])), "Should not be missing"
-    assert not data_types.is_missing(pd.Series(["2023-01-01", None])), "Should not be missing"
 
+@pytest.mark.parametrize("pyarrow_dtypes", pyarrow_params)
+@pytest.mark.parametrize(
+    "data, is_missing",
+    [
+        (pd.Series([None, None, np.nan, float("nan")]), True),
+        (pd.Series([pd.NA]), True),
+        (pd.Series([1, np.nan, None]), False),
+        (pd.Series(["2023-01-01", None]), False),
+    ],
+)
+def test_missing_series(data, is_missing, pyarrow_dtypes):
+    if pyarrow_dtypes:
+        data = data.convert_dtypes(dtype_backend="pyarrow")
+    assert data_types.is_missing(data) == is_missing
 
-def test_numeric_series():
-    assert data_types.is_numeric(
-        pd.Series([0.12, 3565.3, 234, 1, -49, 14, 5, 88, 12312])
-    ), "Should be numeric type"
-    assert data_types.is_numeric(pd.Series([23, 45, 2, 1.2, -3, -66])), "Should be numeric type"
-    assert not data_types.is_numeric(
-        pd.Series([23, 45, 2, 1, -3, -66, "NULL", "a string"])
-    ), "Should not be numeric type"
-    assert not data_types.is_numeric(
-        pd.Series([23, 45, 2, 1, -3, -66, "99", "-1207"])
-    ), "Should not be numeric type"
-    assert not data_types.is_numeric(
-        pd.Series([None, None, np.nan, float("nan")])
-    ), "Should not be numeric"
 
+@pytest.mark.parametrize("pyarrow_dtypes", pyarrow_params)
+@pytest.mark.parametrize(
+    "data, is_numeric",
+    [
+        (pd.Series([0.12, 3565.3, 234, 1, -49, 14, 5, 88, 12312]), True),
+        (pd.Series([23, 45, 2, 1.2, -3, -66]), True),
+        (pd.Series([23, 45, 2, 1, -3, -66, "NULL", "a string"]), False),
+        (pd.Series([23, 45, 2, 1, -3, -66, "99", "-1207"]), False),
+        (pd.Series([None, None, np.nan, float("nan")]), False),
+    ],
+)
+def test_numeric_series(data, is_numeric, pyarrow_dtypes):
+    if pyarrow_dtypes:
+        data = data.convert_dtypes(dtype_backend="pyarrow")
+    assert data_types.is_numeric(data) == is_numeric
 
-def test_categorical_series():
-    assert data_types.is_categorical(pd.Series(["A", "B", "C", "D"])), "Should be categorical"
-    assert data_types.is_categorical(
-        pd.Series([1, 2, 3, 4, 4, 4, 1, 1, 1, 2, 2, 3, 4])
-    ), "Should be categorical"
-    assert not data_types.is_categorical(
-        pd.Series([1, 2, 31, 4, 52, 6, 87, 87.7, 9, 1, 3, 4, 1, 10, 123123, 9876, 1.2, 6.8])
-    ), "Should not be categorical"
-    assert not data_types.is_categorical(
-        pd.Series([None, None, np.nan, float("nan")])
-    ), "Should not be categorical"
-    assert not data_types.is_categorical(pd.Series([pd.NA])), "Should not be categorical"
 
+@pytest.mark.parametrize("pyarrow_dtypes", pyarrow_params)
+@pytest.mark.parametrize(
+    "data, is_categorical",
+    [
+        (pd.Series(["A", "B", "C", "D"]), True),
+        (pd.Series([1, 2, 3, 4, 4, 4, 1, 1, 1, 2, 2, 3, 4]), True),
+        (
+            pd.Series([1, 2, 31, 4, 52, 6, 87, 87.7, 9, 1, 3, 4, 1, 10, 123123, 9876, 1.2, 6.8]),
+            False,
+        ),
+        (pd.Series([None, None, np.nan, float("nan")]), False),
+        (pd.Series([pd.NA]), False),
+    ],
+)
+def test_categorical_series(data, is_categorical, pyarrow_dtypes):
+    if pyarrow_dtypes:
+        data = data.convert_dtypes(dtype_backend="pyarrow")
+    assert data_types.is_categorical(data) == is_categorical
 
-def test_boolean_series():
-    assert data_types.is_boolean(pd.Series([True, False, False, True, True])), "Should be boolean"
-    assert data_types.is_boolean(pd.Series([False, False, False])), "Should be boolean"
-    assert data_types.is_boolean(pd.Series([True, True, True])), "Should be boolean"
-    assert data_types.is_boolean(pd.Series([1, 0, 0, 1])), "Should be boolean"
-    assert data_types.is_boolean(pd.Series([0, 0, 0, 0])), "Should be boolean"
-    assert data_types.is_boolean(pd.Series([1, 1, 1, 1])), "Should be boolean"
-    assert not data_types.is_boolean(
-        pd.Series([True, False, False, True, True, "True"])
-    ), "Should not be boolean"
-    assert not data_types.is_boolean(pd.Series([2, 2, 2, 2])), "Should not be boolean"
-    assert not data_types.is_boolean(pd.Series([1, 0, 0, 1, 3])), "Should not be boolean"
-    assert not data_types.is_boolean(pd.Series(["a", "abc", "2"])), "Should not be boolean"
-    assert not data_types.is_boolean(pd.Series(["A", "B", "A", "A", "B"])), "Should not be boolean"
-    assert not data_types.is_boolean(pd.Series([-0.2, 1.6567, 3, 4, 5])), "Should not be boolean"
-    assert not data_types.is_boolean(pd.Series([None])), "Should not be boolean"
 
+@pytest.mark.parametrize("pyarrow_dtypes", pyarrow_params)
+@pytest.mark.parametrize(
+    "data, is_boolean",
+    [
+        (pd.Series([True, False, False, True, True]), True),
+        (pd.Series([False, False, False]), True),
+        (pd.Series([True, True, True]), True),
+        (pd.Series([1, 0, 0, 1]), True),
+        (pd.Series([0, 0, 0, 0]), True),
+        (pd.Series([1, 1, 1, 1]), True),
+        (pd.Series([True, False, False, True, True, "True"]), False),
+        (pd.Series([2, 2, 2, 2]), False),
+        (pd.Series([1, 0, 0, 1, 3]), False),
+        (pd.Series(["a", "abc", "2"]), False),
+        (pd.Series(["A", "B", "A", "A", "B"]), False),
+        (pd.Series([-0.2, 1.6567, 3, 4, 5]), False),
+        (pd.Series([None]), False),
+    ],
+)
+def test_boolean_series(data, is_boolean, pyarrow_dtypes):
+    if pyarrow_dtypes:
+        data = data.convert_dtypes(dtype_backend="pyarrow")
+    assert data_types.is_boolean(data) == is_boolean
 
-def test_date_series():
-    assert data_types.is_date(
-        pd.Series(["2014-01-01 12:05:02", "2014-01-02 13:05:02", "2014-12-03 14:05:02"])
-    ), "Should be type date"
-    assert data_types.is_date(
-        pd.Series(["Mar 12 2018", "Dec 12 2018", "Jan 21 2020"])
-    ), "Should be type date"
-    assert not data_types.is_date(
-        pd.Series(["2014-01-01", "2014-01-02", "2014-12-03T14:05:02", "nan"])
-    ), "Should not be type date"
-    assert not data_types.is_date(
-        pd.Series(["2014-01-01", "2014-01-02", "2014-12-03 14:05:02", 1, 2, 3])
-    ), "Should not be type date"
-    assert not data_types.is_date(pd.Series([1, 2, 3, 4, 5])), "Should not be type date"
-    assert not data_types.is_date(pd.Series([None, 2.0, 3, 4, 5])), "Should not be type date"
-    assert data_types.is_date(
-        pd.Series([pd.Timestamp("20130101"), pd.Timestamp("20230102"), None])
-    ), "Should be type date"
+
+@pytest.mark.parametrize("pyarrow_dtypes", pyarrow_params)
+@pytest.mark.parametrize(
+    "data, is_date",
+    [
+        (pd.Series(["2014-01-01 12:05:02", "2014-01-02 13:05:02", "2014-12-03 14:05:02"]), True),
+        (pd.Series(["Mar 12 2018", "Dec 12 2018", "Jan 21 2020"]), True),
+        (pd.Series(["2014-01-01", "2014-01-02", "2014-12-03T14:05:02", "nan"]), False),
+        (pd.Series(["2014-01-01", "2014-01-02", "2014-12-03 14:05:02", 1, 2, 3]), False),
+        (pd.Series([1, 2, 3, 4, 5]), False),
+        (pd.Series([None, 2.0, 3, 4, 5]), False),
+        (pd.Series([pd.Timestamp("20130101"), pd.Timestamp("20230102"), None]), True),
+    ],
+)
+def test_date_series(data, is_date, pyarrow_dtypes):
+    if pyarrow_dtypes:
+        data = data.convert_dtypes(dtype_backend="pyarrow")
+    assert data_types.is_date(data) == is_date

From 42f3802028db32d72efbb3dc8fc7e5270dd569ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Bel=C3=A1k?= <michal.belak@datamole.cz>
Date: Fri, 8 Mar 2024 11:35:25 +0100
Subject: [PATCH 4/4] test: use test_df fixture in notebook export tests

---
 tests/test_report.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_report.py b/tests/test_report.py
index 99edc83..80960d9 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -95,8 +95,8 @@ def test_show(test_df: pd.DataFrame):
             report.show()
 
 
-def test_notebook_export(tmp_path: pathlib.Path):
-    report = Report(dataframe=_get_test_df())
+def test_notebook_export(tmp_path: pathlib.Path, test_df: pd.DataFrame):
+    report = Report(dataframe=test_df)
 
     report.add_overview()
     for export_data_mode in (
@@ -112,8 +112,8 @@ def test_notebook_export(tmp_path: pathlib.Path):
         )
 
 
-def test_exported_notebook_executes(tmp_path: pathlib.Path):
-    report = Report(dataframe=_get_test_df())
+def test_exported_notebook_executes(tmp_path: pathlib.Path, test_df: pd.DataFrame):
+    report = Report(dataframe=test_df)
 
     report.add_overview()
     for export_data_mode in (ExportDataMode.EMBED, ExportDataMode.FILE):