Skip to content

Commit

Permalink
SparkTestHelper remove spark dependency
Browse files Browse the repository at this point in the history
If `test_helpers` depends on the `spark` fixture, then a Spark session gets created whenever we request the `test_helpers` fixture, which is slow. Now both the `test_helpers` and `spark` fixtures use a shared helper function (`_make_spark`), with the upshot that, for `test_helpers`, Spark is only created when `SparkTestHelper` is actually instantiated.
  • Loading branch information
ADBond committed Nov 6, 2023
1 parent e4267e8 commit 70aacc2
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
12 changes: 8 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ def pytest_collection_modifyitems(items, config):
item.add_marker(mark)


@pytest.fixture(scope="module")
def spark():
def _make_spark():
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession

Expand All @@ -56,7 +55,12 @@ def spark():

spark = SparkSession(sc)
spark.sparkContext.setCheckpointDir("./tmp_checkpoints")
return spark


@pytest.fixture(scope="module")
def spark():
spark = _make_spark()
yield spark


Expand All @@ -71,14 +75,14 @@ def df_spark(spark):
# see e.g. https://stackoverflow.com/a/42400786/11811947
# ruff: noqa: F811
@pytest.fixture
def test_helpers(spark, pg_engine):
def test_helpers(pg_engine):
# LazyDict to lazy-load helpers
# That way we do not instantiate helpers we do not need
# e.g. running only duckdb tests we don't need PostgresTestHelper
# so we can run duckdb tests in environments w/o access to postgres
return LazyDict(
duckdb=(DuckDBTestHelper, []),
spark=(SparkTestHelper, [spark]),
spark=(SparkTestHelper, [_make_spark]),
sqlite=(SQLiteTestHelper, []),
postgres=(PostgresTestHelper, [pg_engine]),
)
Expand Down
4 changes: 2 additions & 2 deletions tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ def brl(self):


class SparkTestHelper(TestHelper):
def __init__(self, spark):
self.spark = spark
def __init__(self, spark_creator_function):
self.spark = spark_creator_function()

@property
def Linker(self):
Expand Down

0 comments on commit 70aacc2

Please sign in to comment.