Skip to content

Commit

Permalink
testing duckdb with sqlalchemy and dbapi connections
Browse files Browse the repository at this point in the history
  • Loading branch information
edublancas committed Jul 13, 2023
1 parent ec6e9db commit 0635687
Showing 1 changed file with 127 additions and 125 deletions.
252 changes: 127 additions & 125 deletions src/tests/integration/test_duckDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,29 @@
import logging
import pytest

from sqlalchemy import text
import polars as pl
import pandas as pd

from sql.connection import Connection
from sql.run import ResultSet
from sql import connection
from sql.warnings import JupySQLDataFramePerformanceWarning


# TODO: reconcile the fixtures in this module with the ones already defined in conftest.py
@pytest.fixture
def ip_duckdb_native(ip_empty_testing):
ip_empty_testing.run_cell("import duckdb; conn = duckdb.connect()")
def ip_duckdb_native_empty(tmp_empty, ip_empty_testing):
ip_empty_testing.run_cell("import duckdb; conn = duckdb.connect('my.db')")
ip_empty_testing.run_cell("%sql conn --alias duck")
yield ip_empty_testing
ip_empty_testing.run_cell("conn.close()")


@pytest.fixture
def ip_duckdb_sqlalchemy_empty(tmp_empty, ip_empty_testing):
    """Yield the IPython test shell connected to DuckDB through a URL.

    Setup runs ``%sql duckdb:///my.db --alias duckdb`` in the shell, so the
    connection is registered under the alias ``duckdb``; teardown closes that
    alias with ``%sql --close duckdb``.

    ``tmp_empty`` is requested but never referenced in the body.
    NOTE(review): presumably it switches to an empty temporary directory so
    ``my.db`` is created fresh for each test -- confirm in conftest.py.
    """
    shell = ip_empty_testing
    shell.run_cell("%sql duckdb:///my.db --alias duckdb")
    yield shell
    # Runs after the test that requested the fixture has finished.
    shell.run_cell("%sql --close duckdb")


@pytest.mark.parametrize(
"ip, exp",
[
Expand Down Expand Up @@ -67,7 +72,7 @@ def test_auto_commit_mode_off(ip, caplog, request):
assert any("weather" == table[0] for table in tables_out)


def test_dbapi_connection_sets_right_dialect(ip_duckdb_native):
def test_dbapi_connection_sets_right_dialect(ip_with_duckDB_native):
assert Connection.current.is_dbapi_connection()
assert Connection.current.dialect == "duckdb"

Expand All @@ -79,24 +84,28 @@ def test_dbapi_connection_sets_right_dialect(ip_duckdb_native):
("PolarsDataFrame", pl.DataFrame, "pl"),
],
)
def test_converts_to_data_frames_natively(
def test_native_connection_converts_to_data_frames_natively(
monkeypatch,
ip_duckdb_native,
ip_duckdb_native_empty,
method,
expected_type,
expected_native_method,
):
ip_duckdb_native.run_cell("%sql CREATE TABLE weather (city VARCHAR, temp_lo INT);")
ip_duckdb_native.run_cell("%sql INSERT INTO weather VALUES ('San Francisco', 46);")
ip_duckdb_native.run_cell("%sql INSERT INTO weather VALUES ('NYC', 20);")
ip_duckdb_native.run_cell("results = %sql SELECT * FROM weather")
ip_duckdb_native_empty.run_cell(
"%sql CREATE TABLE weather (city VARCHAR, temp_lo INT);"
)
ip_duckdb_native_empty.run_cell(
"%sql INSERT INTO weather VALUES ('San Francisco', 46);"
)
ip_duckdb_native_empty.run_cell("%sql INSERT INTO weather VALUES ('NYC', 20);")
ip_duckdb_native_empty.run_cell("results = %sql SELECT * FROM weather")

results = ip_duckdb_native.run_cell("results").result
results = ip_duckdb_native_empty.run_cell("results").result

mock = Mock(wraps=results.sqlaproxy)
monkeypatch.setattr(results, "_sqlaproxy", mock)

out = ip_duckdb_native.run_cell(f"results.{method}()")
out = ip_duckdb_native_empty.run_cell(f"results.{method}()")

mock.execute.assert_called_once_with("SELECT * FROM weather")
getattr(mock, expected_native_method).assert_called_once_with()
Expand All @@ -115,21 +124,24 @@ def test_converts_to_data_frames_natively(
"autopolars_on",
],
)
def test_auto_data_frame_config(
ip_duckdb_native,
def test_convert_to_dataframe_automatically(
ip_duckdb_native_empty,
conversion_cell,
expected_type,
):
ip_duckdb_native.run_cell(conversion_cell)
ip_duckdb_native.run_cell("%sql CREATE TABLE weather (city VARCHAR, temp_lo INT);")
ip_duckdb_native.run_cell("%sql INSERT INTO weather VALUES ('San Francisco', 46);")
ip_duckdb_native.run_cell("%sql INSERT INTO weather VALUES ('NYC', 20);")
df = ip_duckdb_native.run_cell("%sql SELECT * FROM weather").result
ip_duckdb_native_empty.run_cell(conversion_cell)
ip_duckdb_native_empty.run_cell(
"%sql CREATE TABLE weather (city VARCHAR, temp_lo INT);"
)
ip_duckdb_native_empty.run_cell(
"%sql INSERT INTO weather VALUES ('San Francisco', 46);"
)
ip_duckdb_native_empty.run_cell("%sql INSERT INTO weather VALUES ('NYC', 20);")
df = ip_duckdb_native_empty.run_cell("%sql SELECT * FROM weather").result
assert isinstance(df, expected_type)
assert df.shape == (2, 2)


# TODO: make this an integration test, also try with native connections
@pytest.mark.parametrize(
"config",
[
Expand Down Expand Up @@ -165,83 +177,101 @@ def test_auto_data_frame_config(
"multiple_tables_created",
],
)
def test_multiple_statements(ip_empty_testing, config, sql, tables):
ip_empty_testing.run_cell("%sql duckdb://")
ip_empty_testing.run_cell(config)

ip_empty_testing.run_cell("%sql CREATE TABLE weather (city VARCHAR,);")
ip_empty_testing.run_cell("%sql INSERT INTO weather VALUES ('NYC');")
ip_empty_testing.run_cell("%sql SELECT * FROM weather;")
@pytest.mark.parametrize(
"ip",
[
"ip_duckdb_native_empty",
"ip_duckdb_sqlalchemy_empty",
],
)
def test_multiple_statements(ip, config, sql, tables, request):
ip_ = request.getfixturevalue(ip)
ip_.run_cell(config)

out = ip_empty_testing.run_cell(sql)
out_tables = ip_empty_testing.run_cell("%sqlcmd tables")
ip_.run_cell("%sql CREATE TABLE weather (city VARCHAR,);")
ip_.run_cell("%sql INSERT INTO weather VALUES ('NYC');")
ip_.run_cell("%sql SELECT * FROM weather;")

assert out.error_in_exec is None
out = ip_.run_cell(sql)

if config == "%config SqlMagic.autopandas = True":
assert out.result.to_dict() == {"city": {0: "NYC"}}
else:
assert out.result.dict() == {"city": ("NYC",)}

assert set(tables) == set(r[0] for r in out_tables.result._table.rows)
if ip == "ip_duckdb_sqlalchemy_empty":
out_tables = ip_.run_cell("%sqlcmd tables")
assert set(tables) == set(r[0] for r in out_tables.result._table.rows)


# TODO: make this an integration test, also try with native connections
# @pytest.mark.parametrize(
# "config",
# [
# "%config SqlMagic.autopandas = True",
# "%config SqlMagic.autopandas = False",
# ],
# ids=[
# "autopandas_on",
# "autopandas_off",
# ],
# )
# @pytest.mark.parametrize(
# "sql, tables",
# [
# [
# (
# "%sql CREATE TEMP TABLE some_table (city VARCHAR,);"
# "CREATE TABLE more_names (city VARCHAR,);"
# "INSERT INTO some_table VALUES ('NYC');"
# "SELECT * FROM some_table;"
# ),
# ["more_names"],
# ],
# ],
# ids=[
# "multiple_selects",
# ],
# )
# def test_tmp_table(ip_empty_testing, config, sql, tables):
# ip_empty_testing.run_cell("%sql duckdb://")
# ip_empty_testing.run_cell(config)

# out = ip_empty_testing.run_cell(sql)
# out_tables = ip_empty_testing.run_cell("%sqlcmd tables")

# if config == "%config SqlMagic.autopandas = True":
# assert out.result.to_dict() == {"city": {0: "NYC"}}
# else:
# assert out.result.dict() == {"city": ("NYC",)}

# assert set(tables) == set(r[0] for r in out_tables.result._table.rows)


def test_dataframe_returned_only_if_last_statement_is_select(ip_empty):
ip_empty.run_cell("%sql duckdb://")
ip_empty.run_cell("%config SqlMagic.autopandas=True")
connection.Connection.connections["duckdb://"].engine.raw_connection = Mock(
side_effect=ValueError("some error")
)
@pytest.mark.parametrize(
"config",
[
"%config SqlMagic.autopandas = True",
"%config SqlMagic.autopandas = False",
],
ids=[
"autopandas_on",
"autopandas_off",
],
)
@pytest.mark.parametrize(
"sql, tables",
[
[
(
"%sql CREATE TEMP TABLE some_table (city VARCHAR,);"
"CREATE TABLE more_names (city VARCHAR,);"
"INSERT INTO some_table VALUES ('NYC');"
"SELECT * FROM some_table;"
),
["more_names"],
],
],
ids=[
"multiple_selects",
],
)
@pytest.mark.parametrize(
"ip",
[
pytest.param(
"ip_duckdb_native_empty",
marks=pytest.mark.xfail(
reason="Currently, native DuckDB runs each "
"statement in a separate cursor"
),
),
"ip_duckdb_sqlalchemy_empty",
],
)
def test_tmp_table(ip, config, sql, tables, request):
ip = request.getfixturevalue(ip)
ip.run_cell(config)

out = ip_empty.run_cell(
"%sql CREATE TABLE a (c VARCHAR,); CREATE TABLE b (c VARCHAR,);"
)
out = ip.run_cell(sql)
out_tables = ip.run_cell("%sqlcmd tables")

assert out.error_in_exec is None
if config == "%config SqlMagic.autopandas = True":
assert out.result.to_dict() == {"city": {0: "NYC"}}
else:
assert out.result.dict() == {"city": ("NYC",)}

assert set(tables) == set(r[0] for r in out_tables.result._table.rows)


@pytest.mark.parametrize(
    "ip",
    ["ip_duckdb_native_empty", "ip_duckdb_sqlalchemy_empty"],
)
def test_empty_data_frame_if_last_statement_is_not_select(ip, request):
    """With autopandas on, a cell holding only CREATE TABLE statements
    must produce a result whose length is zero.

    ``ip`` is the *name* of a fixture; it is resolved at run time through
    ``request.getfixturevalue`` so the same test body runs against each
    parametrized connection fixture.
    """
    shell = request.getfixturevalue(ip)
    shell.run_cell("%config SqlMagic.autopandas=True")

    statements = "%sql CREATE TABLE a (c VARCHAR,); CREATE TABLE b (c VARCHAR,);"
    out = shell.run_cell(statements)

    assert len(out.result) == 0


@pytest.mark.parametrize(
Expand All @@ -262,48 +292,20 @@ def test_dataframe_returned_only_if_last_statement_is_select(ip_empty):
""",
],
)
def test_commits_all_statements(ip_empty, sql):
ip_empty.run_cell("%sql duckdb://")
out = ip_empty.run_cell(sql)
@pytest.mark.parametrize(
"ip",
[
"ip_duckdb_native_empty",
"ip_duckdb_sqlalchemy_empty",
],
)
def test_commits_all_statements(ip, sql, request):
ip = request.getfixturevalue(ip)
out = ip.run_cell(sql)
assert out.error_in_exec is None
assert out.result.dict() == {"x": (1, 2)}


def test_resultset_uses_native_duckdb_df(ip_empty):
    """Build a ``ResultSet`` by hand over a DuckDB engine and convert it.

    The test creates table ``a`` with three rows, wraps the pending result of
    ``SELECT * FROM a`` in a ``ResultSet`` configured with ``displaylimit=1``
    and ``autolimit=0``, and then checks that:

    * ``DataFrame()`` returns a ``pandas.DataFrame`` holding all three rows;
    * ``fetchmany`` on the underlying result was called exactly once, with
      ``size=2``.
    """
    from sqlalchemy import create_engine
    from sql.connection import Connection

    engine = create_engine("duckdb://")
    db_session = engine.connect()

    for setup_statement in (
        "CREATE TABLE a (x INT,);",
        "INSERT INTO a(x) VALUES (10),(20),(30);",
    ):
        db_session.execute(text(setup_statement))

    query = text("SELECT * FROM a")

    # this breaks if there's an open results set
    db_session.execute(query).fetchall()

    pending = db_session.execute(query)

    Connection.set(engine, displaycon=False)

    # Wrap (rather than replace) fetchmany so the real call still happens
    # while the invocation is recorded for the final assertion.
    pending.fetchmany = Mock(wraps=pending.fetchmany)

    config = Mock()
    config.displaylimit = 1
    config.autolimit = 0

    result_set = ResultSet(pending, config, statement=query, conn=Mock())
    frame = result_set.DataFrame()

    assert isinstance(frame, pd.DataFrame)
    assert frame.to_dict() == {"x": {0: 10, 1: 20, 2: 30}}

    pending.fetchmany.assert_called_once_with(size=2)


@pytest.mark.parametrize("method", ["DataFrame", "PolarsDataFrame"])
def test_warn_when_using_sqlalchemy_and_converting_to_dataframe(ip_empty, method):
ip_empty.run_cell("%sql duckdb://")
Expand Down

0 comments on commit 0635687

Please sign in to comment.