From 0635687c0a4a8dc247016c665e51f0e7e1dbf684 Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Date: Thu, 13 Jul 2023 17:40:53 -0600 Subject: [PATCH] testing duckdb with sqlalchemy and dbapi connections --- src/tests/integration/test_duckDB.py | 252 ++++++++++++++------------- 1 file changed, 127 insertions(+), 125 deletions(-) diff --git a/src/tests/integration/test_duckDB.py b/src/tests/integration/test_duckDB.py index a07fdaaa1..1962e84f0 100644 --- a/src/tests/integration/test_duckDB.py +++ b/src/tests/integration/test_duckDB.py @@ -2,24 +2,29 @@ import logging import pytest -from sqlalchemy import text import polars as pl import pandas as pd from sql.connection import Connection -from sql.run import ResultSet -from sql import connection from sql.warnings import JupySQLDataFramePerformanceWarning +# TODO: reconcile the fixtures, we have other fixtures in conftest.py @pytest.fixture -def ip_duckdb_native(ip_empty_testing): - ip_empty_testing.run_cell("import duckdb; conn = duckdb.connect()") +def ip_duckdb_native_empty(tmp_empty, ip_empty_testing): + ip_empty_testing.run_cell("import duckdb; conn = duckdb.connect('my.db')") ip_empty_testing.run_cell("%sql conn --alias duck") yield ip_empty_testing ip_empty_testing.run_cell("conn.close()") +@pytest.fixture +def ip_duckdb_sqlalchemy_empty(tmp_empty, ip_empty_testing): + ip_empty_testing.run_cell("%sql duckdb:///my.db --alias duckdb") + yield ip_empty_testing + ip_empty_testing.run_cell("%sql --close duckdb") + + @pytest.mark.parametrize( "ip, exp", [ @@ -67,7 +72,7 @@ def test_auto_commit_mode_off(ip, caplog, request): assert any("weather" == table[0] for table in tables_out) -def test_dbapi_connection_sets_right_dialect(ip_duckdb_native): +def test_dbapi_connection_sets_right_dialect(ip_with_duckDB_native): assert Connection.current.is_dbapi_connection() assert Connection.current.dialect == "duckdb" @@ -79,24 +84,28 @@ def test_dbapi_connection_sets_right_dialect(ip_duckdb_native): ("PolarsDataFrame", pl.DataFrame, "pl"), ], ) -def test_converts_to_data_frames_natively( +def test_native_connection_converts_to_data_frames_natively( monkeypatch, - ip_duckdb_native, + ip_duckdb_native_empty, method, expected_type, expected_native_method, ): - ip_duckdb_native.run_cell("%sql CREATE TABLE weather (city VARCHAR, temp_lo INT);") - ip_duckdb_native.run_cell("%sql INSERT INTO weather VALUES ('San Francisco', 46);") - ip_duckdb_native.run_cell("%sql INSERT INTO weather VALUES ('NYC', 20);") - ip_duckdb_native.run_cell("results = %sql SELECT * FROM weather") + ip_duckdb_native_empty.run_cell( + "%sql CREATE TABLE weather (city VARCHAR, temp_lo INT);" + ) + ip_duckdb_native_empty.run_cell( + "%sql INSERT INTO weather VALUES ('San Francisco', 46);" + ) + ip_duckdb_native_empty.run_cell("%sql INSERT INTO weather VALUES ('NYC', 20);") + ip_duckdb_native_empty.run_cell("results = %sql SELECT * FROM weather") - results = ip_duckdb_native.run_cell("results").result + results = ip_duckdb_native_empty.run_cell("results").result mock = Mock(wraps=results.sqlaproxy) monkeypatch.setattr(results, "_sqlaproxy", mock) - out = ip_duckdb_native.run_cell(f"results.{method}()") + out = ip_duckdb_native_empty.run_cell(f"results.{method}()") mock.execute.assert_called_once_with("SELECT * FROM weather") getattr(mock, expected_native_method).assert_called_once_with() @@ -115,21 +124,24 @@ def test_converts_to_data_frames_natively( "autopolars_on", ], ) -def test_auto_data_frame_config( - ip_duckdb_native, +def test_convert_to_dataframe_automatically( + ip_duckdb_native_empty, conversion_cell, expected_type, ): - ip_duckdb_native.run_cell(conversion_cell) - ip_duckdb_native.run_cell("%sql CREATE TABLE weather (city VARCHAR, temp_lo INT);") - ip_duckdb_native.run_cell("%sql INSERT INTO weather VALUES ('San Francisco', 46);") - ip_duckdb_native.run_cell("%sql INSERT INTO weather VALUES ('NYC', 20);") - df = ip_duckdb_native.run_cell("%sql SELECT * FROM weather").result + ip_duckdb_native_empty.run_cell(conversion_cell) + ip_duckdb_native_empty.run_cell( + "%sql CREATE TABLE weather (city VARCHAR, temp_lo INT);" + ) + ip_duckdb_native_empty.run_cell( + "%sql INSERT INTO weather VALUES ('San Francisco', 46);" + ) + ip_duckdb_native_empty.run_cell("%sql INSERT INTO weather VALUES ('NYC', 20);") + df = ip_duckdb_native_empty.run_cell("%sql SELECT * FROM weather").result assert isinstance(df, expected_type) assert df.shape == (2, 2) -# TODO: make this an integration test, also try with native connections @pytest.mark.parametrize( "config", [ @@ -165,83 +177,101 @@ def test_auto_data_frame_config( "multiple_tables_created", ], ) -def test_multiple_statements(ip_empty_testing, config, sql, tables): - ip_empty_testing.run_cell("%sql duckdb://") - ip_empty_testing.run_cell(config) - - ip_empty_testing.run_cell("%sql CREATE TABLE weather (city VARCHAR,);") - ip_empty_testing.run_cell("%sql INSERT INTO weather VALUES ('NYC');") - ip_empty_testing.run_cell("%sql SELECT * FROM weather;") +@pytest.mark.parametrize( + "ip", + [ + "ip_duckdb_native_empty", + "ip_duckdb_sqlalchemy_empty", + ], +) +def test_multiple_statements(ip, config, sql, tables, request): + ip_ = request.getfixturevalue(ip) + ip_.run_cell(config) - out = ip_empty_testing.run_cell(sql) - out_tables = ip_empty_testing.run_cell("%sqlcmd tables") + ip_.run_cell("%sql CREATE TABLE weather (city VARCHAR,);") + ip_.run_cell("%sql INSERT INTO weather VALUES ('NYC');") + ip_.run_cell("%sql SELECT * FROM weather;") - assert out.error_in_exec is None + out = ip_.run_cell(sql) if config == "%config SqlMagic.autopandas = True": assert out.result.to_dict() == {"city": {0: "NYC"}} else: assert out.result.dict() == {"city": ("NYC",)} - assert set(tables) == set(r[0] for r in out_tables.result._table.rows) + if ip == "ip_duckdb_sqlalchemy_empty": + out_tables = ip_.run_cell("%sqlcmd tables") + assert set(tables) == set(r[0] for r in out_tables.result._table.rows) -# TODO: make this an integration test, also try with native connections -# @pytest.mark.parametrize( -# "config", -# [ -# "%config SqlMagic.autopandas = True", -# "%config SqlMagic.autopandas = False", -# ], -# ids=[ -# "autopandas_on", -# "autopandas_off", -# ], -# ) -# @pytest.mark.parametrize( -# "sql, tables", -# [ -# [ -# ( -# "%sql CREATE TEMP TABLE some_table (city VARCHAR,);" -# "CREATE TABLE more_names (city VARCHAR,);" -# "INSERT INTO some_table VALUES ('NYC');" -# "SELECT * FROM some_table;" -# ), -# ["more_names"], -# ], -# ], -# ids=[ -# "multiple_selects", -# ], -# ) -# def test_tmp_table(ip_empty_testing, config, sql, tables): -# ip_empty_testing.run_cell("%sql duckdb://") -# ip_empty_testing.run_cell(config) - -# out = ip_empty_testing.run_cell(sql) -# out_tables = ip_empty_testing.run_cell("%sqlcmd tables") - -# if config == "%config SqlMagic.autopandas = True": -# assert out.result.to_dict() == {"city": {0: "NYC"}} -# else: -# assert out.result.dict() == {"city": ("NYC",)} - -# assert set(tables) == set(r[0] for r in out_tables.result._table.rows) - - -def test_dataframe_returned_only_if_last_statement_is_select(ip_empty): - ip_empty.run_cell("%sql duckdb://") - ip_empty.run_cell("%config SqlMagic.autopandas=True") - connection.Connection.connections["duckdb://"].engine.raw_connection = Mock( - side_effect=ValueError("some error") - ) +@pytest.mark.parametrize( + "config", + [ + "%config SqlMagic.autopandas = True", + "%config SqlMagic.autopandas = False", + ], + ids=[ + "autopandas_on", + "autopandas_off", + ], +) +@pytest.mark.parametrize( + "sql, tables", + [ + [ + ( + "%sql CREATE TEMP TABLE some_table (city VARCHAR,);" + "CREATE TABLE more_names (city VARCHAR,);" + "INSERT INTO some_table VALUES ('NYC');" + "SELECT * FROM some_table;" + ), + ["more_names"], + ], + ], + ids=[ + "multiple_selects", + ], +) +@pytest.mark.parametrize( + "ip", + [ + pytest.param( + "ip_duckdb_native_empty", + marks=pytest.mark.xfail( + reason="Currently, native DuckDB runs each " + "statement in a separate cursor" + ), + ), + "ip_duckdb_sqlalchemy_empty", + ], +) +def test_tmp_table(ip, config, sql, tables, request): + ip = request.getfixturevalue(ip) + ip.run_cell(config) - out = ip_empty.run_cell( - "%sql CREATE TABLE a (c VARCHAR,); CREATE TABLE b (c VARCHAR,);" - ) + out = ip.run_cell(sql) + out_tables = ip.run_cell("%sqlcmd tables") - assert out.error_in_exec is None + if config == "%config SqlMagic.autopandas = True": + assert out.result.to_dict() == {"city": {0: "NYC"}} + else: + assert out.result.dict() == {"city": ("NYC",)} + + assert set(tables) == set(r[0] for r in out_tables.result._table.rows) + + +@pytest.mark.parametrize( + "ip", + [ + "ip_duckdb_native_empty", + "ip_duckdb_sqlalchemy_empty", + ], +) +def test_empty_data_frame_if_last_statement_is_not_select(ip, request): + ip = request.getfixturevalue(ip) + ip.run_cell("%config SqlMagic.autopandas=True") + out = ip.run_cell("%sql CREATE TABLE a (c VARCHAR,); CREATE TABLE b (c VARCHAR,);") + assert len(out.result) == 0 @pytest.mark.parametrize( @@ -262,48 +292,20 @@ def test_dataframe_returned_only_if_last_statement_is_select(ip_empty): """, ], ) -def test_commits_all_statements(ip_empty, sql): - ip_empty.run_cell("%sql duckdb://") - out = ip_empty.run_cell(sql) +@pytest.mark.parametrize( + "ip", + [ + "ip_duckdb_native_empty", + "ip_duckdb_sqlalchemy_empty", + ], +) +def test_commits_all_statements(ip, sql, request): + ip = request.getfixturevalue(ip) + out = ip.run_cell(sql) assert out.error_in_exec is None assert out.result.dict() == {"x": (1, 2)} -def test_resultset_uses_native_duckdb_df(ip_empty): - from sqlalchemy import create_engine - from sql.connection import Connection - - engine = create_engine("duckdb://") - session = engine.connect() - - session.execute(text("CREATE TABLE a (x INT,);")) - session.execute(text("INSERT INTO a(x) VALUES (10),(20),(30);")) - - sql = text("SELECT * FROM a") - - # this breaks if there's an open results set - session.execute(sql).fetchall() - - results = session.execute(sql) - - Connection.set(engine, displaycon=False) - - results.fetchmany = Mock(wraps=results.fetchmany) - - mock = Mock() - mock.displaylimit = 1 - mock.autolimit = 0 - - result_set = ResultSet(results, mock, statement=sql, conn=Mock()) - - df = result_set.DataFrame() - - assert isinstance(df, pd.DataFrame) - assert df.to_dict() == {"x": {0: 10, 1: 20, 2: 30}} - - results.fetchmany.assert_called_once_with(size=2) - - @pytest.mark.parametrize("method", ["DataFrame", "PolarsDataFrame"]) def test_warn_when_using_sqlalchemy_and_converting_to_dataframe(ip_empty, method): ip_empty.run_cell("%sql duckdb://")