Skip to content

Commit

Permalink
Merge pull request #80 from pdet/main
Browse files Browse the repository at this point in the history
Fix R CI. Add Substrait-ADBC Test
  • Loading branch information
pdet authored Apr 16, 2024
2 parents 1116fb5 + 5ce4f6e commit 26bd22a
Show file tree
Hide file tree
Showing 8 changed files with 175 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
- name: Install Python Dependencies
shell: bash
run: |
pip install pytest pandas "ibis-framework[duckdb]==3.2.0" "ibis-substrait==2.21.1" "substrait-validator==0.0.11"
pip install pytest pandas substrait adbc_driver_manager "ibis-framework[duckdb]==3.2.0" "ibis-substrait==2.21.1" "substrait-validator==0.0.11"
pip uninstall protobuf -y
pip install --no-binary protobuf protobuf
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ jobs:
runs-on: ubuntu-20.04
env:
GEN: ninja
DUCKDB_PATH: ../duckdb/

steps:
- uses: r-lib/actions/setup-r@v2
with:
Expand Down
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 1993 files
2 changes: 1 addition & 1 deletion duckdb-r
Submodule duckdb-r updated 1527 files
4 changes: 2 additions & 2 deletions src/to_substrait.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ substrait::Rel *DuckDBToSubstrait::TransformLimit(LogicalOperator &dop) {
idx_t limit_val;
idx_t offset_val;

switch(dlimit.limit_val.Type()) {
switch (dlimit.limit_val.Type()) {
case LimitNodeType::CONSTANT_VALUE:
limit_val = dlimit.limit_val.GetConstantValue();
break;
Expand All @@ -790,7 +790,7 @@ substrait::Rel *DuckDBToSubstrait::TransformLimit(LogicalOperator &dop) {
default:
throw InternalException("Unsupported limit value type");
}
switch(dlimit.offset_val.Type()) {
switch (dlimit.offset_val.Type()) {
case LimitNodeType::CONSTANT_VALUE:
offset_val = dlimit.offset_val.GetConstantValue();
break;
Expand Down
Binary file added test/python/data/somefile.parquet
Binary file not shown.
167 changes: 167 additions & 0 deletions test/python/test_adbc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
import datetime
import glob
import json
import os
import sys
from os.path import abspath, join, dirname, normpath

import duckdb
import pytest


adbc_driver_manager = pytest.importorskip("adbc_driver_manager.dbapi")
adbc_driver_manager_lib = pytest.importorskip("adbc_driver_manager._lib")
json_format = pytest.importorskip("google.protobuf.json_format")
plan_pb2 = pytest.importorskip("substrait.gen.proto.plan_pb2")
pyarrow = pytest.importorskip("pyarrow")

# Path of the `duckdb.duckdb` module file; it is passed as the ADBC driver in `duck_conn`.
# When testing locally, if you build via `BUILD_PYTHON=1 make`, you need to manually set
# this to the path of the DuckDB dylib.
driver_path = duckdb.duckdb.__file__

def find_substrait():
    """Locate the built substrait DuckDB extension on disk.

    Searches the ``release`` and ``debug`` build trees under
    ``../../duckdb/build/`` (relative to this file). When the
    ``DUCKDB_PYTHON_TEST_EXTENSION_PATH`` environment variable is set, that
    directory and its immediate subdirectories are searched as well.

    Returns:
        The absolute path of the first file found whose name ends with
        ``substrait.duckdb_extension``.

    Raises:
        Nothing directly; when no matching file is found ``pytest.skip`` is
        called, which aborts the calling test and marks it as skipped.
    """
    # Paths to search for extensions
    build = normpath(join(dirname(__file__), "../../duckdb/build/"))
    extension = "extension/*/*.duckdb_extension"

    extension_search_patterns = [
        join(build, "release", extension),
        join(build, "debug", extension),
    ]

    # DUCKDB_PYTHON_TEST_EXTENSION_PATH can be used to add a path for the
    # extension test to search for extensions
    env_extension_path = os.environ.get('DUCKDB_PYTHON_TEST_EXTENSION_PATH')
    if env_extension_path is not None:
        env_extension_path = env_extension_path.rstrip('/')
        extension_search_patterns.append(env_extension_path + '/*/*.duckdb_extension')
        extension_search_patterns.append(env_extension_path + '/*.duckdb_extension')

    # Patterns are tried in order, so the build trees win over the env path.
    for pattern in extension_search_patterns:
        for path in glob.glob(abspath(pattern)):
            if path.endswith("substrait.duckdb_extension"):
                return path

    # pytest.skip raises, so nothing after this call would ever execute.
    pytest.skip('could not load substrait')


@pytest.fixture
def duck_conn():
    """Yield an ADBC connection to DuckDB with unsigned extensions allowed.

    The connection is opened via ``adbc_driver_manager.connect`` using the
    module-level ``driver_path`` and the ``duckdb_adbc_init`` entrypoint. It is
    used as a context manager, so its exit handler runs once the test that
    requested the fixture has finished.
    """
    database_options = {"allow_unsigned_extensions": "true"}
    connection = adbc_driver_manager.connect(
        driver=driver_path,
        entrypoint="duckdb_adbc_init",
        db_kwargs=database_options,
    )
    with connection as conn:
        yield conn

# Absolute path of the parquet fixture stored in the `data` directory next to this file.
file_path = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(file_path,'data','somefile.parquet')

# Substrait plan in protobuf-JSON form: reads the columns `mbid` and `artist_mb` from the
# parquet file above and projects both of them.
# The path is embedded with json.dumps (which emits the surrounding quotes and escapes
# backslashes/quotes) so that the text stays valid JSON for any path.
PLAN_PROTOTEXT = '''{
"relations":[
{
"root":{
"input":{
"project":{
"input":{
"read":{
"baseSchema":{
"names":[
"mbid",
"artist_mb"
],
"struct":{
"types":[
{
"string":{
"nullability":"NULLABILITY_NULLABLE"
}
},
{
"string":{
"nullability":"NULLABILITY_NULLABLE"
}
}
],
"nullability":"NULLABILITY_REQUIRED"
}
},
"projection":{
"select":{
"structItems":[
{
},
{
"field":1
}
]
},
"maintainSingularStruct":true
},
"localFiles":{
"items":[
{
"uriFile":''' + json.dumps(file_path) + ''',
"parquet":{
}
}
]
}
}
},
"expressions":[
{
"selection":{
"directReference":{
"structField":{
}
},
"rootReference":{
}
}
},
{
"selection":{
"directReference":{
"structField":{
"field":1
}
},
"rootReference":{
}
}
}
]
}
},
"names":[
"mbid",
"artist_mb"
]
}
}
],
"version":{
"minorNumber":39,
"producer":"DuckDB"
}
}'''

def test_substrait_over_adbc(duck_conn):
    """Execute a serialized Substrait plan over ADBC and verify the result.

    Parses ``PLAN_PROTOTEXT`` into a ``plan_pb2.Plan``, loads the substrait
    extension located by ``find_substrait``, executes the plan's serialized
    bytes through an ADBC cursor, and compares the fetched Arrow table with
    the expected single row.

    Args:
        duck_conn: ADBC connection supplied by the ``duck_conn`` fixture.
    """
    plan = json_format.Parse(PLAN_PROTOTEXT, plan_pb2.Plan())
    # Resolve the extension before opening a cursor: find_substrait() may skip
    # the test, and no cursor should be left open in that case.
    substrait_path = find_substrait()
    plan_data = plan.SerializeToString()

    correct_table = pyarrow.Table.from_pydict({
        'mbid': pyarrow.array(["1"], type=pyarrow.string()),
        'artist_mb': pyarrow.array(["Tenacious D"], type=pyarrow.string())
    })

    # The cursor is a context manager; using it guarantees it is closed even
    # when a statement or the assertion below fails.
    with duck_conn.cursor() as cur:
        cur.execute(f"LOAD '{substrait_path}'")
        cur.execute(plan_data)
        result_table = cur.fetch_arrow_table()
        assert result_table.equals(correct_table)

0 comments on commit 26bd22a

Please sign in to comment.