Skip to content

Commit

Permalink
feat(databricks): add the databricks backend
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Sep 28, 2024
1 parent 4b34a61 commit d229f75
Show file tree
Hide file tree
Showing 53 changed files with 1,531 additions and 163 deletions.
19 changes: 19 additions & 0 deletions .github/workflows/ibis-backends-cloud.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ jobs:
- name: install poetry
run: pip install 'poetry==1.8.3'

- name: remove databricks arrow and numpy upper bounds
if: matrix.backend.name != 'databricks'
run: |
poetry remove databricks-sql-connector
poetry update numpy pyarrow
- name: install additional deps
if: matrix.backend.key == 'snowpark'
run: poetry add snowflake-snowpark-python --python="==${{ steps.install_python.outputs.python-version }}"
Expand All @@ -120,6 +126,19 @@ jobs:
with:
credentials_json: ${{ secrets.GCP_CREDENTIALS }}

- name: setup databricks credentials
if: matrix.backend.name == 'databricks'
run: |
{
echo "DATABRICKS_HTTP_PATH=${DATABRICKS_HTTP_PATH}"
echo "DATABRICKS_SERVER_HOSTNAME=${DATABRICKS_SERVER_HOSTNAME}"
echo "DATABRICKS_TOKEN=${DATABRICKS_TOKEN}"
} >> "$GITHUB_ENV"
env:
DATABRICKS_HTTP_PATH: ${{ secrets.DATABRICKS_HTTP_PATH }}
DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_SERVER_HOSTNAME }}
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}

- name: setup snowflake credentials
if: matrix.backend.name == 'snowflake'
run: |
Expand Down
12 changes: 8 additions & 4 deletions .github/workflows/ibis-backends.yml
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,11 @@ jobs:
- name: install poetry
run: pip install 'poetry==1.8.3'

- name: remove databricks arrow and numpy upper bounds
run: |
poetry remove databricks-sql-connector
poetry update numpy pyarrow
- name: install ibis
run: poetry install --without dev --without docs --extras "${{ join(matrix.backend.extras, ' ') }} examples"

Expand Down Expand Up @@ -499,8 +504,7 @@ jobs:

- name: check that no untracked files were produced
shell: bash
run: |
! git status --porcelain | tee /dev/stderr | grep .
run: git checkout poetry.lock pyproject.toml && ! git status --porcelain | tee /dev/stderr | grep .

- name: upload code coverage
if: success()
Expand Down Expand Up @@ -609,7 +613,7 @@ jobs:

- name: remove incompatible deps
# it requires a version of pandas that min versions are not compatible with
run: poetry remove lonboard deltalake
run: poetry remove lonboard deltalake databricks-sql-connector

- name: install minimum versions of required deps
run: poetry add --lock ${{ join(matrix.backend.deps.required, ' ') }} --python="==${{ steps.install_python.outputs.python-version }}"
Expand Down Expand Up @@ -715,7 +719,7 @@ jobs:

- name: remove lonboard
# it requires a version of pandas that pyspark is not compatible with
run: poetry remove lonboard
run: poetry remove lonboard databricks-sql-connector

- name: install exact versions of pyspark, pandas and numpy
run: poetry add --lock 'pyspark@${{ matrix.pyspark-version }}' ${{ join(matrix.deps, ' ') }}
Expand Down
63 changes: 63 additions & 0 deletions ci/schema/databricks.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
CREATE VIEW IF NOT EXISTS diamonds AS
SELECT * FROM parquet.`/Volumes/ibis_testing/default/testing_data/parquet/diamonds.parquet`;

CREATE VIEW IF NOT EXISTS batting AS
SELECT * FROM parquet.`/Volumes/ibis_testing/default/testing_data/parquet/batting.parquet`;

CREATE VIEW IF NOT EXISTS awards_players AS
SELECT * FROM parquet.`/Volumes/ibis_testing/default/testing_data/parquet/awards_players.parquet`;

CREATE VIEW IF NOT EXISTS functional_alltypes AS
SELECT * FROM parquet.`/Volumes/ibis_testing/default/testing_data/parquet/functional_alltypes.parquet`;

CREATE VIEW IF NOT EXISTS astronauts AS
SELECT * FROM parquet.`/Volumes/ibis_testing/default/testing_data/parquet/astronauts.parquet`;

CREATE TABLE IF NOT EXISTS `array_types` AS
VALUES (ARRAY(CAST(1 AS BIGINT), 2, 3), ARRAY('a', 'b', 'c'), ARRAY(1.0, 2.0, 3.0), 'a', 1.0, ARRAY(ARRAY(), ARRAY(CAST(1 AS BIGINT), 2, 3), NULL)),
(ARRAY(4, 5), ARRAY('d', 'e'), ARRAY(4.0, 5.0), 'a', 2.0, ARRAY()),
(ARRAY(6, NULL), ARRAY('f', NULL), ARRAY(6.0, NULL), 'a', 3.0, ARRAY(NULL, ARRAY(), NULL)),
(ARRAY(NULL, 1, NULL), ARRAY(NULL, 'a', NULL), ARRAY(), 'b', 4.0, ARRAY(ARRAY(1), ARRAY(2), ARRAY(), ARRAY(3, 4, 5))),
(ARRAY(2, NULL, 3), ARRAY('b', NULL, 'c'), NULL, 'b', 5.0, NULL),
(ARRAY(4, NULL, NULL, 5), ARRAY('d', NULL, NULL, 'e'), ARRAY(4.0, NULL, NULL, 5.0), 'c', 6.0, ARRAY(ARRAY(1, 2, 3)))
AS (`x`, `y`, `z`, `grouper`, `scalar_column`, `multi_dim`);

CREATE TABLE IF NOT EXISTS `map` AS
VALUES (CAST(1 AS BIGINT), map('a', CAST(1 AS BIGINT), 'b', 2, 'c', 3)),
(2, map('d', 4, 'e', 5, 'f', 6)) AS (`idx`, `kv`);

CREATE TABLE IF NOT EXISTS `struct` AS
VALUES (named_struct('a', 1.0, 'b', 'banana', 'c', CAST(2 AS BIGINT))),
(named_struct('a', 2.0, 'b', 'apple', 'c', 3)),
(named_struct('a', 3.0, 'b', 'orange', 'c', 4)),
(named_struct('a', NULL, 'b', 'banana', 'c', 2)),
(named_struct('a', 2.0, 'b', NULL, 'c', 3)),
(NULL),
(named_struct('a', 3.0, 'b', 'orange', 'c', NULL)) AS (`abc`);

CREATE TABLE IF NOT EXISTS `json_t` AS
VALUES (CAST(1 AS BIGINT), parse_json('{"a": [1,2,3,4], "b": 1}')),
(2, parse_json('{"a":null,"b":2}')),
(3, parse_json('{"a":"foo", "c":null}')),
(4, parse_json('null')),
(5, parse_json('[42,47,55]')),
(6, parse_json('[]')),
(7, parse_json('"a"')),
(8, parse_json('""')),
(9, parse_json('"b"')),
(10, NULL),
(11, parse_json('true')),
(12, parse_json('false')),
(13, parse_json('42')),
(14, parse_json('37.37')) AS (`rowid`, `js`);

CREATE TABLE IF NOT EXISTS `win` AS
VALUES
('a', CAST(0 AS BIGINT), CAST(3 AS BIGINT)),
('a', 1, 2),
('a', 2, 0),
('a', 3, 1),
('a', 4, 1) AS (`g`, `x`, `y`);

CREATE TABLE IF NOT EXISTS `topk` AS
VALUES (CAST(1 AS BIGINT)), (1), (NULL) AS (`x`);
17 changes: 6 additions & 11 deletions ibis/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,6 @@ class BaseBackend(abc.ABC, _FileIOHandler, CacheHandler):

supports_temporary_tables = False
supports_python_udfs = False
supports_in_memory_tables = True

def __init__(self, *args, **kwargs):
self._con_args: tuple[Any] = args
Expand Down Expand Up @@ -1083,23 +1082,19 @@ def _register_in_memory_tables(self, expr: ir.Expr) -> None:
memtable, self._finalize_in_memory_table, memtable.name
)

@abc.abstractmethod
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
if self.supports_in_memory_tables:
raise NotImplementedError(
f"{self.name} must implement `_register_in_memory_table` to support in-memory tables"
)
"""Register an in-memory table associated with `op`."""

@abc.abstractmethod
def _finalize_memtable(self, name: str) -> None:
"""Clean up a memtable named `name`."""

def _finalize_in_memory_table(self, name: str) -> None:
"""Wrap `_finalize_memtable` to suppress exceptions."""
with contextlib.suppress(Exception):
self._finalize_memtable(name)

def _finalize_memtable(self, name: str) -> None:
if self.supports_in_memory_tables:
raise NotImplementedError(
f"{self.name} must implement `_finalize_memtable` to support in-memory tables"
)

def _run_pre_execute_hooks(self, expr: ir.Expr) -> None:
"""Backend-specific hooks to run before an expression is executed."""
self._register_udfs(expr)
Expand Down
Loading

0 comments on commit d229f75

Please sign in to comment.