Skip to content

Commit

Permalink
1.7.0 RC1 (#479)
Browse files Browse the repository at this point in the history
Ensures compatibility with dbt-spark 1.7.0rc1
  • Loading branch information
benc-db authored Oct 13, 2023
2 parents 29b4c20 + 232720b commit 6690579
Show file tree
Hide file tree
Showing 11 changed files with 713 additions and 53 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
## dbt-databricks 1.7.x (TBD)

## dbt-databricks 1.7.0rc1 (October 13, 2023)

### Fixes

- Fixed a bug where setting a primary key constraint before a null constraint would fail by ensuring null constraints happen first ([479](https://github.com/databricks/dbt-databricks/pull/479))
- Foreign key constraints now work with dbt's constraint structure ([479](https://github.com/databricks/dbt-databricks/pull/479))

### Under the Hood

- Compatibility with dbt-spark 1.7.0b2 ([467](https://github.com/databricks/dbt-databricks/pull/467))
- Compatibility with dbt-spark 1.7.0rc1 ([479](https://github.com/databricks/dbt-databricks/pull/479))

## dbt-databricks 1.6.6 (October 9, 2023)

Expand Down
4 changes: 3 additions & 1 deletion dbt/adapters/databricks/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,9 @@ def parse_columns_from_information( # type: ignore[override]
columns.append(column)
return columns

def get_catalog(self, manifest: Manifest) -> Tuple[Table, List[Exception]]:
def get_catalog(
self, manifest: Manifest, selected_nodes: Optional[Set] = None
) -> Tuple[Table, List[Exception]]:
schema_map = self._get_catalog_schemas(manifest)

with executor(self.config) as tpe:
Expand Down
63 changes: 34 additions & 29 deletions dbt/include/databricks/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@

{% macro get_constraints_sql(relation, constraints, model, column) %}
{% set statements = [] %}
{% for constraint in constraints %}
-- Hack so that not null constraints will be applied before primary key constraints
{% for constraint in constraints|sort(attribute='type') %}
{% if constraint %}
{% set constraint_statements = get_constraint_sql(relation, constraint, model, column) %}
{% for statement in constraint_statements %}
Expand Down Expand Up @@ -289,40 +290,44 @@
{{ exceptions.warn("unenforced constraint type: " ~ constraint.type)}}
{% endif %}

{% set column_names = constraint.get("columns", []) %}
{% if column and not column_names %}
{% set column_names = [column['name']] %}
{% endif %}
{% set quoted_names = [] %}
{% for column_name in column_names %}
{% set column = model.get('columns', {}).get(column_name) %}
{% if not column %}
{{ exceptions.warn('Invalid foreign key column: ' ~ column_name) }}
{% else %}
{% set quoted_name = adapter.quote(column['name']) if column['quote'] else column['name'] %}
{% do quoted_names.append(quoted_name) %}
{% endif %}
{% endfor %}

{% set joined_names = quoted_names|join(", ") %}

{% set name = constraint.get("name") %}
{% if not name and local_md5 %}
{{ exceptions.warn("Constraint of type " ~ type ~ " with no `name` provided. Generating hash instead.") }}
{%- set name = local_md5("primary_key;" ~ column_names ~ ";") -%}
{% endif %}

{% set parent = constraint.get("parent") %}
{% if not parent %}
{{ exceptions.raise_compiler_error('No parent table defined for foreign key: ' ~ expression) }}
{% endif %}
{% if not "." in parent %}
{% set parent = relation.schema ~ "." ~ parent%}
{% endif %}
{% set stmt = "alter table " ~ relation ~ " add constraint " ~ name ~ " foreign key(" ~ joined_names ~ ") references " ~ parent %}
{% set parent_columns = constraint.get("parent_columns") %}
{% if parent_columns %}
{% set stmt = stmt ~ "(" ~ parent_columns|join(", ") ~ ")"%}
{% if constraint.get('expression') %}
{% set stmt = "alter table " ~ relation ~ " add constraint " ~ name ~ " foreign key" ~ constraint.get('expression') %}
{% else %}
{% set column_names = constraint.get("columns", []) %}
{% if column and not column_names %}
{% set column_names = [column['name']] %}
{% endif %}
{% set quoted_names = [] %}
{% for column_name in column_names %}
{% set column = model.get('columns', {}).get(column_name) %}
{% if not column %}
{{ exceptions.warn('Invalid foreign key column: ' ~ column_name) }}
{% else %}
{% set quoted_name = adapter.quote(column['name']) if column['quote'] else column['name'] %}
{% do quoted_names.append(quoted_name) %}
{% endif %}
{% endfor %}

{% set joined_names = quoted_names|join(", ") %}

{% set parent = constraint.get("parent") %}
{% if not parent %}
{{ exceptions.raise_compiler_error('No parent table defined for foreign key: ' ~ expression) }}
{% endif %}
{% if not "." in parent %}
{% set parent = relation.schema ~ "." ~ parent%}
{% endif %}
{% set stmt = "alter table " ~ relation ~ " add constraint " ~ name ~ " foreign key(" ~ joined_names ~ ") references " ~ parent %}
{% set parent_columns = constraint.get("parent_columns") %}
{% if parent_columns %}
{% set stmt = stmt ~ "(" ~ parent_columns|join(", ") ~ ")"%}
{% endif %}
{% endif %}
{% set stmt = stmt ~ ";" %}
{% do statements.append(stmt) %}
Expand Down
4 changes: 2 additions & 2 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ tox>=3.2.0
types-requests
types-mock

dbt-core==1.7.0b2
dbt-tests-adapter==1.7.0b2
dbt-core==1.7.0rc1
dbt-tests-adapter==1.7.0rc1
# git+https://github.com/dbt-labs/[email protected]#egg=dbt-spark
# git+https://github.com/dbt-labs/[email protected]#egg=dbt-core&subdirectory=core
# git+https://github.com/dbt-labs/[email protected]#egg=dbt-tests-adapter&subdirectory=tests/adapter
57 changes: 41 additions & 16 deletions docs/databricks-dbt-constraints-vs-model-contracts.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# dbt-databricks constraints vs DBT 1.5 model contracts
# dbt-databricks constraints vs DBT 1.5 model contracts

dbt-databricks constraints are enabled for a model by setting `persist_constraints: true` in the model configuration. Model contracts are enabled by setting `enforced: true` under the contract configuration.
dbt-databricks constraints are enabled for a model by setting `persist_constraints: true` in the model configuration. Model contracts are enabled by setting `enforced: true` under the contract configuration.

```
models:
Expand All @@ -10,9 +10,10 @@ models:
enforced: true
```

DBT model contracts enforce column names and datatypes. This means that **all** columns must be explicitly listed and have name and data_type properties.
DBT model contracts enforce column names and datatypes. This means that **all** columns must be explicitly listed and have name and data_type properties.

dbt-databricks constraints list model level constraints under `meta: constraints:` while in DBT `constraints` is a property of the model.

```
dbt-databricks
Expand All @@ -36,8 +37,9 @@ models:
expression: "id > 0"
```

dbt-databricks constraints have a single column level constraint (currently limited to not_null) defined by the `meta: constraint:` property.
dbt-databricks constraints have a single column level constraint (currently limited to not_null) defined by the `meta: constraint:` property.
Model contracts have multiple column constraints listed under a columns `constraints` property.

```
dbt-databricks
Expand All @@ -58,13 +60,13 @@ model contract
```

Model contract constraint structure:
- **type** (required): dbt-databricks constraints do not have this property. DBT has not_null, check, primary_key, foreign_key, and custom types. dbt-databricks constraints currently support the equivalents of not_null and check.
- **expression**: Free text input to qualify the constraint. In dbt-databricks constraints this is the condition property. Note: in model contracts the expression text is contained by double quotes, the condition text in dbt-databricks constraints is not double quoted.

- **type** (required): dbt-databricks constraints do not have this property. DBT has not_null, check, primary_key, foreign_key, and custom types. dbt-databricks constraints currently support the equivalents of not_null and check.
- **expression**: Free text input to qualify the constraint. In dbt-databricks constraints this is the condition property. Note: in model contracts the expression text is contained by double quotes, the condition text in dbt-databricks constraints is not double quoted.
- **name** (optional in model contracts, required for check constraints in dbt-databricks constraints): Human-friendly name for this constraint.
- **columns** (model-level only): List of column names to apply the constraint over. dbt-databricks constraints do not have this property.


In a model contract a check constraint over a single column can be defined at either the model or the column level, but it is recommended that it be defined at the column level. Check constraints over multiple columns must be defined at the model level.
In a model contract a check constraint over a single column can be defined at either the model or the column level, but it is recommended that it be defined at the column level. Check constraints over multiple columns must be defined at the model level.
dbt-databricks check constraints are defined only at the model level.

```
Expand All @@ -75,7 +77,7 @@ models:
meta:
constraints:
- name: id_greater_than_zero
condition: id > 0
condition: id > 0
columns:
- name: name
meta:
Expand All @@ -86,13 +88,36 @@ models:
model contract
models:
- name: my_model
- name: my_model
columns:
- name: name
data_type: integer
constraints:
data_type: integer
constraints:
- type: not_null
- type: check
name: id_greater_than_zero
expression: "id > 0"
```

As of 1.7.0rc1, you can express foreign key constraints with dbt model contracts like so:

```
models:
  - name: foreign_table
    columns:
      - name: pk
        constraints:
          - type: not_null
          - type: primary_key
  - name: my_model
    columns:
      - name: name
        data_type: integer
      - name: fk
        constraints:
          - type: foreign_key
            expression: "(fk) REFERENCES foreign_table (pk)"
```

To work with dbt's constraint format, your expression should match everything you require after the keyword FOREIGN KEY as described in the [add constraint documentation](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-table-add-constraint.html).
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
databricks-sql-connector>=2.9.3, <3.0.0
dbt-spark==1.7.0b2
dbt-spark==1.7.0rc1
databricks-sdk==0.9.0
keyring>=23.13.0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def _get_plugin_version():
packages=find_namespace_packages(include=["dbt", "dbt.*"]),
include_package_data=True,
install_requires=[
"dbt-spark==1.7.0b2",
"dbt-spark==1.7.0rc1",
"databricks-sql-connector>=2.9.3, <3.0.0",
"databricks-sdk>=0.9.0",
"keyring>=23.13.0",
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


def pytest_addoption(parser):
parser.addoption("--profile", action="store", default="databricks_cluster", type=str)
parser.addoption("--profile", action="store", default="databricks_uc_sql_endpoint", type=str)


# Using @pytest.mark.skip_profile('databricks_cluster') uses the 'skip_by_adapter_type'
Expand Down
63 changes: 62 additions & 1 deletion tests/functional/adapter/test_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,14 @@
my_model_incremental_wrong_order_sql,
my_model_incremental_wrong_name_sql,
my_incremental_model_sql,
incremental_foreign_key_model_raw_numbers_sql,
incremental_foreign_key_model_stg_numbers_sql,
)
from dbt.tests.util import (
run_dbt,
write_file,
read_file,
)


# constraints are enforced via 'alter' statements that run after table creation
_expected_sql_spark = """
Expand Down Expand Up @@ -208,3 +214,58 @@ def models(self):
"my_model.sql": my_incremental_model_sql,
"constraints_schema.yml": constraints_yml,
}


incremental_foreign_key_schema_yml = """
version: 2
models:
- name: raw_numbers
config:
contract:
enforced: true
materialized: table
columns:
- name: n
data_type: integer
constraints:
- type: primary_key
- type: not_null
- name: stg_numbers
config:
contract:
enforced: true
materialized: incremental
on_schema_change: append_new_columns
unique_key: n
columns:
- name: n
data_type: integer
constraints:
- type: foreign_key
name: fk_n
expression: (n) REFERENCES {schema}.raw_numbers
"""


@pytest.mark.skip_profile("databricks_cluster")
class TestDatabricksIncrementalForeignKeyConstraint:
@pytest.fixture(scope="class")
def models(self):
return {
"schema.yml": incremental_foreign_key_schema_yml,
"raw_numbers.sql": incremental_foreign_key_model_raw_numbers_sql,
"stg_numbers.sql": incremental_foreign_key_model_stg_numbers_sql,
}

def test_incremental_foreign_key_constraint(self, project):
unformatted_constraint_schema_yml = read_file("models", "schema.yml")
write_file(
unformatted_constraint_schema_yml.format(schema=project.test_schema),
"models",
"schema.yml",
)

run_dbt(["run", "--select", "raw_numbers"])
run_dbt(["run", "--select", "stg_numbers"])
run_dbt(["run", "--select", "stg_numbers"])
1 change: 1 addition & 0 deletions tests/functional/adapter/test_dbt_clone.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from dbt.tests.adapter.dbt_clone.test_dbt_clone import BaseClonePossible
from dbt.tests.adapter.dbt_clone.test_dbt_clone import TestCloneSameTargetAndState # noqa F401


class TestDatabricksClonePossible(BaseClonePossible):
Expand Down
Loading

0 comments on commit 6690579

Please sign in to comment.