Skip to content

Commit

Permalink
Persist docs on incremental only if changed (#513)
Browse files Browse the repository at this point in the history
  • Loading branch information
benc-db authored Nov 27, 2023
2 parents bbdb10a + af4b434 commit 675405a
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 54 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
## dbt-databricks 1.7.x (TBD)
## dbt-databricks 1.7.2 (TBD)

### Features

- Adding capability to specify compute on a per model basis ([488](https://github.com/databricks/dbt-databricks/pull/488))
- Selectively persist column docs that have changed between runs of incremental ([513](https://github.com/databricks/dbt-databricks/pull/513))

## dbt-databricks 1.7.1 (Nov 13, 2023)

Expand Down
4 changes: 3 additions & 1 deletion dbt/adapters/databricks/column.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from dataclasses import dataclass
from typing import ClassVar, Dict
from typing import ClassVar, Dict, Optional

from dbt.adapters.spark.column import SparkColumn


@dataclass
class DatabricksColumn(SparkColumn):
comment: Optional[str] = None

TYPE_LABELS: ClassVar[Dict[str, str]] = {
"LONG": "BIGINT",
}
Expand Down
22 changes: 22 additions & 0 deletions dbt/adapters/databricks/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ def parse_describe_extended( # type: ignore[override]
column=column["col_name"],
column_index=idx,
dtype=column["data_type"],
comment=column["comment"],
)
for idx, column in enumerate(rows)
]
Expand Down Expand Up @@ -657,3 +658,24 @@ def _catalog(self, catalog: Optional[str]) -> Iterator[None]:
finally:
if current_catalog is not None:
self.execute_macro(USE_CATALOG_MACRO_NAME, kwargs=dict(catalog=current_catalog))

@available.parse(lambda *a, **k: {})
def get_persist_doc_columns(
self, existing_columns: List[DatabricksColumn], columns: Dict[str, Any]
) -> Dict[str, Any]:
"""Returns a dictionary of columns that have updated comments."""
return_columns = {}

# Since existing_columns are gathered after writing the table, we don't need to include any
# columns from the model that are not in the existing_columns. If we did, it would lead to
# an error when we tried to alter the table.
for column in existing_columns:
name = column.column
if (
name in columns
and "description" in columns[name]
and columns[name]["description"] != (column.comment or "")
):
return_columns[name] = columns[name]

return return_columns
8 changes: 8 additions & 0 deletions dbt/include/databricks/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -505,3 +505,11 @@
) -%}
{% do return([false, new_relation]) %}
{% endmacro %}

{% macro databricks__persist_docs(relation, model, for_relation, for_columns) -%}
{% if for_columns and config.persist_column_docs() and model.columns %}
{%- set existing_columns = adapter.get_columns_in_relation(relation) -%}
{%- set columns_to_persist_docs = adapter.get_persist_doc_columns(existing_columns, model.columns) -%}
{% do alter_column_comment(relation, columns_to_persist_docs) %}
{% endif %}
{% endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@
{% do apply_grants(target_relation, grant_config, should_revoke) %}

{% do persist_docs(target_relation, model) %}


{% do optimize(target_relation) %}

{{ run_hooks(post_hooks) }}
Expand Down
46 changes: 46 additions & 0 deletions tests/functional/adapter/incremental/fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
merge_update_columns_sql = """
{{ config(
materialized = 'incremental',
unique_key = 'id',
merge_update_columns = ['msg'],
) }}
{% if not is_incremental() %}
select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
union all
select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
{% else %}
-- msg will be updated, color will be ignored
select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
union all
select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
{% endif %}
"""

no_comment_schema = """
version: 2
models:
- name: merge_update_columns_sql
columns:
- name: id
- name: msg
- name: color
"""

comment_schema = """
version: 2
models:
- name: merge_update_columns_sql
columns:
- name: id
description: This is the id column
- name: msg
description: This is the msg column
- name: color
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from dbt.tests import util
from tests.functional.adapter.incremental import fixtures

import pytest


class TestIncrementalPersistDocs:
@pytest.fixture(scope="class")
def models(self):
return {
"merge_update_columns_sql.sql": fixtures.merge_update_columns_sql,
"schema.yml": fixtures.no_comment_schema,
}

@pytest.fixture(scope="class")
def project_config_update(self):
return {
"models": {
"test": {
"+persist_docs": {
"relation": True,
"columns": True,
},
}
}
}

def test_adding_comments(self, project):
util.run_dbt(["run"])
util.write_file(fixtures.comment_schema, "models", "schema.yml")
_, out = util.run_dbt_and_capture(["--debug", "run"])
assert "comment 'This is the id column'" in out
assert "comment 'This is the msg column'" in out
assert "comment ''" not in out
Loading

0 comments on commit 675405a

Please sign in to comment.