Persist docs on incremental only if changed (#513)

databricks · Nov 27, 2023 · 675405a · 675405a
2 parents bbdb10a + af4b434
commit 675405a
Show file tree

Hide file tree

Showing 8 changed files with 215 additions and 54 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,8 +1,9 @@
-## dbt-databricks 1.7.x (TBD)
+## dbt-databricks 1.7.2 (TBD)
 
 ### Features
 
 - Adding capability to specify compute on a per model basis ([488](https://github.com/databricks/dbt-databricks/pull/488))
+- Selectively persist column docs that have changed between runs of incremental ([513](https://github.com/databricks/dbt-databricks/pull/513))
 
 ## dbt-databricks 1.7.1 (Nov 13, 2023)
 

diff --git a/dbt/adapters/databricks/column.py b/dbt/adapters/databricks/column.py
@@ -1,11 +1,13 @@
 from dataclasses import dataclass
-from typing import ClassVar, Dict
+from typing import ClassVar, Dict, Optional
 
 from dbt.adapters.spark.column import SparkColumn
 
 
 @dataclass
 class DatabricksColumn(SparkColumn):
+    comment: Optional[str] = None
+
     TYPE_LABELS: ClassVar[Dict[str, str]] = {
         "LONG": "BIGINT",
     }

diff --git a/dbt/adapters/databricks/impl.py b/dbt/adapters/databricks/impl.py
@@ -391,6 +391,7 @@ def parse_describe_extended(  # type: ignore[override]
                 column=column["col_name"],
                 column_index=idx,
                 dtype=column["data_type"],
+                comment=column["comment"],
             )
             for idx, column in enumerate(rows)
         ]
@@ -657,3 +658,24 @@ def _catalog(self, catalog: Optional[str]) -> Iterator[None]:
         finally:
             if current_catalog is not None:
                 self.execute_macro(USE_CATALOG_MACRO_NAME, kwargs=dict(catalog=current_catalog))
+
+    @available.parse(lambda *a, **k: {})
+    def get_persist_doc_columns(
+        self, existing_columns: List[DatabricksColumn], columns: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Returns a dictionary of columns that have updated comments."""
+        return_columns = {}
+
+        # Since existing_columns are gathered after writing the table, we don't need to include any
+        # columns from the model that are not in the existing_columns. If we did, it would lead to
+        # an error when we tried to alter the table.
+        for column in existing_columns:
+            name = column.column
+            if (
+                name in columns
+                and "description" in columns[name]
+                and columns[name]["description"] != (column.comment or "")
+            ):
+                return_columns[name] = columns[name]
+
+        return return_columns
diff --git a/dbt/include/databricks/macros/adapters.sql b/dbt/include/databricks/macros/adapters.sql
@@ -505,3 +505,11 @@
   ) -%}
   {% do return([false, new_relation]) %}
 {% endmacro %}
+
+{% macro databricks__persist_docs(relation, model, for_relation, for_columns) -%}
+  {% if for_columns and config.persist_column_docs() and model.columns %}
+    {%- set existing_columns = adapter.get_columns_in_relation(relation) -%}
+    {%- set columns_to_persist_docs = adapter.get_persist_doc_columns(existing_columns, model.columns) -%}
+    {% do alter_column_comment(relation, columns_to_persist_docs) %}
+  {% endif %}
+{% endmacro %}
diff --git a/dbt/include/databricks/macros/materializations/incremental/incremental.sql b/dbt/include/databricks/macros/materializations/incremental/incremental.sql
@@ -84,8 +84,6 @@
   {% do apply_grants(target_relation, grant_config, should_revoke) %}
 
   {% do persist_docs(target_relation, model) %}
-
-
   {% do optimize(target_relation) %}
 
   {{ run_hooks(post_hooks) }}

diff --git a/tests/functional/adapter/incremental/fixtures.py b/tests/functional/adapter/incremental/fixtures.py
@@ -0,0 +1,46 @@
+merge_update_columns_sql = """
+{{ config(
+    materialized = 'incremental',
+    unique_key = 'id',
+    merge_update_columns = ['msg'],
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
+
+{% else %}
+
+-- msg will be updated, color will be ignored
+select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
+union all
+select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
+
+{% endif %}
+"""
+
+no_comment_schema = """
+version: 2
+
+models:
+  - name: merge_update_columns_sql
+    columns:
+        - name: id
+        - name: msg
+        - name: color
+"""
+
+comment_schema = """
+version: 2
+
+models:
+  - name: merge_update_columns_sql
+    columns:
+        - name: id
+          description: This is the id column
+        - name: msg
+          description: This is the msg column
+        - name: color
+"""
diff --git a/tests/functional/adapter/incremental/test_incremental_persist_docs.py b/tests/functional/adapter/incremental/test_incremental_persist_docs.py
@@ -0,0 +1,34 @@
+from dbt.tests import util
+from tests.functional.adapter.incremental import fixtures
+
+import pytest
+
+
+class TestIncrementalPersistDocs:
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "merge_update_columns_sql.sql": fixtures.merge_update_columns_sql,
+            "schema.yml": fixtures.no_comment_schema,
+        }
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "test": {
+                    "+persist_docs": {
+                        "relation": True,
+                        "columns": True,
+                    },
+                }
+            }
+        }
+
+    def test_adding_comments(self, project):
+        util.run_dbt(["run"])
+        util.write_file(fixtures.comment_schema, "models", "schema.yml")
+        _, out = util.run_dbt_and_capture(["--debug", "run"])
+        assert "comment 'This is the id column'" in out
+        assert "comment 'This is the msg column'" in out
+        assert "comment ''" not in out