Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

View Column persist_docs + functional tests #519

Merged
merged 6 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- Adding capability to specify compute on a per model basis ([488](https://github.com/databricks/dbt-databricks/pull/488))
- Selectively persist column docs that have changed between runs of incremental ([513](https://github.com/databricks/dbt-databricks/pull/513))
- Enabling access control list for job runs (thanks @srggrs!)([518](https://github.com/databricks/dbt-databricks/pull/518))
- Allow persisting of column comments on views and retrieving comments for docs on Hive ([519](https://github.com/databricks/dbt-databricks/pull/519))

## dbt-databricks 1.7.1 (Nov 13, 2023)

Expand Down
1 change: 1 addition & 0 deletions dbt/adapters/databricks/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

@dataclass
class DatabricksColumn(SparkColumn):
table_comment: Optional[str] = None
comment: Optional[str] = None

TYPE_LABELS: ClassVar[Dict[str, str]] = {
Expand Down
15 changes: 15 additions & 0 deletions dbt/adapters/databricks/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
SHOW_TABLE_EXTENDED_MACRO_NAME = "show_table_extended"
SHOW_TABLES_MACRO_NAME = "show_tables"
SHOW_VIEWS_MACRO_NAME = "show_views"
GET_COLUMNS_COMMENTS_MACRO_NAME = "get_columns_comments"


@dataclass
Expand Down Expand Up @@ -104,6 +105,8 @@ def get_identifier_list_string(table_names: Set[str]) -> str:

@undefined_proof
class DatabricksAdapter(SparkAdapter):
INFORMATION_COMMENT_REGEX = re.compile(r"Comment: (.*)\n[A-Z][A-Za-z ]+:", re.DOTALL)

Relation = DatabricksRelation
Column = DatabricksColumn

Expand Down Expand Up @@ -388,6 +391,7 @@ def parse_describe_extended( # type: ignore[override]
table_type=relation.type,
table_owner=str(metadata.get(KEY_TABLE_OWNER)),
table_stats=table_stats,
table_comment=metadata.get("Comment"),
column=column["col_name"],
column_index=idx,
dtype=column["data_type"],
Expand Down Expand Up @@ -444,23 +448,32 @@ def _set_relation_information(self, relation: DatabricksRelation) -> DatabricksR

return self._get_updated_relation(relation)[0]

def _get_column_comments(self, relation: DatabricksRelation) -> Dict[str, str]:
"""Get the column comments for the relation."""
columns = self.execute_macro(GET_COLUMNS_COMMENTS_MACRO_NAME, kwargs={"relation": relation})
return {row[0]: row[2] for row in columns}

def parse_columns_from_information( # type: ignore[override]
self, relation: DatabricksRelation, information: str
) -> List[DatabricksColumn]:
owner_match = re.findall(self.INFORMATION_OWNER_REGEX, information)
owner = owner_match[0] if owner_match else None
comment_match = re.findall(self.INFORMATION_COMMENT_REGEX, information)
table_comment = comment_match[0] if comment_match else None
matches = re.finditer(self.INFORMATION_COLUMNS_REGEX, information)
columns = []
stats_match = re.findall(self.INFORMATION_STATISTICS_REGEX, information)
raw_table_stats = stats_match[0] if stats_match else None
table_stats = DatabricksColumn.convert_table_stats(raw_table_stats)

for match_num, match in enumerate(matches):
column_name, column_type, nullable = match.groups()
column = DatabricksColumn(
table_database=relation.database,
table_schema=relation.schema,
table_name=relation.table,
table_type=relation.type,
table_comment=table_comment,
column_index=match_num,
table_owner=owner,
column=column_name,
Expand Down Expand Up @@ -583,11 +596,13 @@ def _get_columns_for_catalog( # type: ignore[override]
) -> Iterable[Dict[str, Any]]:
columns = self.parse_columns_from_information(relation, information)

comments = self._get_column_comments(relation)
for column in columns:
# convert DatabricksRelation into catalog dicts
as_dict = column.to_column_dict()
as_dict["column_name"] = as_dict.pop("column", None)
as_dict["column_type"] = as_dict.pop("dtype")
as_dict["column_comment"] = comments[as_dict["column_name"]]
yield as_dict

def add_query(
Expand Down
32 changes: 32 additions & 0 deletions dbt/include/databricks/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,32 @@
{%- endif -%}
{%- endmacro -%}

{% macro get_column_comment_sql(column_name, column_dict) -%}
{% if column_name in column_dict and column_dict[column_name]["description"] -%}
{% set escaped_description = column_dict[column_name]["description"] | replace("'", "\\'") %}
{% set column_comment_clause = "comment '" ~ escaped_description ~ "'" %}
{%- endif -%}
{{ adapter.quote(column_name) }} {{ column_comment_clause }}
{% endmacro %}

{% macro get_persist_docs_column_list(model_columns, query_columns) %}
{% for column_name in query_columns %}
{{ get_column_comment_sql(column_name, model_columns) }}
{{- ", " if not loop.last else "" }}
{% endfor %}
{% endmacro %}

{% macro databricks__create_view_as(relation, sql) -%}
create or replace view {{ relation }}
{% if config.persist_column_docs() -%}
{% set model_columns = model.columns %}
{% set query_columns = get_columns_in_query(sql) %}
{% if query_columns %}
(
{{ get_persist_docs_column_list(model_columns, query_columns) }}
)
{% endif %}
{% endif %}
{{ comment_clause() }}
{%- set contract_config = config.get('contract') -%}
{% if contract_config and contract_config.enforced %}
Expand Down Expand Up @@ -512,4 +536,12 @@
{%- set columns_to_persist_docs = adapter.get_persist_doc_columns(existing_columns, model.columns) -%}
{% do alter_column_comment(relation, columns_to_persist_docs) %}
{% endif %}
{% endmacro %}

{% macro get_columns_comments(relation) -%}
{% call statement('get_columns_comments', fetch_result=True) -%}
describe table {{ relation }}
{% endcall %}

{% do return(load_result('get_columns_comments').table) %}
{% endmacro %}
67 changes: 67 additions & 0 deletions tests/functional/adapter/persist_docs/test_persist_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import json
from dbt.tests.adapter.persist_docs.test_persist_docs import (
BasePersistDocsBase,
BasePersistDocsColumnMissing,
BasePersistDocsCommentOnQuotedColumn,
)
from dbt.tests import util

import pytest


class TestPersistDocs(BasePersistDocsBase):
def _assert_has_view_comments(
self, view_node, has_node_comments=True, has_column_comments=True
):
view_comment = view_node["metadata"]["comment"]
if has_node_comments:
assert view_comment.startswith("View model description")
self._assert_common_comments(view_comment)
else:
assert view_comment == "" or view_comment is None

view_id_comment = view_node["columns"]["id"]["comment"]
if has_column_comments:
assert view_id_comment.startswith("id Column description")
self._assert_common_comments(view_id_comment)
else:
assert view_id_comment is None

view_name_comment = view_node["columns"]["name"]["comment"]
assert view_name_comment is None

@pytest.fixture(scope="class")
def project_config_update(self):
return {
"models": {
"test": {
"+persist_docs": {
"relation": True,
"columns": True,
},
}
}
}

def test_has_comments(self, project):
util.run_dbt(["docs", "generate"])
with open("target/catalog.json") as fp:
catalog_data = json.load(fp)
assert "nodes" in catalog_data
assert len(catalog_data["nodes"]) == 4
table_node = catalog_data["nodes"]["model.test.table_model"]
self._assert_has_table_comments(table_node)

view_node = catalog_data["nodes"]["model.test.view_model"]
self._assert_has_view_comments(view_node)

no_docs_node = catalog_data["nodes"]["model.test.no_docs_model"]
self._assert_has_view_comments(no_docs_node, False, False)


class TestPersistDocsColumnMissing(BasePersistDocsColumnMissing):
pass


class TestPersistDocsCommentOnQuotedColumn(BasePersistDocsCommentOnQuotedColumn):
pass

This file was deleted.

10 changes: 0 additions & 10 deletions tests/integration/persist_docs/models/my_fun_docs.md

This file was deleted.

1 change: 0 additions & 1 deletion tests/integration/persist_docs/models/no_docs_model.sql

This file was deleted.

119 changes: 0 additions & 119 deletions tests/integration/persist_docs/models/schema.yml

This file was deleted.

2 changes: 0 additions & 2 deletions tests/integration/persist_docs/models/table_delta_model.sql

This file was deleted.

2 changes: 0 additions & 2 deletions tests/integration/persist_docs/models/view_model.sql

This file was deleted.

3 changes: 0 additions & 3 deletions tests/integration/persist_docs/seeds/seed.csv

This file was deleted.

26 changes: 0 additions & 26 deletions tests/integration/persist_docs/seeds/seeds.yml

This file was deleted.

Loading
Loading