From 78aa1a5dca6beb689ede015e94aab2227be4dbc5 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 27 Aug 2024 22:05:02 -0400 Subject: [PATCH 01/10] update snapshot.sql to use snapshot column names --- dbt/include/spark/macros/materializations/snapshot.sql | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index a397f84e5..795dc8b29 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -13,6 +13,8 @@ {% macro spark__snapshot_merge_sql(target, source, insert_cols) -%} + {%- set dbt_valid_to = config.get("dbt_valid_to_column_name") or "dbt_valid_to" -%} + {%- set dbt_scd_id = config.get("dbt_scd_id_column_name") or "dbt_scd_id" -%} merge into {{ target }} as DBT_INTERNAL_DEST {% if target.is_iceberg %} @@ -21,12 +23,12 @@ {% else %} using {{ source }} as DBT_INTERNAL_SOURCE {% endif %} - on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id + on DBT_INTERNAL_SOURCE.{{ dbt_scd_id }} = DBT_INTERNAL_DEST.{{ dbt_scd_id }} when matched - and DBT_INTERNAL_DEST.dbt_valid_to is null + and DBT_INTERNAL_DEST.{{ dbt_valid_to }} is null and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete') then update - set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to + set {{ dbt_valid_to }} = DBT_INTERNAL_SOURCE.{{ dbt_valid_to }} when not matched and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert' From a83dc9517714b0e978b610381c4fe9e15f4eec53 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 29 Aug 2024 18:30:41 -0400 Subject: [PATCH 02/10] Get snapshot_table_column_names --- dbt/include/spark/macros/materializations/snapshot.sql | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index 795dc8b29..9c8972415 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -83,13 +83,12 @@ {% materialization snapshot, adapter='spark' %} - {%- set config = model['config'] -%} {%- set target_table = model.get('alias', model.get('name')) -%} {%- set strategy_name = config.get('strategy') -%} {%- set unique_key = config.get('unique_key') %} - {%- set file_format = config.get('file_format', 'parquet') -%} + {%- set file_format = config.get('file_format') or 'parquet' -%} {%- set grant_config = config.get('grants') -%} {% set target_relation_exists, target_relation = get_or_create_relation( @@ -128,7 +127,7 @@ {{ run_hooks(pre_hooks, inside_transaction=True) }} {% set strategy_macro = strategy_dispatch(strategy_name) %} - {% set strategy = strategy_macro(model, "snapshotted_data", "source_data", config, target_relation_exists) %} + {% set strategy = strategy_macro(model, "snapshotted_data", "source_data", model['config'], target_relation_exists) %} {% if not target_relation_exists %} @@ -137,7 +136,9 @@ {% else %} - {{ adapter.valid_snapshot_target(target_relation) }} + {% set snapshot_table_column_names = config.get("snapshot_table_column_names") %} + + {{ adapter.valid_snapshot_target(target_relation, snapshot_table_column_names) }} {% set staging_table = spark_build_snapshot_staging_table(strategy, sql, target_relation) %} From 9a374be081158c566691bac115a36f3a5badab57 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 3 Sep 2024 16:10:08 -0400 Subject: [PATCH 03/10] Update branches in dev-requirements.txt and changie --- .changes/unreleased/Features-20240903-161003.yaml | 6 ++++++ dev-requirements.txt | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 .changes/unreleased/Features-20240903-161003.yaml diff --git a/.changes/unreleased/Features-20240903-161003.yaml b/.changes/unreleased/Features-20240903-161003.yaml new file mode 100644 index 000000000..57a0f14c0 --- /dev/null +++ b/.changes/unreleased/Features-20240903-161003.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Allow configuring snapshot column names +time: 2024-09-03T16:10:03.021221-04:00 +custom: + Author: gshank + Issue: "1096" diff --git a/dev-requirements.txt b/dev-requirements.txt index 055cb92f7..a1dcf6906 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,8 +1,8 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? -git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-core.git@snapshot_column_names#egg=dbt-core&subdirectory=core git+https://github.com/dbt-labs/dbt-common.git -git+https://github.com/dbt-labs/dbt-adapters.git +git+https://github.com/dbt-labs/dbt-adapters.git@snapshot_column_names git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter # dev From 5218ad0d06e90b4e01c68c5938efe76561b2fa5e Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Fri, 13 Sep 2024 14:38:03 -0400 Subject: [PATCH 04/10] Update for new columns names format --- dbt/include/spark/macros/materializations/snapshot.sql | 9 ++++----- dev-requirements.txt | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index 9c8972415..f8a6349ec 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -13,8 +13,7 @@ {% macro spark__snapshot_merge_sql(target, source, insert_cols) -%} - {%- set dbt_valid_to = config.get("dbt_valid_to_column_name") or "dbt_valid_to" -%} - {%- set dbt_scd_id = config.get("dbt_scd_id_column_name") or "dbt_scd_id" -%} + {%- set stcn = config.get("snapshot_table_column_names") or get_snapshot_table_column_names() -%} merge into {{ target }} as DBT_INTERNAL_DEST {% if target.is_iceberg %} @@ -23,12 +22,12 @@ {% else %} using {{ source }} as DBT_INTERNAL_SOURCE {% endif %} - on DBT_INTERNAL_SOURCE.{{ dbt_scd_id }} = DBT_INTERNAL_DEST.{{ dbt_scd_id }} + on DBT_INTERNAL_SOURCE.{{ stcn.dbt_scd_id }} = DBT_INTERNAL_DEST.{{ stcn.dbt_scd_id }} when matched - and DBT_INTERNAL_DEST.{{ dbt_valid_to }} is null + and DBT_INTERNAL_DEST.{{ stcn.dbt_valid_to }} is null and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete') then update - set {{ dbt_valid_to }} = DBT_INTERNAL_SOURCE.{{ dbt_valid_to }} + set {{ stcn.dbt_valid_to }} = DBT_INTERNAL_SOURCE.{{ dbt_valid_to }} when not matched and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert' diff --git a/dev-requirements.txt b/dev-requirements.txt index a1dcf6906..505baab91 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,7 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? git+https://github.com/dbt-labs/dbt-core.git@snapshot_column_names#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-common.git +git+https://github.com/dbt-labs/dbt-common.git@object_mergebehavior git+https://github.com/dbt-labs/dbt-adapters.git@snapshot_column_names git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter From 402265a1651ad4065ea8e782d4c8182635d0f117 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Fri, 13 Sep 2024 17:33:49 -0400 Subject: [PATCH 05/10] remove branches from dev-requirements.txt --- dev-requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 505baab91..055cb92f7 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,8 +1,8 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? -git+https://github.com/dbt-labs/dbt-core.git@snapshot_column_names#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-common.git@object_mergebehavior -git+https://github.com/dbt-labs/dbt-adapters.git@snapshot_column_names +git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-common.git +git+https://github.com/dbt-labs/dbt-adapters.git git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter # dev From cc960f3efbaf3b26126b52907938d948c6266466 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 16 Sep 2024 16:54:36 -0400 Subject: [PATCH 06/10] Use get_snapshot_table_column_names macro --- dbt/include/spark/macros/materializations/snapshot.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index f8a6349ec..cd4f37d2e 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -135,9 +135,9 @@ {% else %} - {% set snapshot_table_column_names = config.get("snapshot_table_column_names") %} + {% set stcn = config.get("snapshot_table_column_names") or get_snapshot_table_column_names() %} - {{ adapter.valid_snapshot_target(target_relation, snapshot_table_column_names) }} + {{ adapter.valid_snapshot_target(target_relation, stcn) }} {% set staging_table = spark_build_snapshot_staging_table(strategy, sql, target_relation) %} From 7cda48751ce1c979c740d37c316d3ff6a724aba0 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 16 Sep 2024 18:51:28 -0400 Subject: [PATCH 07/10] typo --- dbt/include/spark/macros/materializations/snapshot.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index cd4f37d2e..25d172403 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -27,7 +27,7 @@ and DBT_INTERNAL_DEST.{{ stcn.dbt_valid_to }} is null and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete') then update - set {{ stcn.dbt_valid_to }} = DBT_INTERNAL_SOURCE.{{ dbt_valid_to }} + set {{ stcn.dbt_valid_to }} = DBT_INTERNAL_SOURCE.{{ stcn.dbt_valid_to }} when not matched and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert' From 679752957193a674bf6c2f880edc2bbfbbdd9461 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 16 Sep 2024 20:21:53 -0400 Subject: [PATCH 08/10] Use snapshot_column_names branch for dbt-adapters --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 055cb92f7..aaf7e1079 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,7 +2,7 @@ # TODO: how to automate switching from develop to version branches? git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core git+https://github.com/dbt-labs/dbt-common.git -git+https://github.com/dbt-labs/dbt-adapters.git +git+https://github.com/dbt-labs/dbt-adapters.git@snapshot_column_names git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter # dev From ba7c611d3ce38991a33d95148273e4b20a5cb14f Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 18 Sep 2024 13:44:08 -0400 Subject: [PATCH 09/10] Change stcn to "columns" --- .../spark/macros/materializations/snapshot.sql | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index 25d172403..43c4750f6 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -13,7 +13,7 @@ {% macro spark__snapshot_merge_sql(target, source, insert_cols) -%} - {%- set stcn = config.get("snapshot_table_column_names") or get_snapshot_table_column_names() -%} + {%- set columns = config.get("snapshot_table_column_names") or get_snapshot_table_column_names() -%} merge into {{ target }} as DBT_INTERNAL_DEST {% if target.is_iceberg %} @@ -22,12 +22,12 @@ {% else %} using {{ source }} as DBT_INTERNAL_SOURCE {% endif %} - on DBT_INTERNAL_SOURCE.{{ stcn.dbt_scd_id }} = DBT_INTERNAL_DEST.{{ stcn.dbt_scd_id }} + on DBT_INTERNAL_SOURCE.{{ columns.dbt_scd_id }} = DBT_INTERNAL_DEST.{{ columns.dbt_scd_id }} when matched - and DBT_INTERNAL_DEST.{{ stcn.dbt_valid_to }} is null + and DBT_INTERNAL_DEST.{{ columns.dbt_valid_to }} is null and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete') then update - set {{ stcn.dbt_valid_to }} = DBT_INTERNAL_SOURCE.{{ stcn.dbt_valid_to }} + set {{ columns.dbt_valid_to }} = DBT_INTERNAL_SOURCE.{{ columns.dbt_valid_to }} when not matched and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert' @@ -135,9 +135,9 @@ {% else %} - {% set stcn = config.get("snapshot_table_column_names") or get_snapshot_table_column_names() %} + {% set columns = config.get("snapshot_table_column_names") or get_snapshot_table_column_names() %} - {{ adapter.valid_snapshot_target(target_relation, stcn) }} + {{ adapter.valid_snapshot_target(target_relation, columns) }} {% set staging_table = spark_build_snapshot_staging_table(strategy, sql, target_relation) %} From 3837a1a15e867a16894238ba382ff573ff10e773 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 19 Sep 2024 09:21:50 -0400 Subject: [PATCH 10/10] Update lower bound pin of dbt-adapters to 1.7.0 --- dev-requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index aaf7e1079..055cb92f7 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,7 +2,7 @@ # TODO: how to automate switching from develop to version branches? git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core git+https://github.com/dbt-labs/dbt-common.git -git+https://github.com/dbt-labs/dbt-adapters.git@snapshot_column_names +git+https://github.com/dbt-labs/dbt-adapters.git git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter # dev diff --git a/setup.py b/setup.py index 9e1fa31e3..2de68a6ff 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ def _get_plugin_version_dict(): install_requires=[ "sqlparams>=3.0.0", "dbt-common>=1.0.4,<2.0", - "dbt-adapters>=1.1.1,<2.0", + "dbt-adapters>=1.7.0,<2.0", # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency "dbt-core>=1.8.0", ],