From e5d71be1563c19a3ccd828d522bf1463e3ab987c Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Wed, 24 Apr 2024 12:54:16 -0600 Subject: [PATCH] Handle case when an incremental table is empty (#5326) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Preview](https://docs-getdbt-com-git-dbeatty10-patch-4-dbt-labs.vercel.app//docs/build/incremental-models#filtering-rows-on-an-incremental-run) ## What are you changing in this pull request and why? resolves #5321 To ensure that the updated code will work for a broad number of users without issues, I tested the following example against these data platforms: - bigquery - databricks - duckdb - postgres - redshift - snowflake image ☝️ Notice the table is empty, like the edge case scenario described in https://github.com/dbt-labs/dbt-core/issues/9997 image ☝️ Notice it successfully added new data when it arrived.
### Reprex Create this file: `models/my_incremental.sql` ```sql {{ config(materialized="incremental") }} with non_empty_cte as ( select 1 as id, cast('2024-01-01' as date) as event_time ), empty_cte as ( select 0 as id, cast('1999-12-31' as date) as event_time from non_empty_cte where 0=1 ) select * {% if var("scenario", "empty") == "empty" %} from empty_cte {% else %} from non_empty_cte {% endif %} {% if is_incremental() %} -- this filter will only be applied on an incremental run -- (uses >= to include records whose timestamp occurred since the last run of this model) where event_time >= (select coalesce(max(event_time), cast('1900-01-01' as date)) from {{ this }}) {% endif %} ``` Assuming a `profiles.yml` with all the relevant profile names, run these commands: ```shell dbt run --profile duckdb -s my_incremental --vars '{scenario: empty}' --full-refresh dbt show --profile duckdb --inline "select * from {{ ref('my_incremental') }}" dbt run --profile duckdb -s my_incremental --vars '{scenario: empty}' dbt show --profile duckdb --inline "select * from {{ ref('my_incremental') }}" dbt run --profile duckdb -s my_incremental --vars '{scenario: non_empty}' dbt show --profile duckdb --inline "select * from {{ ref('my_incremental') }}" dbt run --profile postgres -s my_incremental --vars '{scenario: empty}' --full-refresh dbt show --profile postgres --inline "select * from {{ ref('my_incremental') }}" dbt run --profile postgres -s my_incremental --vars '{scenario: empty}' dbt show --profile postgres --inline "select * from {{ ref('my_incremental') }}" dbt run --profile postgres -s my_incremental --vars '{scenario: non_empty}' dbt show --profile postgres --inline "select * from {{ ref('my_incremental') }}" dbt run --profile redshift -s my_incremental --vars '{scenario: empty}' --full-refresh dbt show --profile redshift --inline "select * from {{ ref('my_incremental') }}" dbt run --profile redshift -s my_incremental --vars '{scenario: empty}' dbt show --profile redshift 
--inline "select * from {{ ref('my_incremental') }}" dbt run --profile redshift -s my_incremental --vars '{scenario: non_empty}' dbt show --profile redshift --inline "select * from {{ ref('my_incremental') }}" dbt run --profile databricks -s my_incremental --vars '{scenario: empty}' --full-refresh dbt show --profile databricks --inline "select * from {{ ref('my_incremental') }}" dbt run --profile databricks -s my_incremental --vars '{scenario: empty}' dbt show --profile databricks --inline "select * from {{ ref('my_incremental') }}" dbt run --profile databricks -s my_incremental --vars '{scenario: non_empty}' dbt show --profile databricks --inline "select * from {{ ref('my_incremental') }}" dbt run --profile snowflake -s my_incremental --vars '{scenario: empty}' --full-refresh dbt show --profile snowflake --inline "select * from {{ ref('my_incremental') }}" dbt run --profile snowflake -s my_incremental --vars '{scenario: empty}' dbt show --profile snowflake --inline "select * from {{ ref('my_incremental') }}" dbt run --profile snowflake -s my_incremental --vars '{scenario: non_empty}' dbt show --profile snowflake --inline "select * from {{ ref('my_incremental') }}" dbt run --profile bigquery -s my_incremental --vars '{scenario: empty}' --full-refresh dbt show --profile bigquery --inline "select * from {{ ref('my_incremental') }}" dbt run --profile bigquery -s my_incremental --vars '{scenario: empty}' dbt show --profile bigquery --inline "select * from {{ ref('my_incremental') }}" dbt run --profile bigquery -s my_incremental --vars '{scenario: non_empty}' dbt show --profile bigquery --inline "select * from {{ ref('my_incremental') }}" ```
## Checklist - [x] Review the [Content style guide](https://github.com/dbt-labs/docs.getdbt.com/blob/current/contributing/content-style-guide.md) so my content adheres to these guidelines. --- website/docs/docs/build/incremental-models.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/docs/build/incremental-models.md b/website/docs/docs/build/incremental-models.md index 7d73adb621b..5a0900b4cdd 100644 --- a/website/docs/docs/build/incremental-models.md +++ b/website/docs/docs/build/incremental-models.md @@ -70,7 +70,7 @@ from {{ ref('app_data_events') }} -- this filter will only be applied on an incremental run -- (uses >= to include records whose timestamp occurred since the last run of this model) - where event_time >= (select max(event_time) from {{ this }}) + where event_time >= (select coalesce(max(event_time), '1900-01-01') from {{ this }}) {% endif %} ``` @@ -141,7 +141,7 @@ from {{ ref('app_data_events') }} -- this filter will only be applied on an incremental run -- (uses >= to include records arriving later on the same day as the last run of this model) - where date_day >= (select max(date_day) from {{ this }}) + where date_day >= (select coalesce(max(date_day), '1900-01-01') from {{ this }}) {% endif %}