From e5d71be1563c19a3ccd828d522bf1463e3ab987c Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Wed, 24 Apr 2024 12:54:16 -0600
Subject: [PATCH] Handle case when an incremental table is empty (#5326)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
[Preview](https://docs-getdbt-com-git-dbeatty10-patch-4-dbt-labs.vercel.app/docs/build/incremental-models#filtering-rows-on-an-incremental-run)
## What are you changing in this pull request and why?
resolves #5321
To ensure that the updated code works without issues for a broad range
of users, I tested the following example against these data
platforms:
- bigquery
- databricks
- duckdb
- postgres
- redshift
- snowflake
On each platform, the table is empty after the `empty` scenario runs, like
the edge case scenario described in
https://github.com/dbt-labs/dbt-core/issues/9997
On each platform, the `non_empty` scenario run successfully added new data when it arrived.
### Reprex
Create this file:
`models/my_incremental.sql`
```sql
{{ config(materialized="incremental") }}

{#- Choose the source CTE from the `scenario` var; anything other than "empty" selects the populated one. -#}
{% set source_cte = "empty_cte" if var("scenario", "empty") == "empty" else "non_empty_cte" %}

with

-- A single row standing in for newly arrived data.
non_empty_cte as (
    select
        1 as id,
        cast('2024-01-01' as date) as event_time
),

-- Same column names and types, but the always-false predicate yields zero rows.
empty_cte as (
    select
        0 as id,
        cast('1999-12-31' as date) as event_time
    from non_empty_cte
    where 0 = 1
)

select
    id,
    event_time
from {{ source_cte }}
{% if is_incremental() %}
-- Incremental runs only: keep rows at or after the newest event_time already
-- loaded (>= so rows sharing that latest value are included). max() is null
-- when the existing table is empty, so fall back to a very old date.
where event_time >= (
    select coalesce(max(event_time), cast('1900-01-01' as date))
    from {{ this }}
)
{% endif %}
```
Assuming a `profiles.yml` with all the relevant profile names, run these
commands:
```shell
# For each adapter profile: rebuild the model with the empty scenario, run it
# again incrementally while still empty, then run it with data available.
# `dbt show` prints the table contents after every step.
for profile in duckdb postgres redshift databricks snowflake bigquery; do
  dbt run --profile "$profile" -s my_incremental --vars '{scenario: empty}' --full-refresh
  dbt show --profile "$profile" --inline "select * from {{ ref('my_incremental') }}"
  dbt run --profile "$profile" -s my_incremental --vars '{scenario: empty}'
  dbt show --profile "$profile" --inline "select * from {{ ref('my_incremental') }}"
  dbt run --profile "$profile" -s my_incremental --vars '{scenario: non_empty}'
  dbt show --profile "$profile" --inline "select * from {{ ref('my_incremental') }}"
done
```
## Checklist
- [x] Review the [Content style
guide](https://github.com/dbt-labs/docs.getdbt.com/blob/current/contributing/content-style-guide.md)
so my content adheres to these guidelines.
---
website/docs/docs/build/incremental-models.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/website/docs/docs/build/incremental-models.md b/website/docs/docs/build/incremental-models.md
index 7d73adb621b..5a0900b4cdd 100644
--- a/website/docs/docs/build/incremental-models.md
+++ b/website/docs/docs/build/incremental-models.md
@@ -70,7 +70,7 @@ from {{ ref('app_data_events') }}
-- this filter will only be applied on an incremental run
-- (uses >= to include records whose timestamp occurred since the last run of this model)
- where event_time >= (select max(event_time) from {{ this }})
+ where event_time >= (select coalesce(max(event_time), '1900-01-01') from {{ this }})
{% endif %}
```
@@ -141,7 +141,7 @@ from {{ ref('app_data_events') }}
-- this filter will only be applied on an incremental run
-- (uses >= to include records arriving later on the same day as the last run of this model)
- where date_day >= (select max(date_day) from {{ this }})
+ where date_day >= (select coalesce(max(date_day), '1900-01-01') from {{ this }})
{% endif %}