From 68bcc26d10a0e1b39e6c3f79b4e98f415f5af01e Mon Sep 17 00:00:00 2001 From: georgewoodhead <50772749+georgewoodhead@users.noreply.github.com> Date: Tue, 28 Nov 2023 13:18:16 +0000 Subject: [PATCH] Simplify base events this run (close #60) --- .../.scripts/integration_test.sh | 11 +- macros/identifiers.sql | 2 +- .../dtype_to_type.sql | 34 ++ .../field.sql | 39 --- .../field_alias.sql | 18 ++ .../get_context_fields.sql | 23 ++ .../get_enabled_context_fields.sql | 56 ++++ .../media_ad_break_field.sql | 23 -- .../media_ad_field.sql | 23 -- .../media_ad_quartile_event_field.sql | 23 -- .../media_event_type_field.sql | 28 +- .../media_player_field.sql | 40 --- .../media_player_type_field.sql | 52 +-- .../media_session_field.sql | 23 -- .../media_type_field.sql | 35 +-- .../percent_progress_field.sql | 94 ++---- .../playback_quality_field.sql | 54 +--- .../player_id_field.sql | 39 --- .../snakeify_case.sql | 16 + .../source_url_field.sql | 39 --- .../web_or_mobile_field.sql | 33 -- ...plow_media_player_base_events_this_run.sql | 180 ----------- ...plow_media_player_base_events_this_run.sql | 176 ----------- ...plow_media_player_base_events_this_run.sql | 189 ----------- models/base/scratch/default/sources.yml | 33 -- ...plow_media_player_base_events_this_run.sql | 187 ----------- ...plow_media_player_base_events_this_run.sql | 295 ++++++++++++++++++ ...w_media_player_media_ad_views_this_run.sql | 30 +- .../snowplow_media_player_media_ads.sql | 74 ++--- .../snowplow_media_player_base_this_run.sql | 54 ++-- .../snowplow_media_player_media_stats.sql | 14 +- 31 files changed, 602 insertions(+), 1335 deletions(-) create mode 100644 macros/snowplow_media_player_base_events_this_run/dtype_to_type.sql delete mode 100644 macros/snowplow_media_player_base_events_this_run/field.sql create mode 100644 macros/snowplow_media_player_base_events_this_run/field_alias.sql create mode 100644 macros/snowplow_media_player_base_events_this_run/get_context_fields.sql create mode 100644 
macros/snowplow_media_player_base_events_this_run/get_enabled_context_fields.sql delete mode 100644 macros/snowplow_media_player_base_events_this_run/media_ad_break_field.sql delete mode 100644 macros/snowplow_media_player_base_events_this_run/media_ad_field.sql delete mode 100644 macros/snowplow_media_player_base_events_this_run/media_ad_quartile_event_field.sql delete mode 100644 macros/snowplow_media_player_base_events_this_run/media_player_field.sql delete mode 100644 macros/snowplow_media_player_base_events_this_run/media_session_field.sql delete mode 100644 macros/snowplow_media_player_base_events_this_run/player_id_field.sql create mode 100644 macros/snowplow_media_player_base_events_this_run/snakeify_case.sql delete mode 100644 macros/snowplow_media_player_base_events_this_run/source_url_field.sql delete mode 100644 macros/snowplow_media_player_base_events_this_run/web_or_mobile_field.sql delete mode 100644 models/base/scratch/bigquery/snowplow_media_player_base_events_this_run.sql delete mode 100644 models/base/scratch/databricks/snowplow_media_player_base_events_this_run.sql delete mode 100644 models/base/scratch/default/snowplow_media_player_base_events_this_run.sql delete mode 100644 models/base/scratch/default/sources.yml delete mode 100644 models/base/scratch/snowflake/snowplow_media_player_base_events_this_run.sql create mode 100644 models/base/scratch/snowplow_media_player_base_events_this_run.sql diff --git a/integration_tests/.scripts/integration_test.sh b/integration_tests/.scripts/integration_test.sh index 27c8f0b..3ac227b 100755 --- a/integration_tests/.scripts/integration_test.sh +++ b/integration_tests/.scripts/integration_test.sh @@ -31,11 +31,20 @@ for db in ${DATABASES[@]}; do echo "Snowplow media player integration tests (v1 only): Execute models - run 1/6" eval "dbt run --target $db --full-refresh --vars '{snowplow__allow_refresh: true, snowplow__enable_media_player_v2: false, snowplow__enable_media_session: false, 
snowplow__enable_media_ad: false, snowplow__enable_media_ad_break: false, snowplow__enable_ad_quartile_event: false, snowplow__enable_mobile_events: false}'" || exit 1; - + echo "Snowplow media player integration tests (v1 only): Execute models - run 2/2" eval "dbt run --target $db --vars '{snowplow__allow_refresh: true, snowplow__enable_media_player_v2: false, snowplow__enable_media_session: false, snowplow__enable_media_ad: false, snowplow__enable_media_ad_break: false, snowplow__enable_ad_quartile_event: false, snowplow__enable_mobile_events: false}'" || exit 1; + # This run and the subsequent incremental ones exist just to make sure that the models work with the older contexts disabled + echo "Snowplow media player integration tests (v2 only): Execute models - run 1/6" + + eval "dbt run --target $db --full-refresh --vars '{snowplow__allow_refresh: true, snowplow__backfill_limit_days: 3000, snowplow__enable_youtube: false, snowplow__enable_whatwg_media: false, snowplow__enable_whatwg_video: false, snowplow__enable_media_player_v1: false}'" || exit 1; + + echo "Snowplow media player integration tests (v2 only): Execute models - run 2/2" + + eval "dbt run --target $db --vars '{snowplow__enable_youtube: false, snowplow__enable_whatwg_media: false, snowplow__enable_whatwg_video: false, snowplow__enable_media_player_v1: false}'" || exit 1; + echo "Snowplow media player integration tests: Execute models - run 1/6" diff --git a/macros/identifiers.sql b/macros/identifiers.sql index 0e8c09c..1831ed2 100644 --- a/macros/identifiers.sql +++ b/macros/identifiers.sql @@ -123,7 +123,7 @@ You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 {% macro user_identifiers() %} - {{ return(adapter.dispatch('user_identifiers', 'snowplow_unified')()) }} + {{ return(adapter.dispatch('user_identifiers', 'snowplow_media_player')()) }} {% endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/dtype_to_type.sql 
b/macros/snowplow_media_player_base_events_this_run/dtype_to_type.sql new file mode 100644 index 0000000..67657a2 --- /dev/null +++ b/macros/snowplow_media_player_base_events_this_run/dtype_to_type.sql @@ -0,0 +1,34 @@ +{# +Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. +You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{% macro dtype_to_type(dtype) -%} + {{ return(adapter.dispatch('dtype_to_type', 'snowplow_media_player')(dtype)) }} +{%- endmacro %} + +{% macro default__dtype_to_type(dtype) -%} + + {%- if 'string' in dtype -%} + {{ type_string() }} + + {%- elif 'integer' in dtype -%} + {{ type_int() }} + + {%- elif 'number' in dtype -%} + {{ type_numeric() }} + + {%- elif 'float' in dtype -%} + {{ type_float() }} + + {%- elif 'boolean' in dtype -%} + {{ type_boolean() }} + + {%- else -%} + {{ exceptions.raise_compiler_error(dtype ~ ' dtype is not supported, please use data type specified in schema') }} + + {%- endif -%} + +{%- endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/field.sql b/macros/snowplow_media_player_base_events_this_run/field.sql deleted file mode 100644 index 5b19cbd..0000000 --- a/macros/snowplow_media_player_base_events_this_run/field.sql +++ /dev/null @@ -1,39 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
-You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{% macro field(property, col_prefix=None, field=None) -%} - {{ return(adapter.dispatch('field')(property, col_prefix, field)) }} -{%- endmacro %} - -{% macro bigquery__field(property, col_prefix, field) -%} - {% if property is string -%} - {{ property }} - {%- else -%} - {{ snowplow_utils.get_optional_fields( - enabled=true, - fields=[{'field': property.get('field', field), 'dtype': property.get('dtype', 'string') }], - col_prefix=property.get('col_prefix', col_prefix), - relation=source('atomic', 'events'), - relation_alias=property.get('relation_alias', 'a'), - include_field_alias=false - ) }} - {%- endif %} -{%- endmacro %} - -{% macro default__field(property, col_prefix, field) -%} - {% if property is string -%} - {{ property }} - {%- else -%} - {{ snowplow_utils.get_field( - column_name=property.get('col_prefix', col_prefix), - field_name=property.get('field', field), - table_alias=property.get('relation_alias', 'a'), - type=property.get('dtype', 'string'), - array_index='0' if 'contexts_' in property.get('col_prefix', col_prefix) else none - ) }} - {%- endif %} -{%- endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/field_alias.sql b/macros/snowplow_media_player_base_events_this_run/field_alias.sql new file mode 100644 index 0000000..d9cae03 --- /dev/null +++ b/macros/snowplow_media_player_base_events_this_run/field_alias.sql @@ -0,0 +1,18 @@ +{# +Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
+You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{% macro field_alias(field, prefix=None) -%} + {{ return(adapter.dispatch('field_alias', 'snowplow_media_player')(field, prefix)) }} +{%- endmacro %} + +{% macro default__field_alias(field, prefix) -%} + + {% set alias = (prefix~'_' if prefix else '')~(snakeify_case(field.get('field'))) -%} + + {{ alias }} + +{%- endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/get_context_fields.sql b/macros/snowplow_media_player_base_events_this_run/get_context_fields.sql new file mode 100644 index 0000000..cc89d46 --- /dev/null +++ b/macros/snowplow_media_player_base_events_this_run/get_context_fields.sql @@ -0,0 +1,23 @@ +{# +Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
+You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{% macro get_context_fields(fields, enabled, context, prefix=None) %} + {{ return(adapter.dispatch('get_context_fields', 'snowplow_media_player')(fields, enabled, context, prefix)) }} +{% endmacro %} + +{% macro default__get_context_fields(fields, enabled, context, prefix) %} + + {%- if enabled -%} + {{ get_enabled_context_fields(fields, context, prefix) }} + {%- else -%} + {% for f in fields %} + , cast(null as {{ dtype_to_type(f.get('dtype')) }}) as {{ field_alias(f, prefix) }} + {%- endfor %} + + {%- endif -%} + +{% endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/get_enabled_context_fields.sql b/macros/snowplow_media_player_base_events_this_run/get_enabled_context_fields.sql new file mode 100644 index 0000000..cd8303c --- /dev/null +++ b/macros/snowplow_media_player_base_events_this_run/get_enabled_context_fields.sql @@ -0,0 +1,56 @@ +{# +Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
+You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{% macro get_enabled_context_fields(fields, col_prefix, field_prefix) -%} + {{ return(adapter.dispatch('get_enabled_context_fields', 'snowplow_media_player')(fields, col_prefix, field_prefix)) }} +{%- endmacro %} + + +{% macro postgres__get_enabled_context_fields(fields, col_prefix, field_prefix) -%} +{%- endmacro %} + + +{% macro bigquery__get_enabled_context_fields(fields, col_prefix, field_prefix) -%} + {% for f in fields %} + , {{ snowplow_utils.get_optional_fields( + enabled=true, + fields=[{'field': snakeify_case(f.get('field')), 'dtype': f.get('dtype') }], + col_prefix=col_prefix, + relation=source('atomic', 'events'), + relation_alias='ev', + include_field_alias=false + ) }} as {{ field_alias(f, field_prefix) }} + {%- endfor %} +{%- endmacro %} + + +{% macro snowflake__get_enabled_context_fields(fields, col_prefix, field_prefix) -%} + {% for f in fields %} + {% set type = dtype_to_type(f.get('dtype')) %} + , {{ snowplow_utils.get_field( + column_name=col_prefix, + field_name=f.get('field'), + table_alias='ev', + type=type, + array_index='0' if 'contexts_' in col_prefix else none + ) }} as {{ field_alias(f, field_prefix) }} + {%- endfor %} +{%- endmacro %} + + +{% macro spark__get_enabled_context_fields(fields, col_prefix, field_prefix) -%} + {% for f in fields %} + {% set type = dtype_to_type(f.get('dtype')) %} + , {{ snowplow_utils.get_field( + column_name=col_prefix, + field_name=snakeify_case(f.get('field')), + table_alias='ev', + type=type, + array_index='0' if 'contexts_' in col_prefix else none + ) }} as {{ field_alias(f, field_prefix) }} + {%- endfor %} +{%- endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/media_ad_break_field.sql b/macros/snowplow_media_player_base_events_this_run/media_ad_break_field.sql deleted file mode 100644 index 043bb3d..0000000 ---
a/macros/snowplow_media_player_base_events_this_run/media_ad_break_field.sql +++ /dev/null @@ -1,23 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. -You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{% macro media_ad_break_field(property) %} - {%- if var("snowplow__enable_media_ad_break") -%} - {{ field( - property, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_ad_break_1' - ) }} - {%- else -%} - {% if property is string and target.type not in ['postgres', 'redshift'] -%} - {{ property }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ property.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif -%} -{% endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/media_ad_field.sql b/macros/snowplow_media_player_base_events_this_run/media_ad_field.sql deleted file mode 100644 index f2c9bb0..0000000 --- a/macros/snowplow_media_player_base_events_this_run/media_ad_field.sql +++ /dev/null @@ -1,23 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
-You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{% macro media_ad_field(property) %} - {%- if var("snowplow__enable_media_ad") -%} - {{ field( - property, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_ad_1' - ) }} - {%- else -%} - {% if property is string and target.type not in ['postgres', 'redshift'] -%} - {{ property }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ property.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif -%} -{% endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/media_ad_quartile_event_field.sql b/macros/snowplow_media_player_base_events_this_run/media_ad_quartile_event_field.sql deleted file mode 100644 index 622abfa..0000000 --- a/macros/snowplow_media_player_base_events_this_run/media_ad_quartile_event_field.sql +++ /dev/null @@ -1,23 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
-You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{% macro media_ad_quartile_event_field(property) %} - {%- if var("snowplow__enable_ad_quartile_event") -%} - {{ field( - property, - col_prefix='unstruct_event_com_snowplowanalytics_snowplow_media_ad_quartile_event_1' - ) }} - {%- else -%} - {% if property is string and target.type not in ['postgres', 'redshift'] -%} - {{ property }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ property.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif -%} -{% endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/media_event_type_field.sql b/macros/snowplow_media_player_base_events_this_run/media_event_type_field.sql index 1f1d3a1..a93856d 100644 --- a/macros/snowplow_media_player_base_events_this_run/media_event_type_field.sql +++ b/macros/snowplow_media_player_base_events_this_run/media_event_type_field.sql @@ -5,33 +5,19 @@ and you may not use this file except in compliance with the Snowplow Personal an You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ #} -{% macro media_event_type_field(media_player_event_type, event_name) %} +{% macro media_event_type_field() -%} coalesce( - {% if var("snowplow__enable_media_player_v1") -%} - -- for v1 media schemas, use the type property in media_player_event - {{ field( - media_player_event_type, - col_prefix="unstruct_event_com_snowplowanalytics_snowplow_media_player_event_1", - field='type' - ) }} - {%- else -%} - {% if media_player_event_type is string and target.type not in ['postgres', 'redshift'] -%} - {{ media_player_event_type }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ media_player_event_type.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %}, + -- for v1 
media schemas, use the type property in media_player_event + media_player_event__type, -- for v2 media schemas, the type is the event name, remove underscores to match v1 event types - case - when right({{ event_name }}, 6) = '_event' + case + when right(event_name, 6) = '_event' then replace( - left({{ event_name }}, length({{ event_name }}) - 6), + left(event_name, length(event_name) - 6), '_', '' ) else null end ) -{% endmacro %} +{%- endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/media_player_field.sql b/macros/snowplow_media_player_base_events_this_run/media_player_field.sql deleted file mode 100644 index 4ae7281..0000000 --- a/macros/snowplow_media_player_base_events_this_run/media_player_field.sql +++ /dev/null @@ -1,40 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
-You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{% macro media_player_field(v1, v2, default='null') %} - coalesce( - {% if var("snowplow__enable_media_player_v2") -%} - {{ field( - v2, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_player_2' - ) }} - {%- else -%} - {% if v2 is string and target.type not in ['postgres', 'redshift'] -%} - {{ v2 }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ v2.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %}, - {%- if v1 is not none and var("snowplow__enable_media_player_v1") -%} - {{ field( - v1, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_player_1' - ) }} - {%- else -%} - {% if v1 is string and target.type not in ['postgres', 'redshift'] -%} - {{ v1 }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ v1.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %}, - {{ default }} - ) -{% endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/media_player_type_field.sql b/macros/snowplow_media_player_base_events_this_run/media_player_type_field.sql index 40660e3..e348e96 100644 --- a/macros/snowplow_media_player_base_events_this_run/media_player_type_field.sql +++ b/macros/snowplow_media_player_base_events_this_run/media_player_type_field.sql @@ -5,46 +5,20 @@ and you may not use this file except in compliance with the Snowplow Personal an You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ #} -{% macro media_player_type_field(v2_player_type, youtube_player_id, media_player_id) %} +{% macro media_player_type_field() -%} coalesce( - {% if var("snowplow__enable_media_player_v2") -%} - {{ field( - v2_player_type, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_player_2' - ) }} - {%- 
else -%} - {% if v2_player_type is string and target.type not in ['postgres', 'redshift'] -%} - {{ v2_player_type }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ v2_player_type.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %}, - {% if var("snowplow__enable_youtube") and var("snowplow__enable_whatwg_media") -%} - case - when {{ field( - youtube_player_id, - col_prefix='contexts_com_youtube_youtube_1' - ) }} is not null then 'com.youtube-youtube' - when {{ field( - media_player_id, - col_prefix='contexts_org_whatwg_media_element_1' - ) }} is not null then 'org.whatwg-media_element' + media_player_v2__player_type + {%- if var("snowplow__enable_youtube") and var("snowplow__enable_whatwg_media") %} + , case + when youtube__player_id is not null then 'com.youtube-youtube' + when html5_media_element__html_id is not null then 'org.whatwg-media_element' else 'unknown' end - {%- elif var("snowplow__enable_youtube") -%} - 'com.youtube-youtube' - {% elif var("snowplow__enable_whatwg_media") -%} - 'org.whatwg-media_element' - {%- else -%} - {% if youtube_player_id is string and target.type not in ['postgres', 'redshift'] -%} - {{ youtube_player_id }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ youtube_player_id.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {% endif %} + {%- elif var("snowplow__enable_youtube") %} + , 'com.youtube-youtube' + {%- elif var("snowplow__enable_whatwg_media") %} + , 'org.whatwg-media_element' + {%- endif -%} + , cast(null as {{ type_string() }}) ) -{% endmacro %} +{%- endmacro -%} diff --git a/macros/snowplow_media_player_base_events_this_run/media_session_field.sql b/macros/snowplow_media_player_base_events_this_run/media_session_field.sql deleted file mode 100644 index c98e501..0000000 --- a/macros/snowplow_media_player_base_events_this_run/media_session_field.sql +++ /dev/null @@ -1,23 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow 
Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. -You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{% macro media_session_field(property) %} - {% if var("snowplow__enable_media_session") -%} - {{ field( - property, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_session_1' - ) }} - {%- else -%} - {% if property is string and target.type not in ['postgres', 'redshift'] -%} - {{ property }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ property.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %} -{% endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/media_type_field.sql b/macros/snowplow_media_player_base_events_this_run/media_type_field.sql index 6cafe66..317da35 100644 --- a/macros/snowplow_media_player_base_events_this_run/media_type_field.sql +++ b/macros/snowplow_media_player_base_events_this_run/media_type_field.sql @@ -5,37 +5,14 @@ and you may not use this file except in compliance with the Snowplow Personal an You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ #} -{% macro media_type_field(v2_media_type, media_media_type) %} +{% macro media_type_field() -%} coalesce( - {% if var("snowplow__enable_media_player_v2") -%} - {{ field( - v2_media_type, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_player_2' - ) }} - {%- else -%} - {% if v2_media_type is string and target.type not in ['postgres', 'redshift'] -%} - {{ v2_media_type }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ v2_media_type.get('dtype', 'string') }}) - {%- else -%} - null - {% endif 
%} - {%- endif %}, + media_player_v2__media_type {% if var("snowplow__enable_whatwg_media") -%} - case when {{ field( - media_media_type, - col_prefix='contexts_org_whatwg_media_element_1' - ) }} = 'audio' then 'audio' else 'video' end + , case when html5_media_element__media_type = 'audio' then 'audio' else 'video' end {%- elif var("snowplow__enable_youtube") -%} - 'video' - {%- else -%} - {% if media_media_type is string and target.type not in ['postgres', 'redshift'] -%} - {{ media_media_type }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ media_media_type.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} + , 'video' {%- endif %} + , cast(null as {{ type_string() }}) ) -{% endmacro %} +{%- endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/percent_progress_field.sql b/macros/snowplow_media_player_base_events_this_run/percent_progress_field.sql index 3e9e297..5814261 100644 --- a/macros/snowplow_media_player_base_events_this_run/percent_progress_field.sql +++ b/macros/snowplow_media_player_base_events_this_run/percent_progress_field.sql @@ -5,68 +5,34 @@ and you may not use this file except in compliance with the Snowplow Personal an You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ #} -{% macro percent_progress_field(v1_percent_progress, v1_event_type, event_name, v2_current_time, v2_duration) %} - {%- set v2_percent_progres -%} - round({{ field( - v2_current_time, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_player_2' - ) }} / {{ field( - v2_duration, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_player_2' - ) }} * 100) - {%- endset -%} - coalesce( - {% if var("snowplow__enable_media_player_v1") -%} - case - when {{ field( - v1_event_type, - col_prefix="unstruct_event_com_snowplowanalytics_snowplow_media_player_event_1", - field='type' - ) }} = 'ended' - then 100 - else {{ 
field( - v1_percent_progress, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_player_1' - ) }} - end - {%- else -%} - {% if v1_percent_progress is string and target.type not in ['postgres', 'redshift'] -%} - {{ v1_percent_progress }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ v1_percent_progress.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %}, - {% if var("snowplow__enable_media_player_v2") -%} - case - when {{ event_name }} = 'end_event' - then 100 - when {{ event_name }} = 'percent_progress_event' - and coalesce({{ field( - v2_duration, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_player_2' - ) }}, 0) > 0 - then ( - case - {% for element in get_percentage_boundaries(var("snowplow__percent_progress_boundaries"))|sort|reverse %} - when {{ v2_percent_progres }} >= {{ element }} - then {{ element }} - {% endfor %} - else null - end - ) +{% macro percent_progress_field() %} - else null - end - {%- else -%} - {% if v2_duration is string and target.type not in ['postgres', 'redshift'] -%} - {{ v2_duration }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ v2_duration.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %} - ) -{% endmacro %} + {%- set v2_percent_progress -%} + round(media_player_v2__current_time / media_player_v2__duration * 100) + {%- endset -%} + + coalesce( + {% if var("snowplow__enable_media_player_v1") -%} + case + when media_player_event__type = 'ended' then 100 + else media_player_v1__percent_progress + end, + {%- endif %} + + {% if var("snowplow__enable_media_player_v2") -%} + case + when event_name = 'end_event' then 100 + when event_name = 'percent_progress_event' and coalesce(media_player_v2__duration, 0) > 0 + then ( + case + {% for element in get_percentage_boundaries(var("snowplow__percent_progress_boundaries"))|sort|reverse %} + when {{ v2_percent_progress }} >= {{ element }} then {{ element }} + {% 
endfor %} + end + ) + end, + {%- endif %} + cast(null as {{ type_numeric() }}) + ) + +{%- endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/playback_quality_field.sql b/macros/snowplow_media_player_base_events_this_run/playback_quality_field.sql index cc48947..0f2104c 100644 --- a/macros/snowplow_media_player_base_events_this_run/playback_quality_field.sql +++ b/macros/snowplow_media_player_base_events_this_run/playback_quality_field.sql @@ -5,53 +5,13 @@ and you may not use this file except in compliance with the Snowplow Personal an You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ #} -{% macro playback_quality_field(v2_quality, youtube_quality, video_width, video_height) %} +{% macro playback_quality_field() -%} coalesce( - {% if var("snowplow__enable_media_player_v2") -%} - {{ field( - v2_quality, - col_prefix='contexts_com_snowplowanalytics_snowplow_media_player_2' - ) }} - {%- else -%} - {% if v2_quality is string and target.type not in ['postgres', 'redshift'] -%} - {{ v2_quality }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ v2_quality.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %}, - {% if var("snowplow__enable_youtube") -%} - {{ field( - youtube_quality, - col_prefix='contexts_com_youtube_youtube_1' - ) }} - {%- else -%} - {% if youtube_quality is string and target.type not in ['postgres', 'redshift'] -%} - {{ youtube_quality }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ youtube_quality.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %}, + media_player_v2__quality, + youtube__playback_quality, {% if var("snowplow__enable_whatwg_media") and var("snowplow__enable_whatwg_video") -%} - {{ field( - video_width, - col_prefix='contexts_org_whatwg_video_element_1' - ) }}||'x'||{{ field( - video_height, - 
col_prefix='contexts_org_whatwg_video_element_1' - ) }} - {%- else -%} - {% if video_width is string and target.type not in ['postgres', 'redshift'] -%} - {{ video_width }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ video_width.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {% endif %}, + html5_video_element__video_width || 'x' || html5_video_element__video_height, + {% endif -%} 'N/A' - ) -{% endmacro %} + ) +{%- endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/player_id_field.sql b/macros/snowplow_media_player_base_events_this_run/player_id_field.sql deleted file mode 100644 index 51e735e..0000000 --- a/macros/snowplow_media_player_base_events_this_run/player_id_field.sql +++ /dev/null @@ -1,39 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0.
-You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{% macro player_id_field(youtube_player_id, media_player_id) %} - coalesce( - {% if var("snowplow__enable_youtube") -%} - {{ field( - youtube_player_id, - col_prefix='contexts_com_youtube_youtube_1' - ) }} - {%- else -%} - {% if youtube_player_id is string and target.type not in ['postgres', 'redshift'] -%} - {{ youtube_player_id }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ youtube_player_id.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %}, - {% if var("snowplow__enable_whatwg_media") -%} - {{ field( - media_player_id, - col_prefix='contexts_org_whatwg_media_element_1' - ) }} - {%- else -%} - {% if media_player_id is string and target.type not in ['postgres', 'redshift'] -%} - {{ media_player_id }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ media_player_id.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %} - ) -{% endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/snakeify_case.sql b/macros/snowplow_media_player_base_events_this_run/snakeify_case.sql new file mode 100644 index 0000000..50b292c --- /dev/null +++ b/macros/snowplow_media_player_base_events_this_run/snakeify_case.sql @@ -0,0 +1,16 @@ +{# +Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
+You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{# Take a string in camel/pascal case (hyphens allowed) and return it in snake_case, e.g. 'playbackRate' -> 'playback_rate' #} +{% macro snakeify_case(text) %} + {%- set re = modules.re -%} + {%- set camel_string1 = '([A-Z]+)([A-Z][a-z])'-%} {# Run of capitals followed by a capital + lowercase pair, e.g. 'HTMLId' -> 'HTML_Id' #} + {%- set camel_string2 = '([a-z\\d])([A-Z])'-%} {# Lowercase letter or digit followed by a capital; backslash doubled so the regex receives a literal \d, matching replace_string below #} + {%- set replace_string = '\\1_\\2' -%} + {%- set output_text = re.sub(camel_string2, replace_string, re.sub(camel_string1, replace_string, text)).replace('-', '_').lower() -%} + {{- output_text -}} +{% endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/source_url_field.sql b/macros/snowplow_media_player_base_events_this_run/source_url_field.sql deleted file mode 100644 index 67db079..0000000 --- a/macros/snowplow_media_player_base_events_this_run/source_url_field.sql +++ /dev/null @@ -1,39 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
-You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{% macro source_url_field(youtube_url, media_current_src) %} - coalesce( - {% if var("snowplow__enable_youtube") -%} - {{ field( - youtube_url, - col_prefix='contexts_com_youtube_youtube_1' - ) }} - {%- else -%} - {% if youtube_url is string and target.type not in ['postgres', 'redshift'] -%} - {{ youtube_url }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ youtube_url.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %}, - {% if var("snowplow__enable_whatwg_media") -%} - {{ field( - media_current_src, - col_prefix='contexts_org_whatwg_media_element_1' - ) }} - {%- else -%} - {% if media_current_src is string and target.type not in ['postgres', 'redshift'] -%} - {{ media_current_src }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ media_current_src.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %} - ) -{% endmacro %} diff --git a/macros/snowplow_media_player_base_events_this_run/web_or_mobile_field.sql b/macros/snowplow_media_player_base_events_this_run/web_or_mobile_field.sql deleted file mode 100644 index e35727f..0000000 --- a/macros/snowplow_media_player_base_events_this_run/web_or_mobile_field.sql +++ /dev/null @@ -1,33 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
-You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{% macro web_or_mobile_field(web, mobile) %} - coalesce( - {% if var("snowplow__enable_web_events") -%} - {{ field(web) }} - {%- else -%} - {% if web is string and target.type not in ['postgres', 'redshift'] -%} - {{ web }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ web.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %}, - {% if var("snowplow__enable_mobile_events") -%} - {{ field(mobile) }} - {%- else -%} - {% if mobile is string and target.type not in ['postgres', 'redshift'] -%} - {{ mobile }} - {% elif target.type not in ['postgres', 'redshift'] %} - cast(null as {{ mobile.get('dtype', 'string') }}) - {%- else -%} - null - {% endif %} - {%- endif %} - ) -{% endmacro %} diff --git a/models/base/scratch/bigquery/snowplow_media_player_base_events_this_run.sql b/models/base/scratch/bigquery/snowplow_media_player_base_events_this_run.sql deleted file mode 100644 index 81b2fb2..0000000 --- a/models/base/scratch/bigquery/snowplow_media_player_base_events_this_run.sql +++ /dev/null @@ -1,180 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
-You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{{ - config( - tags=["this_run"] - ) -}} - -{%- set lower_limit, upper_limit = snowplow_utils.return_limits_from_model(ref('snowplow_media_player_base_sessions_this_run'), - 'start_tstamp', - 'end_tstamp') %} - -{# Check for exceptions #} -{% if var("snowplow__enable_whatwg_media") is false and var("snowplow__enable_whatwg_video") %} - {{ exceptions.raise_compiler_error("variable: snowplow__enable_whatwg_video is enabled but variable: snowplow__enable_whatwg_media is not, both need to be enabled for modelling html5 video tracking data.") }} -{% elif not var("snowplow__enable_media_player_v1") and not var("snowplow__enable_media_player_v2") %} - {{ exceptions.raise_compiler_error("No media player context enabled. Please enable at least one media player context: snowplow__enable_media_player_v1 or snowplow__enable_media_player_v2") }} -{% elif not var("snowplow__enable_youtube") and not var("snowplow__enable_whatwg_media") and not var("snowplow__enable_media_player_v2") %} - {{ exceptions.raise_compiler_error("No media context enabled. 
Please enable as many of the following variables as required: snowplow__enable_media_player_v2, snowplow__enable_youtube, snowplow__enable_whatwg_media, snowplow__enable_whatwg_video") }} -{% endif %} - -{% set base_events_query = snowplow_utils.base_create_snowplow_events_this_run( - sessions_this_run_table='snowplow_media_player_base_sessions_this_run', - session_identifiers=session_identifiers(), - session_sql=var('snowplow__session_sql', none), - session_timestamp=var('snowplow__session_timestamp', 'collector_tstamp'), - derived_tstamp_partitioned=var('snowplow__derived_tstamp_partitioned', true), - days_late_allowed=var('snowplow__days_late_allowed', 3), - max_session_days=var('snowplow__max_session_days', 3), - app_ids=var('snowplow__app_id', []), - snowplow_events_database=var('snowplow__database', target.database) if target.type not in ['databricks', 'spark'] else var('snowplow__databricks_catalog', 'hive_metastore') if target.type in ['databricks'] else var('snowplow__atomic_schema', 'atomic'), - snowplow_events_schema=var('snowplow__atomic_schema', 'atomic'), - snowplow_events_table=var('snowplow__events_table', 'events'), - entities_or_sdes=contexts, - custom_sql=var('snowplow__custom_sql', '') -) %} - -with base_query as ( - {{ base_events_query }} -), - -prep as ( - - select - a.*, - a.derived_tstamp as start_tstamp, - - {{ web_or_mobile_field( - web={ 'field': 'id', 'col_prefix': 'contexts_com_snowplowanalytics_snowplow_web_page_1' }, - mobile={'field': 'id', 'col_prefix': 'contexts_com_snowplowanalytics_mobile_screen_1' } - ) }} as page_view_id, - {{ web_or_mobile_field( - web='a.domain_sessionid', - mobile={ 'field': 'session_id', 'col_prefix': 'contexts_com_snowplowanalytics_snowplow_client_session_1' } - ) }} as original_session_identifier, - - -- unpacking the media player event - {{ media_player_field( - v1={ 'field': 'label', 'col_prefix': 'unstruct_event_com_snowplowanalytics_snowplow_media_player_event_1' }, - v2={ 'field': 'label' }, - ) }} 
as media_label, - {{ media_event_type_field(media_player_event_type={}, event_name='a.event_name') }} as event_type, - - -- unpacking the media player object - round({{ media_player_field( - v1={ 'field': 'duration', 'dtype': 'numeric' }, - v2={ 'field': 'duration', 'dtype': 'numeric' } - ) }}) as duration_secs, - {{ media_player_field( - v1={ 'field': 'current_time', 'dtype': 'numeric' }, - v2={ 'field': 'current_time', 'dtype': 'numeric' } - ) }} as player_current_time, - {{ media_player_field( - v1={ 'field': 'playback_rate', 'dtype': 'numeric' }, - v2={ 'field': 'playback_rate', 'dtype': 'numeric' }, - default='1.0' - ) }} as playback_rate, - {{ percent_progress_field( - v1_percent_progress={ 'field': 'percent_progress', 'dtype': 'int' }, - v1_event_type={}, - event_name='a.event_name', - v2_current_time={ 'field': 'current_time', 'dtype': 'numeric' }, - v2_duration={ 'field': 'duration', 'dtype': 'numeric'} - ) }} as percent_progress, - {{ media_player_field( - v1={ 'field': 'muted', 'dtype': 'boolean' }, - v2={ 'field': 'muted', 'dtype': 'boolean' } - ) }} as is_muted, - - -- media session properties - {{ media_session_field({ 'field': 'media_session_id' }) }} as media_session_id, - {{ media_session_field({ 'field': 'time_played', 'dtype': 'numeric' }) }} as media_session_time_played, - {{ media_session_field({ 'field': 'time_played_muted', 'dtype': 'numeric' }) }} as media_session_time_played_muted, - {{ media_session_field({ 'field': 'time_paused', 'dtype': 'numeric' }) }} as media_session_time_paused, - {{ media_session_field({ 'field': 'content_watched', 'dtype': 'numeric' }) }} as media_session_content_watched, - {{ media_session_field({ 'field': 'time_buffering', 'dtype': 'numeric' }) }} as media_session_time_buffering, - {{ media_session_field({ 'field': 'time_spent_ads', 'dtype': 'numeric' }) }} as media_session_time_spent_ads, - {{ media_session_field({ 'field': 'ads', 'dtype': 'int' }) }} as media_session_ads, - {{ media_session_field({ 'field': 
'ads_clicked', 'dtype': 'int' }) }} as media_session_ads_clicked, - {{ media_session_field({ 'field': 'ads_skipped', 'dtype': 'int' }) }} as media_session_ads_skipped, - {{ media_session_field({ 'field': 'ad_breaks', 'dtype': 'int' }) }} as media_session_ad_breaks, - {{ media_session_field({ 'field': 'avg_playback_rate', 'dtype': 'numeric' }) }} as media_session_avg_playback_rate, - - -- ad properties - {{ media_ad_field({ 'field': 'name' }) }} as ad_name, - {{ media_ad_field({ 'field': 'ad_id' }) }} as ad_id, - {{ media_ad_field({ 'field': 'creative_id' }) }} as ad_creative_id, - {{ media_ad_field({ 'field': 'pod_position', 'dtype': 'int' }) }} as ad_pod_position, - {{ media_ad_field({ 'field': 'duration', 'dtype': 'numeric' }) }} as ad_duration_secs, - {{ media_ad_field({ 'field': 'skippable', 'dtype': 'boolean' }) }} as ad_skippable, - - -- ad break properties - {{ media_ad_break_field({ 'field': 'name' }) }} as ad_break_name, - {{ media_ad_break_field({ 'field': 'break_id' }) }} as ad_break_id, - {{ media_ad_break_field({ 'field': 'break_type' }) }} as ad_break_type, - - -- ad quartile event - {{ media_ad_quartile_event_field({ 'field': 'percent_progress', 'dtype': 'int' }) }} as ad_percent_progress, - - -- combined media properties - {{ player_id_field( - youtube_player_id={ 'field': 'player_id' }, - media_player_id={ 'field': 'html_id' } - ) }} as player_id, - {{ media_player_type_field( - v2_player_type={ 'field': 'player_type' }, - youtube_player_id={ 'field': 'player_id' }, - media_player_id={ 'field': 'html_id' } - ) }} as media_player_type, - {{ source_url_field( - youtube_url={ 'field': 'url' }, - media_current_src={ 'field': 'current_src' } - )}} as source_url, - {{ media_type_field( - v2_media_type={ 'field': 'media_type' }, - media_media_type={ 'field': 'media_type' } - ) }} as media_type, - {{ playback_quality_field( - v2_quality={ 'field': 'quality' }, - youtube_quality={ 'field': 'playback_quality' }, - video_width={ 'field': 'video_width', 
'dtype': 'int' }, - video_height={ 'field': 'video_height', 'dtype': 'int' } - ) }} as playback_quality - - from base_query as a - - where - {{ snowplow_media_player.event_name_filter(var("snowplow__media_event_names", "['media_player_event']")) }} - -) - -, ranked as ( - - select - *, - dense_rank() over (partition by ev.session_identifier order by ev.start_tstamp) AS event_in_session_index, - from prep as ev - -) - -select - coalesce( - p.media_session_id, - {{ dbt_utils.generate_surrogate_key(['p.page_view_id', 'p.player_id', 'p.media_label', 'p.media_type', 'p.media_player_type']) }} - ) as play_id, - {{ dbt_utils.generate_surrogate_key(['p.player_id', 'p.media_label', 'p.media_type', 'p.media_player_type']) }} as media_identifier, - p.*, - - coalesce(cast(piv.weight_rate * p.duration_secs / 100 as {{ type_int() }}), 0) as play_time_secs, - coalesce(cast(case when p.is_muted = true then piv.weight_rate * p.duration_secs / 100 else 0 end as {{ type_int() }}), 0) as play_time_muted_secs - - from ranked p - - left join {{ ref("snowplow_media_player_pivot_base") }} piv - on p.percent_progress = piv.percent_progress diff --git a/models/base/scratch/databricks/snowplow_media_player_base_events_this_run.sql b/models/base/scratch/databricks/snowplow_media_player_base_events_this_run.sql deleted file mode 100644 index 4308aef..0000000 --- a/models/base/scratch/databricks/snowplow_media_player_base_events_this_run.sql +++ /dev/null @@ -1,176 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
-You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{{ - config( - tags=["this_run"] - ) -}} - -{%- set lower_limit, upper_limit = snowplow_utils.return_limits_from_model(ref('snowplow_media_player_base_sessions_this_run'), - 'start_tstamp', - 'end_tstamp') %} - -{# Check for exceptions #} -{% if var("snowplow__enable_whatwg_media") is false and var("snowplow__enable_whatwg_video") %} - {{ exceptions.raise_compiler_error("variable: snowplow__enable_whatwg_video is enabled but variable: snowplow__enable_whatwg_media is not, both need to be enabled for modelling html5 video tracking data.") }} -{% elif not var("snowplow__enable_media_player_v1") and not var("snowplow__enable_media_player_v2") %} - {{ exceptions.raise_compiler_error("No media player context enabled. Please enable at least one media player context: snowplow__enable_media_player_v1 or snowplow__enable_media_player_v2") }} -{% elif not var("snowplow__enable_youtube") and not var("snowplow__enable_whatwg_media") and not var("snowplow__enable_media_player_v2") %} - {{ exceptions.raise_compiler_error("No media context enabled. 
Please enable as many of the following variables as required: snowplow__enable_media_player_v2, snowplow__enable_youtube, snowplow__enable_whatwg_media, snowplow__enable_whatwg_video") }} -{% endif %} - -{% set base_events_query = snowplow_utils.base_create_snowplow_events_this_run( - sessions_this_run_table='snowplow_media_player_base_sessions_this_run', - session_identifiers=session_identifiers(), - session_sql=var('snowplow__session_sql', none), - session_timestamp=var('snowplow__session_timestamp', 'collector_tstamp'), - derived_tstamp_partitioned=var('snowplow__derived_tstamp_partitioned', true), - days_late_allowed=var('snowplow__days_late_allowed', 3), - max_session_days=var('snowplow__max_session_days', 3), - app_ids=var('snowplow__app_id', []), - snowplow_events_database=var('snowplow__database', target.database) if target.type not in ['databricks', 'spark'] else var('snowplow__databricks_catalog', 'hive_metastore') if target.type in ['databricks'] else var('snowplow__atomic_schema', 'atomic'), - snowplow_events_schema=var('snowplow__atomic_schema', 'atomic'), - snowplow_events_table=var('snowplow__events_table', 'events'), - entities_or_sdes=contexts, - custom_sql=var('snowplow__custom_sql', '') -) %} - -with base_query as ( - {{ base_events_query }} -), - -prep as ( - - select - - a.*, - a.derived_tstamp as start_tstamp, - - {{ web_or_mobile_field( - web={ 'field': 'id', 'col_prefix': 'contexts_com_snowplowanalytics_snowplow_web_page_1', 'dtype': 'string' }, - mobile={ 'field': 'id', 'col_prefix': 'contexts_com_snowplowanalytics_mobile_screen_1', 'dtype': 'string' } - ) }} as page_view_id, - {{ web_or_mobile_field( - web='a.domain_sessionid', - mobile={ 'field': 'session_id', 'col_prefix': 'contexts_com_snowplowanalytics_snowplow_client_session_1', 'dtype': 'string' } - ) }} as original_session_identifier, - - -- unpacking the media player event - {{ media_player_field( - v1={ 'field': 'label', 'col_prefix': 
'unstruct_event_com_snowplowanalytics_snowplow_media_player_event_1', 'dtype': 'string' }, - v2={ 'field': 'label', 'dtype': 'string' } - ) }} as media_label, - {{ media_event_type_field(media_player_event_type={ 'dtype': 'string' }, event_name='a.event_name') }} as event_type, - - -- unpacking the media player object - round({{ media_player_field( - v1={ 'field': 'duration', 'dtype': 'double' }, - v2={ 'field': 'duration', 'dtype': 'double' } - ) }}) as duration_secs, - {{ media_player_field( - v1={ 'field': 'current_time', 'dtype': 'double' }, - v2={ 'field': 'current_time', 'dtype': 'double' } - ) }} as player_current_time, - {{ media_player_field( - v1={ 'field': 'playback_rate', 'dtype': 'double' }, - v2={ 'field': 'playback_rate', 'dtype': 'double' }, - default='1.0' - ) }} as playback_rate, - {{ percent_progress_field( - v1_percent_progress={ 'field': 'percent_progress', 'dtype': 'string' }, - v1_event_type={ 'field': 'type', 'dtype': 'string' }, - event_name='a.event_name', - v2_current_time={ 'field': 'current_time', 'dtype': 'double' }, - v2_duration={ 'field': 'duration', 'dtype': 'double' } - ) }} as percent_progress, - {{ media_player_field( - v1={ 'field': 'muted', 'dtype': 'boolean' }, - v2={ 'field': 'muted', 'dtype': 'boolean' } - ) }} as is_muted, - - -- media session properties - {{ media_session_field({ 'field': 'media_session_id', 'dtype': 'string' }) }} as media_session_id, - {{ media_session_field({ 'field': 'time_played', 'dtype': 'double' }) }} as media_session_time_played, - {{ media_session_field({ 'field': 'time_played_muted', 'dtype': 'double' }) }} as media_session_time_played_muted, - {{ media_session_field({ 'field': 'time_paused', 'dtype': 'double' }) }} as media_session_time_paused, - {{ media_session_field({ 'field': 'content_watched', 'dtype': 'double' }) }} as media_session_content_watched, - {{ media_session_field({ 'field': 'time_buffering', 'dtype': 'double' }) }} as media_session_time_buffering, - {{ media_session_field({ 
'field': 'time_spent_ads', 'dtype': 'double' }) }} as media_session_time_spent_ads, - {{ media_session_field({ 'field': 'ads', 'dtype': 'integer' }) }} as media_session_ads, - {{ media_session_field({ 'field': 'ads_clicked', 'dtype': 'integer' }) }} as media_session_ads_clicked, - {{ media_session_field({ 'field': 'ads_skipped', 'dtype': 'integer' }) }} as media_session_ads_skipped, - {{ media_session_field({ 'field': 'ad_breaks', 'dtype': 'integer' }) }} as media_session_ad_breaks, - {{ media_session_field({ 'field': 'avg_playback_rate', 'dtype': 'double' }) }} as media_session_avg_playback_rate, - - -- ad properties - {{ media_ad_field({ 'field': 'name', 'dtype': 'string' }) }} as ad_name, - {{ media_ad_field({ 'field': 'ad_id', 'dtype': 'string' }) }} as ad_id, - {{ media_ad_field({ 'field': 'creative_id', 'dtype': 'string' }) }} as ad_creative_id, - {{ media_ad_field({ 'field': 'pod_position', 'dtype': 'integer' }) }} as ad_pod_position, - {{ media_ad_field({ 'field': 'duration', 'dtype': 'double' }) }} as ad_duration_secs, - {{ media_ad_field({ 'field': 'skippable', 'dtype': 'boolean' }) }} as ad_skippable, - - -- ad break properties - {{ media_ad_break_field({ 'field': 'name', 'dtype': 'string' }) }} as ad_break_name, - {{ media_ad_break_field({ 'field': 'break_id', 'dtype': 'string' }) }} as ad_break_id, - {{ media_ad_break_field({ 'field': 'break_type', 'dtype': 'string' }) }} as ad_break_type, - - -- ad quartile event - {{ media_ad_quartile_event_field({ 'field': 'percent_progress', 'dtype': 'integer' }) }} as ad_percent_progress, - - -- combined media properties - {{ player_id_field( - youtube_player_id={ 'field': 'player_id', 'dtype': 'string' }, - media_player_id={ 'field': 'html_id', 'dtype': 'string' } - ) }} as player_id, - {{ media_player_type_field( - v2_player_type={ 'field': 'player_type', 'dtype': 'string' }, - youtube_player_id={ 'field': 'player_id', 'dtype': 'string' }, - media_player_id={ 'field': 'html_id', 'dtype': 'string' } - ) }} as 
media_player_type, - {{ source_url_field( - youtube_url={ 'field': 'url', 'dtype': 'string' }, - media_current_src={ 'field': 'current_src', 'dtype': 'string' } - ) }} as source_url, - {{ media_type_field( - v2_media_type={ 'field': 'media_type', 'dtype': 'string' }, - media_media_type={ 'field': 'media_type', 'dtype': 'string' } - ) }} as media_type, - {{ playback_quality_field( - v2_quality={ 'field': 'quality', 'dtype': 'string' }, - youtube_quality={ 'field': 'playback_quality', 'dtype': 'string' }, - video_width={ 'field': 'video_width', 'dtype': 'integer' }, - video_height={ 'field': 'video_height', 'dtype': 'integer' } - )}} as playback_quality - - from base_query as a - - where - {{ snowplow_media_player.event_name_filter(var("snowplow__media_event_names", "['media_player_event']")) }} - -) - -select - coalesce( - p.media_session_id, - {{ dbt_utils.generate_surrogate_key(['p.page_view_id', 'p.player_id', 'p.media_label', 'p.media_type', 'p.media_player_type']) }} - ) as play_id, - {{ dbt_utils.generate_surrogate_key(['p.player_id', 'p.media_label', 'p.media_type', 'p.media_player_type']) }} as media_identifier, - p.* except (percent_progress), - - cast(p.percent_progress as integer) as percent_progress, - - coalesce(cast(round(piv.weight_rate * p.duration_secs / 100) as {{ type_int() }}), 0) as play_time_secs, - coalesce(cast(case when p.is_muted = true then round(piv.weight_rate * p.duration_secs / 100) else 0 end as {{ type_int() }}), 0) as play_time_muted_secs, - - dense_rank() over (partition by session_identifier order by start_tstamp) AS event_in_session_index - - from prep p - - left join {{ ref("snowplow_media_player_pivot_base") }} piv - on p.percent_progress = piv.percent_progress diff --git a/models/base/scratch/default/snowplow_media_player_base_events_this_run.sql b/models/base/scratch/default/snowplow_media_player_base_events_this_run.sql deleted file mode 100644 index bed0e1c..0000000 --- 
a/models/base/scratch/default/snowplow_media_player_base_events_this_run.sql +++ /dev/null @@ -1,189 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. -You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{{ - config( - sort='collector_tstamp', - dist='event_id', - tags=["this_run"] - ) -}} - - - -{# Check for exceptions #} -{% if var("snowplow__enable_whatwg_media") is false and var("snowplow__enable_whatwg_video") %} - {{ exceptions.raise_compiler_error("variable: snowplow__enable_whatwg_video is enabled but variable: snowplow__enable_whatwg_media is not, both need to be enabled for modelling html5 video tracking data.") }} -{% elif not var("snowplow__enable_media_player_v1") and not var("snowplow__enable_media_player_v2") %} - {{ exceptions.raise_compiler_error("No media player context enabled. Please enable at least one media player context: snowplow__enable_media_player_v1 or snowplow__enable_media_player_v2") }} -{% elif not var("snowplow__enable_youtube") and not var("snowplow__enable_whatwg_media") and not var("snowplow__enable_media_player_v2") %} - {{ exceptions.raise_compiler_error("No media context enabled. Please enable as many of the following variables as required: snowplow__enable_media_player_v2, snowplow__enable_youtube, snowplow__enable_whatwg_media, snowplow__enable_whatwg_video") }} -{% endif %} - -{# Setting sdes or contexts for Postgres / Redshift. 
dbt passes variables by reference so need to use copy to avoid altering the list multiple times #} -{% set contexts = var('snowplow__entities_or_sdes', []).copy() %} - -{% if var("snowplow__enable_mobile_events") %} - {% do contexts.append({'schema': var('snowplow__context_screen'), 'prefix': 'mobile_screen_', 'single_entity': True}) %} - {% do contexts.append({'schema': var('snowplow__context_mobile_session'), 'prefix': 'mobile_session_', 'single_entity': True}) %} -{% endif %} - -{% if var("snowplow__enable_media_player_v1") %} - {% do contexts.append({'schema': var('snowplow__media_player_event_context'), 'prefix': 'media_player_event_', 'single_entity': True}) %} - {% do contexts.append({'schema': var('snowplow__media_player_context'), 'prefix': 'media_player_', 'single_entity': True}) %} -{% endif %} - -{% if var("snowplow__enable_media_player_v2") %} - {% do contexts.append({'schema': var('snowplow__media_player_v2_context'), 'prefix': 'media_player_v2_', 'single_entity': True}) %} -{% endif %} - -{% if var("snowplow__enable_media_session") %} - {% do contexts.append({'schema': var('snowplow__media_session_context'), 'prefix': 'media_session_', 'single_entity': True}) %} -{% endif %} - -{% if var("snowplow__enable_media_ad") %} - {% do contexts.append({'schema': var('snowplow__media_ad_context'), 'prefix': 'media_ad_', 'single_entity': True}) %} -{% endif %} - -{% if var("snowplow__enable_media_ad_break") %} - {% do contexts.append({'schema': var('snowplow__media_ad_break_context'), 'prefix': 'media_ad_break_', 'single_entity': True}) %} -{% endif %} - -{%- if var("snowplow__enable_youtube") -%} - {% do contexts.append({'schema': var('snowplow__youtube_context'), 'prefix': 'youtube_', 'single_entity': True}) %} -{%- endif %} - -{% if var("snowplow__enable_whatwg_media") -%} - {% do contexts.append({'schema': var('snowplow__html5_media_element_context'), 'prefix': 'html5_media_element_', 'single_entity': True}) %} -{%- endif %} - -{% if 
var("snowplow__enable_whatwg_video") -%} - {% do contexts.append({'schema': var('snowplow__html5_video_element_context'), 'prefix': 'html5_video_element_', 'single_entity': True}) %} -{%- endif %} - -{% if var("snowplow__enable_web_events") %} - {% do contexts.append({'schema': var('snowplow__context_web_page'), 'prefix': 'web_page_', 'single_entity': True}) %} -{% endif %} - -{% if var("snowplow__enable_ad_quartile_event") %} - {% do contexts.append({'schema': var('snowplow__media_ad_quartile_event'), 'prefix': 'ad_quartile_event_', 'single_entity': True}) %} -{% endif %} - -{% set base_events_query = snowplow_utils.base_create_snowplow_events_this_run( - sessions_this_run_table='snowplow_media_player_base_sessions_this_run', - session_identifiers=session_identifiers(), - session_sql=var('snowplow__session_sql', none), - session_timestamp=var('snowplow__session_timestamp', 'collector_tstamp'), - derived_tstamp_partitioned=var('snowplow__derived_tstamp_partitioned', true), - days_late_allowed=var('snowplow__days_late_allowed', 3), - max_session_days=var('snowplow__max_session_days', 3), - app_ids=var('snowplow__app_id', []), - snowplow_events_database=var('snowplow__database', target.database) if target.type not in ['databricks', 'spark'] else var('snowplow__databricks_catalog', 'hive_metastore') if target.type in ['databricks'] else var('snowplow__atomic_schema', 'atomic'), - snowplow_events_schema=var('snowplow__atomic_schema', 'atomic'), - snowplow_events_table=var('snowplow__events_table', 'events'), - entities_or_sdes=contexts, - custom_sql=var('snowplow__custom_sql', '') -) %} - -with base_query as ( - {{ base_events_query }} -), - -prep as ( - select - ev.*, - - {{ web_or_mobile_field(web='ev.web_page__id', mobile='ev.mobile_screen__id') }} as page_view_id, - {{ web_or_mobile_field(web='ev.domain_sessionid', mobile='ev.mobile_session__session_id') }} as original_session_identifier, - - -- unpacking the media player event - {{ 
media_player_field(v1='ev.media_player_event__label', v2='ev.media_player_v2__label') }} as media_label, - {{ media_event_type_field(media_player_event_type='ev.media_player_event__type', event_name='ev.event_name') }} as event_type, - - -- unpacking the media player object - round({{ media_player_field(v1='ev.media_player__duration', v2='ev.media_player_v2__duration') }}) as duration_secs, - {{ media_player_field(v1='ev.media_player__current_time', v2='ev.media_player_v2__current_time') }} as player_current_time, - {{ media_player_field( - v1='ev.media_player__playback_rate', - v2='ev.media_player_v2__playback_rate', - default='1' - ) }} as playback_rate, - {{ percent_progress_field( - v1_percent_progress='ev.media_player__percent_progress', - v1_event_type='ev.media_player_event__type', - event_name='ev.event_name', - v2_current_time='ev.media_player_v2__current_time', - v2_duration='ev.media_player_v2__duration' - ) }} as percent_progress, - {{ media_player_field(v1='ev.media_player__muted', v2='ev.media_player_v2__muted') }} as is_muted, - - -- media session properties - cast({{ media_session_field('ev.media_session__media_session_id') }} as {{ type_string() }}) as media_session_id, -- This is the only key actually used regardless, redshift doesn't like casting a null at a later time - {{ media_session_field('ev.media_session__time_played') }} as media_session_time_played, - {{ media_session_field('ev.media_session__time_played_muted') }} as media_session_time_played_muted, - {{ media_session_field('ev.media_session__time_paused') }} as media_session_time_paused, - {{ media_session_field('ev.media_session__content_watched') }} as media_session_content_watched, - {{ media_session_field('ev.media_session__time_buffering') }} as media_session_time_buffering, - {{ media_session_field('ev.media_session__time_spent_ads') }} as media_session_time_spent_ads, - {{ media_session_field('ev.media_session__ads') }} as media_session_ads, - {{ 
media_session_field('ev.media_session__ads_clicked') }} as media_session_ads_clicked, - {{ media_session_field('ev.media_session__ads_skipped') }} as media_session_ads_skipped, - {{ media_session_field('ev.media_session__ad_breaks') }} as media_session_ad_breaks, - {{ media_session_field('ev.media_session__avg_playback_rate') }} as media_session_avg_playback_rate, - - -- ad properties - {{ media_ad_field('ev.media_ad__name') }} as ad_name, - {{ media_ad_field('ev.media_ad__ad_id') }} as ad_id, - {{ media_ad_field('ev.media_ad__creative_id') }} as ad_creative_id, - {{ media_ad_field('ev.media_ad__pod_position') }} as ad_pod_position, - {{ media_ad_field('ev.media_ad__duration') }} as ad_duration_secs, - {{ media_ad_field('ev.media_ad__skippable') }} as ad_skippable, - - -- ad break properties - {{ media_ad_break_field('ev.media_ad_break__name') }} as ad_break_name, - {{ media_ad_break_field('ev.media_ad_break__break_id') }} as ad_break_id, - {{ media_ad_break_field('ev.media_ad_break__break_type') }} as ad_break_type, - - -- ad quartile event - {{ media_ad_quartile_event_field('ev.ad_quartile_event__percent_progress') }} as ad_percent_progress, - - -- combined media properties - {{ player_id_field(youtube_player_id='ev.youtube__player_id', media_player_id='ev.html5_media_element__html_id') }} as player_id, - {{ media_player_type_field(v2_player_type='ev.media_player_v2__player_type', youtube_player_id='youtube__player_id', media_player_id='ev.html5_media_element__html_id') }} as media_player_type, - {{ source_url_field(youtube_url='ev.youtube__url', media_current_src='ev.html5_media_element__current_src')}} as source_url, - {{ media_type_field(v2_media_type='ev.media_player_v2__media_type', media_media_type='ev.html5_media_element__media_type')}} as media_type, - {{ playback_quality_field( - v2_quality='ev.media_player_v2__quality', - youtube_quality='ev.youtube__playback_quality', - video_width='ev.html5_video_element__video_width', - 
video_height='ev.html5_video_element__video_height' - )}} as playback_quality, - - dense_rank() over (partition by ev.session_identifier order by ev.derived_tstamp) as event_in_session_index, - ev.derived_tstamp as start_tstamp - - from base_query ev - - where - {{ snowplow_media_player.event_name_filter(var("snowplow__media_event_names", "['media_player_event']")) }} - -) - -select - coalesce( - p.media_session_id, - {{ dbt_utils.generate_surrogate_key(['p.page_view_id', 'p.player_id', 'p.media_label', 'p.media_type', 'p.media_player_type']) }} - ) as play_id, - {{ dbt_utils.generate_surrogate_key(['p.player_id', 'p.media_label', 'p.media_type', 'p.media_player_type']) }} as media_identifier, - p.*, - coalesce(cast(round(piv.weight_rate * p.duration_secs / 100) as {{ type_int() }}), 0) as play_time_secs, - coalesce(cast(case when p.is_muted then round(piv.weight_rate * p.duration_secs / 100) end as {{ type_int() }}), 0) as play_time_muted_secs - - from prep p - - left join {{ ref("snowplow_media_player_pivot_base") }} piv - on p.percent_progress = piv.percent_progress diff --git a/models/base/scratch/default/sources.yml b/models/base/scratch/default/sources.yml deleted file mode 100644 index d1882be..0000000 --- a/models/base/scratch/default/sources.yml +++ /dev/null @@ -1,33 +0,0 @@ -version: 2 - -sources: - - name: atomic - schema: "{{ var('snowplow__atomic_schema', 'atomic') if project_name != 'snowplow_media_player_integration_tests' else target.schema~'_snplw_media_player_int_tests' }}" - database: "{{ var('snowplow__database', target.database) }}" - tables: - - name: com_snowplowanalytics_snowplow_web_page_1 - description: '{{ doc("table_page_view_context") }}' - - name: com_snowplowanalytics_mobile_screen_1 - description: '{{ doc("table_screen_context") }}' - - name: com_snowplowanalytics_snowplow_client_session_1 - description: '{{ doc("table_client_session_context") }}' - - name: com_snowplowanalytics_snowplow_media_player_event_1 - description: '{{ 
doc("table_media_player_event") }}' - - name: com_snowplowanalytics_snowplow_media_player_1 - description: '{{ doc("table_media_player_context") }}' - - name: com_snowplowanalytics_snowplow_media_player_2 - description: '{{ doc("table_media_player_context") }}' - - name: com_snowplowanalytics_snowplow_media_session_1 - description: '{{ doc("table_media_session_context") }}' - - name: com_snowplowanalytics_snowplow_media_ad_1 - description: '{{ doc("table_media_ad_context") }}' - - name: com_snowplowanalytics_snowplow_media_ad_break_1 - description: '{{ doc("table_media_ad_break_context") }}' - - name: com_snowplowanalytics_snowplow_media_ad_quartile_event_1 - description: '{{ doc("table_media_ad_quartile_event") }}' - - name: com_youtube_youtube_1 - description: '{{ doc("table_youtube_context") }}' - - name: org_whatwg_media_element_1 - description: '{{ doc("table_html_media_element_context") }}' - - name: org_whatwg_video_element_1 - description: '{{ doc("table_html_video_element_context") }}' diff --git a/models/base/scratch/snowflake/snowplow_media_player_base_events_this_run.sql b/models/base/scratch/snowflake/snowplow_media_player_base_events_this_run.sql deleted file mode 100644 index e1e3786..0000000 --- a/models/base/scratch/snowflake/snowplow_media_player_base_events_this_run.sql +++ /dev/null @@ -1,187 +0,0 @@ -{# -Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. -This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, -and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. 
-You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ -#} - -{{ - config( - tags=["this_run"], - sql_header=snowplow_utils.set_query_tag(var('snowplow__query_tag', 'snowplow_dbt')) - ) -}} - -{%- set lower_limit, upper_limit = snowplow_utils.return_limits_from_model(ref('snowplow_media_player_base_sessions_this_run'), - 'start_tstamp', - 'end_tstamp') %} - -{# Check for exceptions #} -{% if var("snowplow__enable_whatwg_media") is false and var("snowplow__enable_whatwg_video") %} - {{ exceptions.raise_compiler_error("variable: snowplow__enable_whatwg_video is enabled but variable: snowplow__enable_whatwg_media is not, both need to be enabled for modelling html5 video tracking data.") }} -{% elif not var("snowplow__enable_media_player_v1") and not var("snowplow__enable_media_player_v2") %} - {{ exceptions.raise_compiler_error("No media player context enabled. Please enable at least one media player context: snowplow__enable_media_player_v1 or snowplow__enable_media_player_v2") }} -{% elif not var("snowplow__enable_youtube") and not var("snowplow__enable_whatwg_media") and not var("snowplow__enable_media_player_v2") %} - {{ exceptions.raise_compiler_error("No media context enabled. 
Please enable as many of the following variables as required: snowplow__enable_media_player_v2, snowplow__enable_youtube, snowplow__enable_whatwg_media, snowplow__enable_whatwg_video") }} -{% endif %} - -{% set base_events_query = snowplow_utils.base_create_snowplow_events_this_run( - sessions_this_run_table='snowplow_media_player_base_sessions_this_run', - session_identifiers=session_identifiers(), - session_sql=var('snowplow__session_sql', none), - session_timestamp=var('snowplow__session_timestamp', 'collector_tstamp'), - derived_tstamp_partitioned=var('snowplow__derived_tstamp_partitioned', true), - days_late_allowed=var('snowplow__days_late_allowed', 3), - max_session_days=var('snowplow__max_session_days', 3), - app_ids=var('snowplow__app_id', []), - snowplow_events_database=var('snowplow__database', target.database) if target.type not in ['databricks', 'spark'] else var('snowplow__databricks_catalog', 'hive_metastore') if target.type in ['databricks'] else var('snowplow__atomic_schema', 'atomic'), - snowplow_events_schema=var('snowplow__atomic_schema', 'atomic'), - snowplow_events_table=var('snowplow__events_table', 'events'), - entities_or_sdes=contexts, - custom_sql=var('snowplow__custom_sql', '') -) %} - -with base_query as ( - {{ base_events_query }} -), - -prep as ( - - select - a.*, - a.derived_tstamp as start_tstamp, - - {{ web_or_mobile_field( - web={ 'field': 'id', 'col_prefix': 'contexts_com_snowplowanalytics_snowplow_web_page_1', 'dtype': 'varchar' }, - mobile={ 'field': 'id', 'col_prefix': 'contexts_com_snowplowanalytics_mobile_screen_1', 'dtype': 'varchar' } - ) }} as page_view_id, - {{ web_or_mobile_field( - web='a.domain_sessionid', - mobile={ 'field': 'sessionId', 'col_prefix': 'contexts_com_snowplowanalytics_snowplow_client_session_1', 'dtype': 'varchar' } - ) }} as original_session_identifier, - - -- unpacking the media player event - {{ media_player_field( - v1={ 'field': 'label', 'col_prefix': 
'unstruct_event_com_snowplowanalytics_snowplow_media_player_event_1', 'dtype': 'varchar' }, - v2={ 'field': 'label', 'dtype': 'varchar' } - ) }} as media_label, - {{ media_event_type_field(media_player_event_type={ 'dtype': 'varchar' }, event_name='a.event_name') }} as event_type, - - -- unpacking the media player object - round({{ media_player_field( - v1={ 'field': 'duration', 'dtype': 'float' }, - v2={ 'field': 'duration', 'dtype': 'float' } - ) }}) as duration_secs, - {{ media_player_field( - v1={ 'field': 'currentTime', 'dtype': 'float' }, - v2={ 'field': 'currentTime', 'dtype': 'float' } - ) }} as player_current_time, - {{ media_player_field( - v1={ 'field': 'playbackRate', 'dtype': 'float' }, - v2={ 'field': 'playbackRate', 'dtype': 'float' }, - default='1.0' - ) }} as playback_rate, - {{ percent_progress_field( - v1_percent_progress={ 'field': 'percentProgress', 'dtype': 'varchar' }, - v1_event_type={ 'field': 'type', 'dtype': 'varchar' }, - event_name='a.event_name', - v2_current_time={ 'field': 'currentTime', 'dtype': 'float' }, - v2_duration={ 'field': 'duration', 'dtype': 'float' } - ) }} as percent_progress, - {{ media_player_field( - v1={ 'field': 'muted', 'dtype': 'boolean' }, - v2={ 'field': 'muted', 'dtype': 'boolean' } - ) }} as is_muted, - - -- media session properties - {{ media_session_field({ 'field': 'mediaSessionId', 'dtype': 'varchar' }) }} as media_session_id, - {{ media_session_field({ 'field': 'timePlayed', 'dtype': 'float' }) }} as media_session_time_played, - {{ media_session_field({ 'field': 'timePlayedMuted', 'dtype': 'float' }) }} as media_session_time_played_muted, - {{ media_session_field({ 'field': 'timePaused', 'dtype': 'float' }) }} as media_session_time_paused, - {{ media_session_field({ 'field': 'contentWatched', 'dtype': 'float' }) }} as media_session_content_watched, - {{ media_session_field({ 'field': 'timeBuffering', 'dtype': 'float' }) }} as media_session_time_buffering, - {{ media_session_field({ 'field': 
'timeSpentAds', 'dtype': 'float' }) }} as media_session_time_spent_ads, - {{ media_session_field({ 'field': 'ads', 'dtype': 'integer' }) }} as media_session_ads, - {{ media_session_field({ 'field': 'adsClicked', 'dtype': 'integer' }) }} as media_session_ads_clicked, - {{ media_session_field({ 'field': 'adsSkipped', 'dtype': 'integer' }) }} as media_session_ads_skipped, - {{ media_session_field({ 'field': 'adBreaks', 'dtype': 'integer' }) }} as media_session_ad_breaks, - {{ media_session_field({ 'field': 'avgPlaybackRate', 'dtype': 'float' }) }} as media_session_avg_playback_rate, - - -- ad properties - {{ media_ad_field({ 'field': 'name', 'dtype': 'varchar' }) }} as ad_name, - {{ media_ad_field({ 'field': 'adId', 'dtype': 'varchar' }) }} as ad_id, - {{ media_ad_field({ 'field': 'creativeId', 'dtype': 'varchar' }) }} as ad_creative_id, - {{ media_ad_field({ 'field': 'podPosition', 'dtype': 'integer' }) }} as ad_pod_position, - {{ media_ad_field({ 'field': 'duration', 'dtype': 'float' }) }} as ad_duration_secs, - {{ media_ad_field({ 'field': 'skippable', 'dtype': 'boolean' }) }} as ad_skippable, - - -- ad break properties - {{ media_ad_break_field({ 'field': 'name', 'dtype': 'varchar' }) }} as ad_break_name, - {{ media_ad_break_field({ 'field': 'breakId', 'dtype': 'varchar' }) }} as ad_break_id, - {{ media_ad_break_field({ 'field': 'breakType', 'dtype': 'varchar' }) }} as ad_break_type, - - -- ad quartile event - {{ media_ad_quartile_event_field({ 'field': 'percentProgress', 'dtype': 'integer' }) }} as ad_percent_progress, - - -- combined media properties - {{ player_id_field( - youtube_player_id={ 'field': 'playerId', 'dtype': 'varchar' }, - media_player_id={ 'field': 'htmlId', 'dtype': 'varchar' } - ) }} as player_id, - {{ media_player_type_field( - v2_player_type={ 'field': 'playerType', 'dtype': 'varchar' }, - youtube_player_id={ 'field': 'playerId', 'dtype': 'varchar' }, - media_player_id={ 'field': 'htmlId', 'dtype': 'varchar' } - ) }} as media_player_type, - 
{{ source_url_field( - youtube_url={ 'field': 'url', 'dtype': 'varchar' }, - media_current_src={ 'field': 'currentSrc', 'dtype': 'varchar' } - ) }} as source_url, - {{ media_type_field( - v2_media_type={ 'field': 'mediaType', 'dtype': 'varchar' }, - media_media_type={ 'field': 'mediaType', 'dtype': 'varchar' } - ) }} as media_type, - {{ playback_quality_field( - v2_quality={ 'field': 'quality', 'dtype': 'varchar' }, - youtube_quality={ 'field': 'playbackQuality', 'dtype': 'varchar' }, - video_width={ 'field': 'videoWidth', 'dtype': 'integer' }, - video_height={ 'field': 'videoHeight', 'dtype': 'integer' } - )}} as playback_quality - - from base_query as a - - where - {{ snowplow_media_player.event_name_filter(var("snowplow__media_event_names", "['media_player_event']")) }} - -) - -select - coalesce( - p.media_session_id, - {{ dbt_utils.generate_surrogate_key(['p.page_view_id', 'p.player_id', 'p.media_label', 'p.media_type', 'p.media_player_type']) }} - ) as play_id, - {{ dbt_utils.generate_surrogate_key(['p.player_id', 'p.media_label', 'p.media_type', 'p.media_player_type']) }} as media_identifier, - p.* exclude (percent_progress), - - cast(p.percent_progress as integer) as percent_progress, - - coalesce( - cast(piv.weight_rate * p.duration_secs / 100 as {{ type_int() }}), - 0 - ) as play_time_secs, - coalesce( - cast( - case - when p.is_muted = true then piv.weight_rate * p.duration_secs / 100 - else 0 - end as {{ type_int() }} - ), - 0 - ) as play_time_muted_secs, - - dense_rank() over (partition by session_identifier order by start_tstamp) AS event_in_session_index - - from prep as p - - left join {{ ref("snowplow_media_player_pivot_base") }} piv - on p.percent_progress = piv.percent_progress diff --git a/models/base/scratch/snowplow_media_player_base_events_this_run.sql b/models/base/scratch/snowplow_media_player_base_events_this_run.sql new file mode 100644 index 0000000..c3a66f1 --- /dev/null +++ 
b/models/base/scratch/snowplow_media_player_base_events_this_run.sql @@ -0,0 +1,295 @@ +{# +Copyright (c) 2022-present Snowplow Analytics Ltd. All rights reserved. +This program is licensed to you under the Snowplow Personal and Academic License Version 1.0, +and you may not use this file except in compliance with the Snowplow Personal and Academic License Version 1.0. +You may obtain a copy of the Snowplow Personal and Academic License Version 1.0 at https://docs.snowplow.io/personal-and-academic-license-1.0/ +#} + +{{ + config( + sort='collector_tstamp', + dist='event_id', + tags=["this_run"] + ) +}} + +{# Check for exceptions #} +{% if var("snowplow__enable_whatwg_media") is false and var("snowplow__enable_whatwg_video") %} + {{ exceptions.raise_compiler_error("variable: snowplow__enable_whatwg_video is enabled but variable: snowplow__enable_whatwg_media is not, both need to be enabled for modelling html5 video tracking data.") }} +{% elif not var("snowplow__enable_media_player_v1") and not var("snowplow__enable_media_player_v2") %} + {{ exceptions.raise_compiler_error("No media player context enabled. Please enable at least one media player context: snowplow__enable_media_player_v1 or snowplow__enable_media_player_v2") }} +{% elif not var("snowplow__enable_youtube") and not var("snowplow__enable_whatwg_media") and not var("snowplow__enable_media_player_v2") %} + {{ exceptions.raise_compiler_error("No media context enabled. Please enable as many of the following variables as required: snowplow__enable_media_player_v2, snowplow__enable_youtube, snowplow__enable_whatwg_media, snowplow__enable_whatwg_video") }} +{% endif %} + +{# Setting sdes or contexts for Postgres / Redshift. 
dbt passes variables by reference so need to use copy to avoid altering the list multiple times #} +{% set contexts = var('snowplow__entities_or_sdes', []).copy() %} + +{% if var("snowplow__enable_mobile_events") %} + {% do contexts.append({'schema': var('snowplow__context_screen'), 'prefix': 'mobile_screen_', 'single_entity': True}) %} + {% do contexts.append({'schema': var('snowplow__context_mobile_session'), 'prefix': 'mobile_session_', 'single_entity': True}) %} +{% endif %} + +{% if var("snowplow__enable_media_player_v1") %} + {% do contexts.append({'schema': var('snowplow__media_player_event_context'), 'prefix': 'media_player_event_', 'single_entity': True}) %} + {% do contexts.append({'schema': var('snowplow__media_player_context'), 'prefix': 'media_player_v1_', 'single_entity': True}) %} +{% endif %} + +{% if var("snowplow__enable_media_player_v2") %} + {% do contexts.append({'schema': var('snowplow__media_player_v2_context'), 'prefix': 'media_player_v2_', 'single_entity': True}) %} +{% endif %} + +{% if var("snowplow__enable_media_session") %} + {% do contexts.append({'schema': var('snowplow__media_session_context'), 'prefix': 'media_session_', 'single_entity': True}) %} +{% endif %} + +{% if var("snowplow__enable_media_ad") %} + {% do contexts.append({'schema': var('snowplow__media_ad_context'), 'prefix': 'media_ad_', 'single_entity': True}) %} +{% endif %} + +{% if var("snowplow__enable_media_ad_break") %} + {% do contexts.append({'schema': var('snowplow__media_ad_break_context'), 'prefix': 'media_ad_break_', 'single_entity': True}) %} +{% endif %} + +{%- if var("snowplow__enable_youtube") -%} + {% do contexts.append({'schema': var('snowplow__youtube_context'), 'prefix': 'youtube_', 'single_entity': True}) %} +{%- endif %} + +{% if var("snowplow__enable_whatwg_media") -%} + {% do contexts.append({'schema': var('snowplow__html5_media_element_context'), 'prefix': 'html5_media_element_', 'single_entity': True}) %} +{%- endif %} + +{% if 
var("snowplow__enable_whatwg_video") -%} + {% do contexts.append({'schema': var('snowplow__html5_video_element_context'), 'prefix': 'html5_video_element_', 'single_entity': True}) %} +{%- endif %} + +{% if var("snowplow__enable_web_events") %} + {% do contexts.append({'schema': var('snowplow__context_web_page'), 'prefix': 'web_page_', 'single_entity': True}) %} +{% endif %} + +{% if var("snowplow__enable_ad_quartile_event") %} + {% do contexts.append({'schema': var('snowplow__media_ad_quartile_event'), 'prefix': 'ad_quartile_event_', 'single_entity': True}) %} +{% endif %} + +{% set base_events_query = snowplow_utils.base_create_snowplow_events_this_run( + sessions_this_run_table='snowplow_media_player_base_sessions_this_run', + session_identifiers=session_identifiers(), + session_sql=var('snowplow__session_sql', none), + session_timestamp=var('snowplow__session_timestamp', 'collector_tstamp'), + derived_tstamp_partitioned=var('snowplow__derived_tstamp_partitioned', true), + days_late_allowed=var('snowplow__days_late_allowed', 3), + max_session_days=var('snowplow__max_session_days', 3), + app_ids=var('snowplow__app_id', []), + snowplow_events_database=var('snowplow__database', target.database) if target.type not in ['databricks', 'spark'] else var('snowplow__databricks_catalog', 'hive_metastore') if target.type in ['databricks'] else var('snowplow__atomic_schema', 'atomic'), + snowplow_events_schema=var('snowplow__atomic_schema', 'atomic'), + snowplow_events_table=var('snowplow__events_table', 'events'), + entities_or_sdes=contexts, + custom_sql=var('snowplow__custom_sql', '') +) %} + +with base_query as ( + {{ base_events_query }} +) + +, prep as ( + select + ev.* + + {{ get_context_fields( + enabled=var('snowplow__enable_web_events', false), + context='contexts_com_snowplowanalytics_snowplow_web_page_1', + prefix='web_page_', + fields=[ + {'field':'id', 'dtype':'string'}, + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_mobile_events', false), + 
context='contexts_com_snowplowanalytics_mobile_screen_1', + prefix='mobile_screen_', + fields=[ + {'field':'id', 'dtype':'string'}, + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_mobile_events', false), + context='contexts_com_snowplowanalytics_snowplow_client_session_1', + prefix='mobile_session_', + fields=[ + {'field':'session_id', 'dtype':'string'}, + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_media_session', false), + context='contexts_com_snowplowanalytics_snowplow_media_session_1', + prefix='media_session_', + fields=[ + {'field':'mediaSessionId', 'dtype': 'string'}, + {'field':'timePlayed', 'dtype': 'number'}, + {'field':'timePlayedMuted', 'dtype': 'number'}, + {'field':'timePaused', 'dtype': 'number'}, + {'field':'contentWatched', 'dtype': 'number'}, + {'field':'timeBuffering', 'dtype': 'number'}, + {'field':'timeSpentAds', 'dtype': 'number'}, + {'field':'ads', 'dtype': 'integer'}, + {'field':'adsClicked', 'dtype': 'integer'}, + {'field':'adsSkipped', 'dtype': 'integer'}, + {'field':'adBreaks', 'dtype': 'integer'}, + {'field':'avgPlaybackRate', 'dtype': 'number'}, + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_media_ad', false), + context='contexts_com_snowplowanalytics_snowplow_media_ad_1', + prefix='media_ad_', + fields=[ + {'field':'name', 'dtype':'string'}, + {'field':'adId', 'dtype':'string'}, + {'field':'creativeId', 'dtype':'string'}, + {'field':'podPosition', 'dtype':'integer'}, + {'field':'duration', 'dtype':'integer'}, + {'field':'skippable', 'dtype':'boolean'}, + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_media_ad_break', false), + context='contexts_com_snowplowanalytics_snowplow_media_ad_break_1', + prefix='media_ad_break_', + fields=[ + {'field':'name', 'dtype':'string'}, + {'field':'breakId', 'dtype':'string'}, + {'field':'breakType', 'dtype':'string'}, + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_ad_quartile_event', false), + 
context='unstruct_event_com_snowplowanalytics_snowplow_media_ad_quartile_event_1', + prefix='ad_quartile_event_', + fields=[ + {'field':'percentProgress', 'dtype':'integer'} + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_media_player_v1', false), + context='contexts_com_snowplowanalytics_snowplow_media_player_1', + prefix='media_player_v1_', + fields=[ + {'field':'duration', 'dtype':'float'}, + {'field':'currentTime', 'dtype':'float'}, + {'field':'playbackRate', 'dtype':'number'}, + {'field':'muted', 'dtype':'boolean'}, + {'field':'percentProgress', 'dtype':'integer'}, + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_media_player_v2', false), + context='contexts_com_snowplowanalytics_snowplow_media_player_2', + prefix='media_player_v2_', + fields=[ + {'field':'duration', 'dtype':'float'}, + {'field':'currentTime', 'dtype':'float'}, + {'field':'playbackRate', 'dtype':'number'}, + {'field':'muted', 'dtype':'boolean'}, + {'field':'label', 'dtype':'string'}, + {'field':'playerType', 'dtype':'string'}, + {'field':'mediaType', 'dtype':'string'}, + {'field':'quality', 'dtype':'string'}, + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_media_player_v1', false), + context='unstruct_event_com_snowplowanalytics_snowplow_media_player_event_1', + prefix='media_player_event_', + fields=[ + {'field':'label', 'dtype':'string'}, + {'field':'type', 'dtype':'string'}, + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_youtube', false), + context='contexts_com_youtube_youtube_1', + prefix='youtube_', + fields=[ + {'field':'playerId', 'dtype':'string'}, + {'field':'url', 'dtype':'string'}, + {'field':'playbackQuality', 'dtype':'string'}, + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_whatwg_media', false), + context='contexts_org_whatwg_media_element_1', + prefix='html5_media_element_', + fields=[ + {'field':'htmlId', 'dtype':'string'}, + {'field':'currentSrc', 'dtype':'string'}, + 
{'field':'mediaType', 'dtype':'string'}, + ]) }} + + {{ get_context_fields( + enabled=var('snowplow__enable_whatwg_video', false), + context='contexts_org_whatwg_video_element_1', + prefix='html5_video_element_', + fields=[ + {'field':'videoWidth', 'dtype':'integer'}, + {'field':'videoHeight', 'dtype':'integer'}, + ]) }} + + from base_query ev + + where + {{ snowplow_media_player.event_name_filter(var("snowplow__media_event_names", "['media_player_event']")) }} +) + +, combined_fields as ( + select + p.* + -- combined web and mobile properties + , coalesce(p.web_page__id, p.mobile_screen__id) as page_view_id + , coalesce(p.domain_sessionid, p.mobile_session__session_id) as original_session_identifier + + --combined media properties + , coalesce(p.media_player_v2__label, p.media_player_event__label) as media_label + , round(coalesce(p.media_player_v2__duration, p.media_player_v1__duration)) as duration_secs + , coalesce(p.media_player_v2__current_time, p.media_player_v1__current_time) as player_current_time + , coalesce(p.media_player_v2__playback_rate, p.media_player_v1__playback_rate, 1.0) as playback_rate + , coalesce(p.media_player_v2__muted, p.media_player_v1__muted) as is_muted + , cast({{ percent_progress_field() }} as {{ type_int() }}) as percent_progress + , coalesce(p.youtube__player_id, p.html5_media_element__html_id) as player_id + , {{ media_player_type_field() }} as media_player_type + , coalesce(p.youtube__url, p.html5_media_element__current_src) as source_url + , {{ media_type_field() }} as media_type + , {{ playback_quality_field() }} as playback_quality + , {{ media_event_type_field() }} as event_type + + from prep p +) + +select + coalesce( + cf.media_session__media_session_id, + {{ dbt_utils.generate_surrogate_key(['cf.page_view_id', 'cf.player_id', 'cf.media_label', 'cf.media_type', 'cf.media_player_type']) }} + ) as play_id + , {{ dbt_utils.generate_surrogate_key(['cf.player_id', 'cf.media_label', 'cf.media_type', 'cf.media_player_type']) }} as 
media_identifier + , cf.* + , coalesce( + cast(round(piv.weight_rate * cf.duration_secs / 100) as {{ type_int() }}), + 0 + ) as play_time_secs + , coalesce( + cast( + round( + case + when cf.is_muted = true then piv.weight_rate * cf.duration_secs / 100 + else 0 + end + ) as {{ type_int() }} + ), + 0 + ) as play_time_muted_secs + , cf.derived_tstamp as start_tstamp + , dense_rank() over (partition by cf.session_identifier order by cf.derived_tstamp) AS event_in_session_index + +from combined_fields as cf + +left join {{ ref("snowplow_media_player_pivot_base") }} piv + on cf.percent_progress = piv.percent_progress diff --git a/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql b/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql index 52d4acb..d5aa81b 100644 --- a/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql +++ b/models/media_ad_views/scratch/snowplow_media_player_media_ad_views_this_run.sql @@ -25,15 +25,15 @@ with events_this_run as ( - select * from {{ ref('snowplow_media_player_base_events_this_run') }} - where ad_id is not null and media_identifier is not null + select * from {{ ref('snowplow_media_player_base_events_this_run') }} + where media_ad__ad_id is not null and media_identifier is not null ) , prep as ( select - {{ dbt_utils.generate_surrogate_key(['ev.platform', 'ev.media_identifier', 'ev.ad_id']) }} as media_ad_id + {{ dbt_utils.generate_surrogate_key(['ev.platform', 'ev.media_identifier', 'ev.media_ad__ad_id']) }} as media_ad_id ,ev.platform ,ev.media_identifier @@ -41,8 +41,8 @@ events_this_run as ( ,ev.session_identifier ,ev.user_id ,ev.play_id - ,{{ media_ad_break_field('ev.ad_break_id') }} as ad_break_id - ,{{ media_ad_field('ev.ad_id') }} as ad_id + ,ev.media_ad_break__break_id as ad_break_id + ,ev.media_ad__ad_id as ad_id {%- if var('snowplow__ad_views_passthroughs', []) -%} {%- set passthrough_names = [] -%} @@ -59,20 +59,20 @@ events_this_run as ( 
{%- endif %} ,max(ev.media_label) as media_label - ,{{ media_ad_break_field('max(ev.ad_break_name)' ) }} as ad_break_name - ,{{ media_ad_break_field('max(ev.ad_break_type)' ) }} as ad_break_type + ,max(ev.media_ad_break__name) as ad_break_name + ,max(ev.media_ad_break__break_type) as ad_break_type - ,{{ media_ad_field('max(ev.ad_name)') }} as name - ,{{ media_ad_field('max(ev.ad_creative_id)') }} as creative_id - ,{{ media_ad_field('max(ev.ad_duration_secs)') }} as duration_secs - ,{{ media_ad_field('avg(ev.ad_pod_position)') }} as pod_position - ,{{ media_ad_field('sum(case when ev.ad_skippable then 1 else 0 end) > 0') }} as skippable + ,max(ev.media_ad__name) as name + ,max(ev.media_ad__creative_id) as creative_id + ,max(ev.media_ad__duration) as duration_secs + ,avg(ev.media_ad__pod_position) as pod_position + ,sum(case when ev.media_ad__skippable then 1 else 0 end) > 0 as skippable ,max(case when ev.event_type = 'adclick' then 1 else 0 end) > 0 as clicked ,max(case when ev.event_type = 'adskip' then 1 else 0 end) > 0 as skipped - ,{{ media_ad_quartile_event_field("max(case when ev.event_type = 'adcomplete' or (ev.event_type = 'adquartile' and ev.ad_percent_progress >= 25) then 1 else 0 end) > 0") }} as percent_reached_25 - ,{{ media_ad_quartile_event_field("max(case when ev.event_type = 'adcomplete' or (ev.event_type = 'adquartile' and ev.ad_percent_progress >= 50) then 1 else 0 end) > 0") }} as percent_reached_50 - ,{{ media_ad_quartile_event_field("max(case when ev.event_type = 'adcomplete' or (ev.event_type = 'adquartile' and ev.ad_percent_progress >= 75) then 1 else 0 end) > 0") }} as percent_reached_75 + ,max(case when ev.event_type = 'adcomplete' or (ev.event_type = 'adquartile' and ev.ad_quartile_event__percent_progress >= 25) then 1 else 0 end) > 0 as percent_reached_25 + ,max(case when ev.event_type = 'adcomplete' or (ev.event_type = 'adquartile' and ev.ad_quartile_event__percent_progress >= 50) then 1 else 0 end) > 0 as percent_reached_50 + ,max(case 
when ev.event_type = 'adcomplete' or (ev.event_type = 'adquartile' and ev.ad_quartile_event__percent_progress >= 75) then 1 else 0 end) > 0 as percent_reached_75 ,max(case when ev.event_type = 'adcomplete' then 1 else 0 end) > 0 as percent_reached_100 ,min(ev.start_tstamp) as viewed_at diff --git a/models/media_ads/snowplow_media_player_media_ads.sql b/models/media_ads/snowplow_media_player_media_ads.sql index 054b4da..bf05185 100644 --- a/models/media_ads/snowplow_media_player_media_ads.sql +++ b/models/media_ads/snowplow_media_player_media_ads.sql @@ -55,36 +55,36 @@ new_media_ad_views as ( a.media_identifier, max(a.media_label) as media_label, - {{ media_ad_field('a.ad_id') }} as ad_id, - {{ media_ad_field('max(a.name)') }} as name, - {{ media_ad_field('max(a.creative_id)') }} as creative_id, - {{ media_ad_field('max(a.duration_secs)') }} as duration_secs, - {{ media_ad_field('sum(case when a.skippable then 1 else 0 end) > 0') }} as skippable, - {{ media_ad_field('avg(a.pod_position)') }} as pod_position, + a.ad_id as ad_id, + max(a.name) as name, + max(a.creative_id) as creative_id, + max(a.duration_secs) as duration_secs, + sum(case when a.skippable then 1 else 0 end) > 0 as skippable, + avg(a.pod_position) as pod_position, count(*) as views, sum(case when a.clicked then 1 else 0 end) as clicked, sum(case when a.skipped then 1 else 0 end) as skipped, - {{ media_ad_quartile_event_field('sum(case when a.percent_reached_25 then 1 else 0 end)') }} as percent_reached_25, - {{ media_ad_quartile_event_field('sum(case when a.percent_reached_50 then 1 else 0 end)') }} as percent_reached_50, - {{ media_ad_quartile_event_field('sum(case when a.percent_reached_75 then 1 else 0 end)') }} as percent_reached_75, + sum(case when a.percent_reached_25 then 1 else 0 end) as percent_reached_25, + sum(case when a.percent_reached_50 then 1 else 0 end) as percent_reached_50, + sum(case when a.percent_reached_75 then 1 else 0 end) as percent_reached_75, sum(case when 
a.percent_reached_100 then 1 else 0 end) as percent_reached_100, {% if is_incremental() %} 0 as views_unique, 0 as clicked_unique, 0 as skipped_unique, - {{ media_ad_quartile_event_field('0') }} as percent_reached_25_unique, - {{ media_ad_quartile_event_field('0') }} as percent_reached_50_unique, - {{ media_ad_quartile_event_field('0') }} as percent_reached_75_unique, + 0 as percent_reached_25_unique, + 0 as percent_reached_50_unique, + 0 as percent_reached_75_unique, 0 as percent_reached_100_unique, {% else %} count(distinct a.user_identifier) as views_unique, count(distinct case when a.clicked then a.user_identifier end) as clicked_unique, count(distinct case when a.skipped then a.user_identifier end) as skipped_unique, - {{ media_ad_quartile_event_field('count(distinct case when a.percent_reached_25 then user_identifier end)') }} as percent_reached_25_unique, - {{ media_ad_quartile_event_field('count(distinct case when a.percent_reached_50 then user_identifier end)') }} as percent_reached_50_unique, - {{ media_ad_quartile_event_field('count(distinct case when a.percent_reached_75 then user_identifier end)') }} as percent_reached_75_unique, + count(distinct case when a.percent_reached_25 then user_identifier end) as percent_reached_25_unique, + count(distinct case when a.percent_reached_50 then user_identifier end) as percent_reached_50_unique, + count(distinct case when a.percent_reached_75 then user_identifier end) as percent_reached_75_unique, count(distinct case when a.percent_reached_100 then user_identifier end) as percent_reached_100_unique, {% endif %} @@ -106,9 +106,9 @@ new_media_ad_views as ( count(distinct a.user_identifier) as views_unique, count(distinct case when a.clicked then a.user_identifier end) as clicked_unique, count(distinct case when a.skipped then a.user_identifier end) as skipped_unique, - {{ media_ad_quartile_event_field('count(distinct case when a.percent_reached_25 then user_identifier end)') }} as percent_reached_25_unique, - {{ 
media_ad_quartile_event_field('count(distinct case when a.percent_reached_50 then user_identifier end)') }} as percent_reached_50_unique, - {{ media_ad_quartile_event_field('count(distinct case when a.percent_reached_75 then user_identifier end)') }} as percent_reached_75_unique, + count(distinct case when a.percent_reached_25 then user_identifier end) as percent_reached_25_unique, + count(distinct case when a.percent_reached_50 then user_identifier end) as percent_reached_50_unique, + count(distinct case when a.percent_reached_75 then user_identifier end) as percent_reached_75_unique, count(distinct case when a.percent_reached_100 then user_identifier end) as percent_reached_100_unique from {{ ref("snowplow_media_player_media_ad_views") }} a @@ -129,7 +129,7 @@ new_media_ad_views as ( select * from new_data union all select * {% if target.type in ['databricks', 'spark'] %}except(first_view_date){% endif %} - from {{ this }} + from {{ this }} ) @@ -142,27 +142,27 @@ new_media_ad_views as ( a.media_identifier, max(a.media_label) as media_label, - {{ media_ad_field('a.ad_id') }} as ad_id, - {{ media_ad_field('max(a.name)') }} as name, - {{ media_ad_field('max(a.creative_id)') }} as creative_id, - {{ media_ad_field('max(a.duration_secs)') }} as duration_secs, - {{ media_ad_field('sum(case when a.skippable then 1 else 0 end) > 0') }} as skippable, - {{ media_ad_field('sum(a.pod_position * a.views) / sum(a.views)') }} as pod_position, + a.ad_id as ad_id, + max(a.name) as name, + max(a.creative_id) as creative_id, + max(a.duration_secs) as duration_secs, + sum(case when a.skippable then 1 else 0 end) > 0 as skippable, + sum(a.pod_position * a.views) / sum(a.views) as pod_position, sum(a.views) as views, sum(a.clicked) as clicked, sum(a.skipped) as skipped, - {{ media_ad_quartile_event_field('sum(a.percent_reached_25)') }} as percent_reached_25, - {{ media_ad_quartile_event_field('sum(a.percent_reached_50)') }} as percent_reached_50, - {{ 
media_ad_quartile_event_field('sum(a.percent_reached_75)') }} as percent_reached_75, + sum(a.percent_reached_25) as percent_reached_25, + sum(a.percent_reached_50) as percent_reached_50, + sum(a.percent_reached_75) as percent_reached_75, sum(a.percent_reached_100) as percent_reached_100, sum(a.views_unique) as views_unique, sum(a.clicked_unique) as clicked_unique, sum(a.skipped_unique) as skipped_unique, - {{ media_ad_quartile_event_field('sum(a.percent_reached_25_unique)') }} as percent_reached_25_unique, - {{ media_ad_quartile_event_field('sum(a.percent_reached_50_unique)') }} as percent_reached_50_unique, - {{ media_ad_quartile_event_field('sum(a.percent_reached_75_unique)') }} as percent_reached_75_unique, + sum(a.percent_reached_25_unique) as percent_reached_25_unique, + sum(a.percent_reached_50_unique) as percent_reached_50_unique, + sum(a.percent_reached_75_unique) as percent_reached_75_unique, sum(a.percent_reached_100_unique) as percent_reached_100_unique, min(a.first_view) as first_view, @@ -193,17 +193,17 @@ new_media_ad_views as ( a.views, a.clicked, a.skipped, - {{ media_ad_quartile_event_field('a.percent_reached_25') }} as percent_reached_25, - {{ media_ad_quartile_event_field('a.percent_reached_50') }} as percent_reached_50, - {{ media_ad_quartile_event_field('a.percent_reached_75') }} as percent_reached_75, + a.percent_reached_25 as percent_reached_25, + a.percent_reached_50 as percent_reached_50, + a.percent_reached_75 as percent_reached_75, a.percent_reached_100, coalesce(b.views_unique, a.views_unique) as views_unique, coalesce(b.clicked_unique, a.clicked_unique) as clicked_unique, coalesce(b.skipped_unique, a.skipped_unique) as skipped_unique, - {{ media_ad_quartile_event_field('coalesce(b.percent_reached_25_unique, a.percent_reached_25_unique)') }} as percent_reached_25_unique, - {{ media_ad_quartile_event_field('coalesce(b.percent_reached_50_unique, a.percent_reached_50_unique)') }} as percent_reached_50_unique, - {{ 
media_ad_quartile_event_field('coalesce(b.percent_reached_75_unique, a.percent_reached_75_unique)') }} as percent_reached_75_unique, + coalesce(b.percent_reached_25_unique, a.percent_reached_25_unique) as percent_reached_25_unique, + coalesce(b.percent_reached_50_unique, a.percent_reached_50_unique) as percent_reached_50_unique, + coalesce(b.percent_reached_75_unique, a.percent_reached_75_unique) as percent_reached_75_unique, coalesce(b.percent_reached_100_unique, a.percent_reached_100_unique) as percent_reached_100_unique, a.first_view, diff --git a/models/media_base/scratch/snowplow_media_player_base_this_run.sql b/models/media_base/scratch/snowplow_media_player_base_this_run.sql index 5d63b9d..bc8e485 100644 --- a/models/media_base/scratch/snowplow_media_player_base_this_run.sql +++ b/models/media_base/scratch/snowplow_media_player_base_this_run.sql @@ -26,7 +26,7 @@ events_this_run as ( select *, row_number() - over (partition by media_session_id order by start_tstamp desc) as media_session_index + over (partition by media_session__media_session_id order by start_tstamp desc) as media_session_index from {{ ref('snowplow_media_player_base_events_this_run') }} ) @@ -108,18 +108,18 @@ events_this_run as ( , media_sessions as ( select - media_session_id, - media_session_time_played, - media_session_time_played_muted, - media_session_time_paused, - media_session_content_watched, - media_session_time_buffering, - media_session_time_spent_ads, - media_session_ads, - media_session_ads_clicked, - media_session_ads_skipped, - media_session_ad_breaks, - media_session_avg_playback_rate + media_session__media_session_id as media_session_id, + media_session__time_played as media_session_time_played, + media_session__time_played_muted as media_session_time_played_muted, + media_session__time_paused as media_session_time_paused, + media_session__content_watched as media_session_content_watched, + media_session__time_buffering as media_session_time_buffering, + 
media_session__time_spent_ads as media_session_time_spent_ads, + media_session__ads as media_session_ads, + media_session__ads_clicked as media_session_ads_clicked, + media_session__ads_skipped as media_session_ads_skipped, + media_session__ad_breaks as media_session_ad_breaks, + media_session__avg_playback_rate as media_session_avg_playback_rate from events_this_run where media_session_index = 1 @@ -173,7 +173,7 @@ events_this_run as ( ) {% set play_time_secs -%} - coalesce({{ media_session_field('s.media_session_time_played') }}, d.play_time_secs) + coalesce(s.media_session_time_played, d.play_time_secs) {%- endset %} select @@ -205,23 +205,23 @@ select d.start_tstamp, d.end_tstamp, coalesce( - {{ media_session_field('s.media_session_avg_playback_rate') }}, + s.media_session_avg_playback_rate, cast(d.avg_playback_rate as {{ type_float() }}) ) as avg_playback_rate, -- time spent {{ play_time_secs }} as play_time_secs, - coalesce({{ media_session_field('s.media_session_time_played_muted') }}, d.play_time_muted_secs) as play_time_muted_secs, - {{ media_session_field('s.media_session_time_paused') }} as paused_time_secs, - {{ media_session_field('s.media_session_time_buffering') }} as buffering_time_secs, - {{ media_session_field('s.media_session_time_spent_ads') }} as ads_time_secs, + coalesce(s.media_session_time_played_muted, d.play_time_muted_secs) as play_time_muted_secs, + s.media_session_time_paused as paused_time_secs, + s.media_session_time_buffering as buffering_time_secs, + s.media_session_time_spent_ads as ads_time_secs, -- event counts d.seeks, - {{ media_session_field('s.media_session_ads') }} as ads, - {{ media_session_field('s.media_session_ads_clicked') }} as ads_clicked, - {{ media_session_field('s.media_session_ads_skipped') }} as ads_skipped, - {{ media_session_field('s.media_session_ad_breaks') }} as ad_breaks, + s.media_session_ads as ads, + s.media_session_ads_clicked as ads_clicked, + s.media_session_ads_skipped as ads_skipped, + 
s.media_session_ad_breaks as ad_breaks, -- playback progress d.plays > 0 as is_played, @@ -231,7 +231,7 @@ select end as is_valid_play, case when - coalesce({{ media_session_field('s.media_session_content_watched') }}, d.play_time_secs) / nullif(f.duration_secs, 0) + coalesce(s.media_session_content_watched, d.play_time_secs) / nullif(f.duration_secs, 0) >= {{ var("snowplow__complete_play_rate") }} then true else false @@ -243,11 +243,11 @@ select -- to correct incorrect result due to duplicate session_identifier (one removed) end, 0) as {{ type_float() }}) as retention_rate, p.percent_progress_reached, - {{ media_session_field('s.media_session_content_watched') }} as content_watched_secs, + s.media_session_content_watched as content_watched_secs, case - when d.duration_secs is not null and {{ media_session_field('s.media_session_content_watched') }} is not null and d.duration_secs > 0 + when d.duration_secs is not null and s.media_session_content_watched is not null and d.duration_secs > 0 then least( - {{ media_session_field('s.media_session_content_watched') }} / d.duration_secs, + s.media_session_content_watched / d.duration_secs, 1.0 ) end as content_watched_percent diff --git a/models/media_stats/snowplow_media_player_media_stats.sql b/models/media_stats/snowplow_media_player_media_stats.sql index f3244e1..818636e 100644 --- a/models/media_stats/snowplow_media_player_media_stats.sql +++ b/models/media_stats/snowplow_media_player_media_stats.sql @@ -43,10 +43,10 @@ with new_data as ( sum(case when is_valid_play then 1 else 0 end) as valid_plays, sum(case when p.is_complete_play then 1 else 0 end) as complete_plays, count(distinct p.page_view_id) as impressions, - avg(case when is_played then coalesce({{ media_session_field('p.content_watched_secs') }}, p.play_time_secs, 0) / nullif(p.duration_secs, 0) end) as avg_percent_played, + avg(case when is_played then coalesce(p.content_watched_secs, p.play_time_secs, 0) / nullif(p.duration_secs, 0) end) as 
avg_percent_played, avg(case when is_played then p.retention_rate end) as avg_retention_rate, avg(case when is_played then p.avg_playback_rate end) as avg_playback_rate, - {{ media_session_field('avg(case when is_played then p.content_watched_secs end)') }} as avg_content_watched_sec, + avg(case when is_played then p.content_watched_secs end) as avg_content_watched_sec, max(start_tstamp) as last_base_tstamp from {{ ref("snowplow_media_player_base") }} p @@ -91,7 +91,7 @@ group by 1,2,3,5,6 (n.avg_percent_played * n.plays / nullif((n.plays + coalesce(t.plays, 0)),0)) + (coalesce(t.avg_percent_played, 0) * coalesce(t.plays, 0) / nullif((n.plays + coalesce(t.plays, 0)), 0)) as avg_percent_played, (n.avg_retention_rate * n.plays / nullif((n.plays + coalesce(t.plays, 0)), 0)) + (coalesce(t.avg_retention_rate, 0) * coalesce(t.plays, 0) / nullif((n.plays + coalesce(t.plays, 0)), 0)) as avg_retention_rate, (n.avg_playback_rate * n.plays / nullif((n.plays + coalesce(t.plays, 0)), 0)) + (coalesce(t.avg_playback_rate, 0) * coalesce(t.plays, 0) / nullif((n.plays + coalesce(t.plays, 0)), 0)) as avg_playback_rate, - cast({{ media_session_field('(coalesce(n.avg_content_watched_sec, 0.0) / cast(60 as ' + type_float() + ') * n.plays + coalesce(t.avg_content_watched_mins, 0.0) * coalesce(t.plays, 0.0)) / nullif((n.plays + coalesce(t.plays, 0.0)), 0.0)') }} as {{ type_float() }}) as avg_content_watched_mins + cast((coalesce(n.avg_content_watched_sec, 0.0) / cast(60 as {{ type_float() }}) * n.plays + coalesce(t.avg_content_watched_mins, 0.0) * coalesce(t.plays, 0.0)) / nullif((n.plays + coalesce(t.plays, 0.0)), 0.0) as {{ type_float() }}) as avg_content_watched_mins from new_data n @@ -193,14 +193,14 @@ with prep as ( sum(case when is_valid_play then 1 else 0 end) as valid_plays, sum(case when p.is_complete_play then 1 else 0 end) as complete_plays, count(distinct p.page_view_id) as impressions, - avg(case when is_played then coalesce({{ media_session_field('p.content_watched_secs') 
}}, p.play_time_secs, 0) / nullif(p.duration_secs, 0) end) as avg_percent_played, + avg(case when is_played then coalesce(p.content_watched_secs, p.play_time_secs, 0) / nullif(p.duration_secs, 0) end) as avg_percent_played, avg(case when is_played then p.retention_rate end) as avg_retention_rate, avg(case when is_played then p.avg_playback_rate end) as avg_playback_rate, - cast({{ media_session_field('avg( + cast(avg( case when is_played and p.content_watched_secs is not null - then p.content_watched_secs / cast(60 as ' + type_float() + ') end - )') }} as {{ type_float() }}) as avg_content_watched_mins + then p.content_watched_secs / cast(60 as {{ type_float() }}) end + ) as {{ type_float() }}) as avg_content_watched_mins from {{ ref("snowplow_media_player_base") }} p