diff --git a/.github/workflows/run_unit_tests_on_pr.yml b/.github/workflows/run_unit_tests_on_pr.yml
index 6a2b6dd4..9790836e 100644
--- a/.github/workflows/run_unit_tests_on_pr.yml
+++ b/.github/workflows/run_unit_tests_on_pr.yml
@@ -3,6 +3,14 @@ name: Run Unit Tests on Pull Request
 on: [pull_request_target,workflow_dispatch]
 env:
   BIGQUERY_PROJECT: ${{ secrets.BIGQUERY_PROJECT }}
+  BIGQUERY_PROPERTY_ID: ${{ secrets.BIGQUERY_PROPERTY_ID }}
+  BIGQUERY_DATASET: ${{ secrets.BIGQUERY_DATASET }}
+  BIGQUERY_KEYFILE: ./unit_tests/dbt-service-account.json
+  GA4_CONVERSION_EVENTS: ${{ vars.GA4_CONVERSION_EVENTS }}
+  GA4_DERIVED_SESSION_PROPERTIES: ${{ vars.GA4_DERIVED_SESSION_PROPERTIES }}
+  GA4_DERIVED_USER_PROPERTIES: ${{ vars.GA4_DERIVED_USER_PROPERTIES }}
+  GA4_INCREMENTAL_DAYS: ${{ vars.GA4_INCREMENTAL_DAYS }}
+  GA4_START_DATE: ${{ vars.GA4_START_DATE }}
 
 jobs:
   pytest_run_all:
@@ -35,3 +43,36 @@ jobs:
 
     - name: Run tests
       run: python -m pytest .
+
+  run_dbt_unit_tests:
+    name: Run dbt Unit Tests
+    runs-on: ubuntu-latest
+    steps:
+    - name: Check out
+      uses: actions/checkout@v3
+      with:
+        ref: ${{ github.event.pull_request.head.sha }}
+
+    - uses: actions/setup-python@v1
+      with:
+        python-version: "3.11.x"
+
+    - name: Authenticate using service account
+      run: 'echo "$KEYFILE" > ./unit_tests/dbt-service-account.json'
+      shell: bash
+      env:
+        KEYFILE: ${{ secrets.GCP_BIGQUERY_USER_KEYFILE }}
+
+    - name: Install dbt
+      run: |
+        pip install dbt-core
+        pip install dbt-bigquery
+        dbt deps
+
+    - name: Materialize necessary dbt resources
+      run: |
+        dbt seed -f
+        dbt run -s +test_type:unit -f --empty
+
+    - name: Run dbt unit tests
+      run: dbt test -s test_type:unit
diff --git a/.gitignore b/.gitignore
index 84063871..6565ca5f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@
 target/
 dbt_packages/
 logs/
+package-lock.yml
+.user.yml
 
 google-cloud-sdk/
 unit_tests/.env
diff --git a/README.md b/README.md
index 16ecbbda..1c1c3b61 100644
--- a/README.md
+++ b/README.md
@@ -72,7 +72,7 @@ packages:
 ```
 
 ## Required Variables
-This package assumes that you have an existing DBT project with a BigQuery profile and a BigQuery GCP instance available with GA4 event data loaded. Source data is defined using the `project` and `dataset` variables below. The `static_incremental_days` variable defines how many days' worth of data to reprocess during incremental runs.
+This package assumes that you have an existing DBT project with a BigQuery profile and a BigQuery GCP instance available with GA4 event data loaded. Source data is defined using the `project` and `property_ids` variables below. The `static_incremental_days` variable defines how many days' worth of data to reprocess during incremental runs. The `start_date` variable defines the earliest date for which data is included and loaded into the models in this package.
 
 ```
 vars:
@@ -302,9 +302,33 @@ The easiest option is using OAuth with your Google Account. Summarized instructi
 ```
 gcloud auth application-default login --scopes=https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/iam.test
 ```
+
 # Unit Testing
 
-This package uses `pytest` as a method of unit testing individual models. More details can be found in the [unit_tests/README.md](unit_tests) folder.
+The dbt-ga4 package treats each model and macro as a 'unit' of code. If we fix the input to each unit, we can test that we received the expected output.
+
+This package currently uses a combination of dbt unit tests and `pytest` as a method of unit testing individual models. The remaining `pytest` unit test will be refactored to a dbt unit test when possible - progress on the bug preventing that work can be tracked [here](https://github.com/dbt-labs/dbt-core/issues/10353).
+
+### dbt unit tests
+
+dbt's documentation on unit tests can be found [here](https://docs.getdbt.com/docs/build/unit-tests). Unit tests are performed the same way other types of dbt tests are executed.
+
+Execute a specific test:
+```
+dbt test -s <test_name>
+```
+Execute all tests configured for a model:
+```
+dbt test -s <model_name>
+```
+Execute all dbt unit tests:
+```
+dbt test -s test_type:unit
+```
+
+### pytest
+
+More details on using `pytest` for unit testing can be found in the [unit_tests/README.md](unit_tests) folder.
 
 # Overriding Default Channel Groupings
 
diff --git a/TODO.md b/TODO.md
index 84ca8488..ee262afc 100644
--- a/TODO.md
+++ b/TODO.md
@@ -22,6 +22,7 @@
 - Configuration and dynamic templates to create custom event tables and dimensions
 - Configuration to create custom dimensions (session, user, event_*) from event parameters
 - Use Fivetran's `union_data` method (or something similar) to handle multiple, unioned GA4 exports. https://github.com/fivetran/dbt_xero_source/blob/main/models/tmp/stg_xero__account_tmp.sql
+- Un-comment unit test in `stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml` once [this bug](https://github.com/dbt-labs/dbt-core/issues/10353) is resolved. Once that is complete, the `unit_tests` folder pertaining to the `pytest` unit tests should be removed along with the `pytest_run_all` job in `run_unit_tests_on_pr.yml`.
 
 ## Misc
 
diff --git a/dbt_project.yml b/dbt_project.yml
index a2f8bf71..cdc786ff 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -8,6 +8,8 @@ seed-paths: ["seeds"]
 macro-paths: ["macros"]
 snapshot-paths: ["snapshots"]
 
+profile: 'default'
+
 target-path: "target"  # directory which will store compiled SQL files
 clean-targets:         # directories to be removed by `dbt clean`
   - "target"
diff --git a/macros/base_select.sql b/macros/base_select.sql
index 8919388f..7603d446 100644
--- a/macros/base_select.sql
+++ b/macros/base_select.sql
@@ -36,7 +36,8 @@
     , ecommerce.transaction_id
     , items
     , {%- if var('combined_dataset', false) != false %} cast(left(regexp_replace(_table_suffix, r'^(intraday_)?\d{8}', ''), 100) as int64)
-      {%- else %} {{ var('property_ids')[0] }}
+      {%- elif var('property_ids', false) != false %} {{ var('property_ids')[0] }}
+      {%- else %} {{ env_var('BIGQUERY_PROPERTY_ID') }}
       {%- endif %} as property_id
 
 {% endmacro %}
diff --git a/models/marts/core/dim_ga4__sessions_daily.sql b/models/marts/core/dim_ga4__sessions_daily.sql
index 8273dd74..854af34d 100644
--- a/models/marts/core/dim_ga4__sessions_daily.sql
+++ b/models/marts/core/dim_ga4__sessions_daily.sql
@@ -1,5 +1,5 @@
 {% set partitions_to_replace = ['current_date'] %}
-{% for i in range(var('static_incremental_days')) %}
+{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %}
 {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %}
 {% endfor %}
 {{
diff --git a/models/marts/core/fct_ga4__pages.sql b/models/marts/core/fct_ga4__pages.sql
index ca37ac0b..85aad420 100644
--- a/models/marts/core/fct_ga4__pages.sql
+++ b/models/marts/core/fct_ga4__pages.sql
@@ -1,5 +1,5 @@
 {% set partitions_to_replace = ['current_date'] %}
-{% for i in range(var('static_incremental_days')) %}
+{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if
env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {{ diff --git a/models/marts/core/fct_ga4__sessions_daily.sql b/models/marts/core/fct_ga4__sessions_daily.sql index 08c35798..7b09a975 100644 --- a/models/marts/core/fct_ga4__sessions_daily.sql +++ b/models/marts/core/fct_ga4__sessions_daily.sql @@ -1,5 +1,5 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {{ diff --git a/models/staging/base/base_ga4__events.sql b/models/staging/base/base_ga4__events.sql index 533dbc0f..bce14406 100644 --- a/models/staging/base/base_ga4__events.sql +++ b/models/staging/base/base_ga4__events.sql @@ -1,5 +1,5 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} @@ -21,9 +21,11 @@ with source as ( select {{ ga4.base_select_source() }} from {{ source('ga4', 'events') }} - where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{var('start_date')}} - {% if is_incremental() %} - and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) + {% if not flags.EMPTY %} + where cast(left(replace(_table_suffix, 'intraday_', ''), 8) as int64) >= {{ env_var('GA4_START_DATE') if env_var('GA4_START_DATE', false) else var('start_date') }} + {% if is_incremental() %} + and parse_date('%Y%m%d', left(replace(_table_suffix, 'intraday_', ''), 8)) in ({{ partitions_to_replace | join(',') }}) + {% endif %} {% endif %} ), renamed as ( diff --git a/models/staging/recommended_events/stg_ga4__event_purchase_deduplicated.sql b/models/staging/recommended_events/stg_ga4__event_purchase_deduplicated.sql index ce5aeae1..66488bee 100644 --- a/models/staging/recommended_events/stg_ga4__event_purchase_deduplicated.sql +++ b/models/staging/recommended_events/stg_ga4__event_purchase_deduplicated.sql @@ -1,6 +1,6 @@ {% if not flags.FULL_REFRESH %} {% set partitions_to_query = ['current_date'] %} - {% for i in range(var('static_incremental_days', 1)) %} + {% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_query = partitions_to_query.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {% endif %} diff --git a/models/staging/src_ga4.yml b/models/staging/src_ga4.yml index 29104767..8f29701d 100644 --- a/models/staging/src_ga4.yml +++ b/models/staging/src_ga4.yml @@ -4,11 +4,13 @@ sources: - name: ga4 database: | # Source from target.project if multi-property, otherwise source from source_project {%- if var('combined_dataset', false) != false -%} {{target.project}} - {%- else -%} {{var('source_project')}} + {%- elif var('source_project', false) != false -%} {{var('source_project')}} + {%- else -%} 
{{env_var('BIGQUERY_PROJECT')}} {%- endif -%} schema: | # Source from combined property dataset if set, otherwise source from original GA4 property {%- if var('combined_dataset', false) != false -%} {{var('combined_dataset')}} - {%- else -%} analytics_{{var('property_ids')[0]}} + {%- elif var('property_ids', false) != false -%} analytics_{{var('property_ids')[0]}} + {%- else -%} analytics_{{env_var('BIGQUERY_PROPERTY_ID')}} {%- endif -%} tables: - name: events diff --git a/models/staging/stg_ga4__client_key_first_last_events.yml b/models/staging/stg_ga4__client_key_first_last_events.yml index 4e9cc7f4..87876103 100644 --- a/models/staging/stg_ga4__client_key_first_last_events.yml +++ b/models/staging/stg_ga4__client_key_first_last_events.yml @@ -7,4 +7,20 @@ models: - name: client_key description: Hashed combination of user_pseudo_id and stream_id tests: - - unique \ No newline at end of file + - unique +unit_tests: + - name: test_stg_ga4__client_key_first_last_events + description: Test pulling the first and last event per client key + model: stg_ga4__client_key_first_last_events + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + stream_id,client_key,event_key,event_timestamp + 1,IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,1661339279816517 + 1,IX+OyYJBgjwqML19GB/XIQ==,gt1SoAtrxDv33uDGwVeMVA==,1661339279816518 + expect: + format: csv + rows: | + client_key,first_event,last_event + IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,gt1SoAtrxDv33uDGwVeMVA== diff --git a/models/staging/stg_ga4__client_key_first_last_pageviews.yml b/models/staging/stg_ga4__client_key_first_last_pageviews.yml index 9623fd66..b93e6850 100644 --- a/models/staging/stg_ga4__client_key_first_last_pageviews.yml +++ b/models/staging/stg_ga4__client_key_first_last_pageviews.yml @@ -7,4 +7,20 @@ models: - name: client_key description: Hashed combination of user_pseudo_id and stream_id tests: - - unique \ No newline at end of file + - unique +unit_tests: + - name: test_stg_ga4__client_key_first_last_pageviews + description: Test pulling the first and last page view per client key + model: stg_ga4__client_key_first_last_pageviews + given: + - input: ref('stg_ga4__event_page_view') + format: csv + rows: | + stream_id,client_key,event_key,event_timestamp,page_location + 1,IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,1661339279816517,A + 1,IX+OyYJBgjwqML19GB/XIQ==,gt1SoAtrxDv33uDGwVeMVA==,1661339279816518,B + expect: + format: csv + rows: | + client_key,first_page_view_event_key,last_page_view_event_key,first_page_location,last_page_location + IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,gt1SoAtrxDv33uDGwVeMVA==,A,B diff --git a/models/staging/stg_ga4__derived_session_properties.sql b/models/staging/stg_ga4__derived_session_properties.sql index 65fbcfd6..0b4816f7 100644 --- a/models/staging/stg_ga4__derived_session_properties.sql +++ b/models/staging/stg_ga4__derived_session_properties.sql @@ -1,5 +1,5 @@ {{ config( - enabled = true if var('derived_session_properties', false) else false, + enabled = true if var('derived_session_properties', false) or env_var('GA4_DERIVED_SESSION_PROPERTIES', false) else false, materialized = "table" ) }} diff --git a/models/staging/stg_ga4__derived_session_properties.yml b/models/staging/stg_ga4__derived_session_properties.yml index 0ecffcf9..4e955733 100644 --- a/models/staging/stg_ga4__derived_session_properties.yml +++ b/models/staging/stg_ga4__derived_session_properties.yml @@ -8,4 +8,39 @@ models: columns: - name: session_key tests: - - unique \ No 
newline at end of file + - unique +unit_tests: + - name: test_derived_session_properties + description: Test whether a derived property is successfully retrieved from multiple event payloads + model: stg_ga4__derived_session_properties + given: + - input: ref('stg_ga4__events') + format: sql + rows: | + select + 'AAA' as session_key + , 1617691790431476 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + , ARRAY[STRUCT('my_property' as key, STRUCT('value1' as string_value) as value)] as user_properties + union all + select + 'AAA' as session_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params + , ARRAY[] as user_properties + union all + select + 'BBB' as session_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + , ARRAY[STRUCT('my_property' as key, STRUCT('value2' as string_value) as value)] as user_properties + expect: + format: dict + rows: + - {session_key: AAA, my_derived_property: 2, my_derived_property2: value1} + - {session_key: BBB, my_derived_property: 1, my_derived_property2: value2} + overrides: + vars: {derived_session_properties: [{event_parameter: 'my_param',session_property_name: 'my_derived_property',value_type: 'int_value'},{user_property: 'my_property',session_property_name: 'my_derived_property2',value_type: 'string_value'}]} diff --git a/models/staging/stg_ga4__derived_session_properties_daily.sql b/models/staging/stg_ga4__derived_session_properties_daily.sql index f997d40b..487c29e2 100644 --- a/models/staging/stg_ga4__derived_session_properties_daily.sql +++ b/models/staging/stg_ga4__derived_session_properties_daily.sql @@ -1,10 +1,10 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {{ config( - enabled = true if var('derived_session_properties', false) else false, + enabled = true if var('derived_session_properties', false) or env_var('GA4_DERIVED_SESSION_PROPERTIES', false) else false, materialized = 'incremental', incremental_strategy = 'insert_overwrite', tags = ["incremental"], diff --git a/models/staging/stg_ga4__derived_user_properties.sql b/models/staging/stg_ga4__derived_user_properties.sql index ec1fd6b6..b3bd666b 100644 --- a/models/staging/stg_ga4__derived_user_properties.sql +++ b/models/staging/stg_ga4__derived_user_properties.sql @@ -1,5 +1,5 @@ {{ config( - enabled = true if var('derived_user_properties', false) else false, + enabled = true if var('derived_user_properties', false) or env_var('GA4_DERIVED_USER_PROPERTIES', false) else false, materialized = "table" ) }} diff --git a/models/staging/stg_ga4__derived_user_properties.yml b/models/staging/stg_ga4__derived_user_properties.yml index 3aadf7f4..c347e91c 100644 --- a/models/staging/stg_ga4__derived_user_properties.yml +++ b/models/staging/stg_ga4__derived_user_properties.yml @@ -7,4 +7,36 @@ models: - name: client_key description: Hashed combination of user_pseudo_id and stream_id tests: - - unique \ No newline at end of file + - unique +unit_tests: + - name: 
test_derived_user_properties + description: Test whether a derived user property is successfully retrieved from multiple event payloads + model: stg_ga4__derived_user_properties + given: + - input: ref('stg_ga4__events') + format: sql + rows: | + select + 'AAA' as client_key + , 1617691790431476 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + union all + select + 'AAA' as client_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(2 as int_value) as value)] as event_params + union all + select + 'BBB' as client_key + , 1617691790431477 as event_timestamp + , 'first_visit' as event_name + , ARRAY[STRUCT('my_param' as key, STRUCT(1 as int_value) as value)] as event_params + expect: + format: dict + rows: + - {client_key: AAA, my_derived_property: 2} + - {client_key: BBB, my_derived_property: 1} + overrides: + vars: {derived_user_properties: [{event_parameter: 'my_param',user_property_name: 'my_derived_property',value_type: 'int_value'}]} diff --git a/models/staging/stg_ga4__event_to_query_string_params.yml b/models/staging/stg_ga4__event_to_query_string_params.yml index 4b4310f4..c97dbaf5 100644 --- a/models/staging/stg_ga4__event_to_query_string_params.yml +++ b/models/staging/stg_ga4__event_to_query_string_params.yml @@ -3,4 +3,23 @@ version: 2 models: - name: stg_ga4__event_to_query_string_params description: This model pivots the query string parameters contained within the event's page_location field to become rows. Each row is a single parameter/value combination contained in a single event's query string. - \ No newline at end of file +unit_tests: + - name: test_stg_ga4__event_to_query_string_params + description: Test whether event query strings are flattened for each query string parameter + model: stg_ga4__event_to_query_string_params + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + event_key,page_query_string + aaa,param1=value1¶m2=value2 + bbb,param1 + ccc,param1= + expect: + format: csv + rows: | + event_key,param,value + aaa,param1,value1 + aaa,param2,value2 + bbb,param1, + ccc,param1, diff --git a/models/staging/stg_ga4__events.yml b/models/staging/stg_ga4__events.yml index 984f606d..bdbab034 100644 --- a/models/staging/stg_ga4__events.yml +++ b/models/staging/stg_ga4__events.yml @@ -21,7 +21,6 @@ models: the data shows that this is not always the case. When a user_engagement event does not fire, the engagement_time_msec parameter is assigned to the next page_view. This engagement time should be credited to the previous page, so for page_view events this field uses the session_key and page_referrer as the key while all other events use the session_key and page_location. - unit_tests: - name: query_parameter_removal description: "Check that query parameters get removed from the model." @@ -42,7 +41,6 @@ unit_tests: - {page_location: https://asite.com/anotherpage, page_referrer: https://asite.com/previous_page?utm_source=source_value} - {page_location: https://anothersite.com/page?not_excluded_param=val¶m=also_not_excluded, page_referrer: https://anothersite.com/previous_page#utm_source=source_value#hash=hash_value} - {page_location: https://twoparams.com/page, page_referrer: https://twoparams.com/previous_page#hash=hash_value} - - name: query_parameter_remove_all description: "Check that the '*all*' flag removes all query parameters from the model." 
model: stg_ga4__events @@ -58,3 +56,382 @@ unit_tests: rows: - {page_location: https://asite.com/page, page_referrer: https://asite.com/previous_page} - {page_location: https://asite.com/anotherpage, page_referrer: https://asite.com/previous_page} + - name: query_parameter_extraction + description: "Check that query parameters get extracted from the model and added as new fields." + model: stg_ga4__events + given: + - input: ref('base_ga4__events') + rows: + - {page_location: https://asite.com/page?param1=value1} + - {page_location: https://asite.com/anotherpage?param1=value%20with%20encoded%20spaces} + - {page_location: https://sitetwo.com/page?param1=value1¶m2=value2} + - {page_location: https://anothersite.com/page?not_included_param=value¶m1=val¶m=also_not_included} + - {page_location: https://threeparams.com/page¶m1=another_val¶m2=second_val¶m3=value3} + - {page_location: https://noparams.com/} + overrides: + vars: + query_parameter_extraction: ["param1", "param2", "param3"] + expect: + rows: + - {query_param_param1: value1, query_param_param2: null, query_param_param3: null} + - {query_param_param1: value%20with%20encoded%20spaces, query_param_param2: null, query_param_param3: null} + - {query_param_param1: value1, query_param_param2: value2, query_param_param3: null} + - {query_param_param1: val, query_param_param2: null, query_param_param3: null} + - {query_param_param1: another_val, query_param_param2: second_val, query_param_param3: value3} + - {query_param_param1: null, query_param_param2: null, query_param_param3: null} + - name: hostname_extraction_from_url + description: "Check that the hostname is extracted from the URL" + model: stg_ga4__events + given: + - input: ref('base_ga4__events') + rows: + - {page_location: https://fakesite.com/} + - {page_location: https://www.mock.sitehub.io/} + - {page_location: https://cool-site.com/with/this-path} + - {page_location: https://example.site.app/?parameter=this} + - {page_location: https://madeup.org/page?param=true&other_param=sure} + expect: + rows: + - {page_hostname: fakesite.com} + - {page_hostname: mock.sitehub.io} + - {page_hostname: cool-site.com} + - {page_hostname: example.site.app} + - {page_hostname: madeup.org} + - name: query_string_extraction_from_url + description: "Check that the query string is extracted from the URL" + model: stg_ga4__events + given: + - input: ref('base_ga4__events') + rows: + - {page_location: https://fakesite.com/?query_string=something} + - {page_location: https://www.no.query.string/but-has-this-path} + - {page_location: https://cool-site.com/even-cooler-path?utm_term=test-term&utm_source=test-source} + expect: + rows: + - {page_query_string: query_string=something} + - {page_query_string: null} + - {page_query_string: utm_term=test-term&utm_source=test-source} + - name: page_path_extraction + description: "Check that the page path is extracted from the URL" + model: stg_ga4__events + given: + - input: ref('base_ga4__events') + rows: + - {page_location: https://fakesite.com/} + - {page_location: https://cool-site.com/with/this-path} + - {page_location: https://example.site.app/?parameter=no-path} + - {page_location: https://madeup.org/page?param=true&other_param=sure} + expect: + rows: + - {page_path: /} + - {page_path: /with/this-path} + - {page_path: /} + - {page_path: /page} + - name: test_base_to_stg_ga4__events + description: "Check whether a given row from base_ga4__events produces the expected row in stg_ga4__events" + model: stg_ga4__events + given: + - input: ref('base_ga4__events') 
+ format: sql + rows: | + select + date('2021-04-06') as event_date_dt + , 1617691790431476 as event_timestamp + , 'first_visit' as event_name + , array[ + struct( + 'ga_session_number' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) as value + ) + , struct( + 'engaged_session_event' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'ga_session_id' as key + , struct( + cast(null as string) as string_value + , 1617691775 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'source' as key + , struct( + 'bing' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'page_title' as key + , struct( + 'Velir | Behavior-Driven Testing in Drupal 8' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'page_location' as key + , struct( + 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'session_engaged' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'engagement_time_msec' as key + , struct( + cast(null as string) as string_value + , 30000 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + ] as event_params + , null as event_previous_timestamp + , null as event_value_in_usd + , 948327668 as event_bundle_sequence_id + , null as event_server_timestamp_offset + , cast(null as string) as user_id + , '1166526666.1617691776' as user_pseudo_id -- Added + , cast(null as string) as privacy_info_analytics_storage -- Added + , cast(null as string) as privacy_info_ads_storage -- Added + , cast(null as string) as privacy_info_uses_transient_token -- Added + , null as user_properties + , 1617691790431476 as user_first_touch_timestamp + , 0.0 as user_ltv_revenue -- Added + , 'USD' as user_ltv_currency -- Added + , 'desktop' as device_category -- Added + , cast(null as string) as device_mobile_brand_name -- Added + , cast(null as string) as device_mobile_model_name -- Added + , cast(null as string) as device_mobile_marketing_name -- Added + , cast(null as string) as device_mobile_os_hardware_model -- Added + , 'Windows' as device_operating_system -- Added + , 'Windows 10' as device_operating_system_version -- Added + , cast(null as string) as device_vendor_id -- Added + , cast(null as string) as device_advertising_id -- Added + , 'en-us' as device_language -- Added + , 'No' as device_is_limited_ad_tracking -- Added + , null as device_time_zone_offset_seconds -- Added + , cast(null as string) as device_browser -- Added + , cast(null as string) as device_browser_version -- Added + , 'Chrome' as device_web_info_browser -- Added + , '89.0.4389.114' as device_web_info_browser_version -- Added + , 'www.velir.com' as device_web_info_hostname -- Added + , 'Asia' as geo_continent -- Added + , 'Vietnam' as geo_country -- Added + , 'Ho Chi Minh City' as geo_region -- Added + , 'Ho 
Chi Minh City' as geo_city -- Added + , 'Southeast Asia' as geo_sub_continent -- Added + , '(not set)' as geo_metro -- Added + , cast(null as string) as app_info_id -- Added + , cast(null as string) as app_info_version -- Added + , cast(null as string) as app_info_install_store -- Added + , cast(null as string) as app_info_firebase_app_id -- Added + , cast(null as string) as app_info_install_source -- Added + , '(direct)' as user_campaign -- Added + , '(none)' as user_medium -- Added + , '(direct)' as user_source -- Added + , '1966637064' as stream_id + , 'WEB' as platform + , null as ecommerce + , null as items + , 12345 as property_id + , 1617691775 as session_id + , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as page_location + , 1 as session_number + , 1 as session_engaged + , 30000 as engagement_time_msec -- Added + , 'Velir | Behavior-Driven Testing in Drupal 8' as page_title + , cast(null as string) as page_referrer + , 'bing' as event_source -- Added + , cast(null as string) as event_medium -- Added + , cast(null as string) as event_campaign -- Added + , cast(null as string) as event_content -- Added + , cast(null as string) as event_term -- Added + , 0 as is_page_view + , 0 as is_purchase + expect: + format: sql + rows: | + select + date('2021-04-06') as event_date_dt + , 1617691790431476 as event_timestamp + , 'first_visit' as event_name + , array[ + struct( + 'ga_session_number' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) as value + ) + , struct( + 'engaged_session_event' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'ga_session_id' as key + , struct( + cast(null as string) as string_value + , 1617691775 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'source' as key + , struct( + 'bing' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'page_title' as key + , struct( + 'Velir | Behavior-Driven Testing in Drupal 8' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'page_location' as key + , struct( + 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as string_value + , null as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'session_engaged' as key + , struct( + cast(null as string) as string_value + , 1 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + , struct( + 'engagement_time_msec' as key + , struct( + cast(null as string) as string_value + , 30000 as int_value + , cast(null as FLOAT64) as float_value + , cast(null as FLOAT64) as double_value + ) + ) + ] as event_params + , null as event_previous_timestamp + , null as event_value_in_usd + , 948327668 as event_bundle_sequence_id + , null as event_server_timestamp_offset + , cast(null as string) as user_id + , '1166526666.1617691776' as user_pseudo_id -- Added + , cast(null as string) as privacy_info_analytics_storage -- Added + , cast(null as string) as privacy_info_ads_storage -- Added + , cast(null as string) as 
privacy_info_uses_transient_token -- Added + , null as user_properties + , 1617691790431476 as user_first_touch_timestamp + , 0.0 as user_ltv_revenue -- Added + , 'USD' as user_ltv_currency -- Added + , 'desktop' as device_category -- Added + , cast(null as string) as device_mobile_brand_name -- Added + , cast(null as string) as device_mobile_model_name -- Added + , cast(null as string) as device_mobile_marketing_name -- Added + , cast(null as string) as device_mobile_os_hardware_model -- Added + , 'Windows' as device_operating_system -- Added + , 'Windows 10' as device_operating_system_version -- Added + , cast(null as string) as device_vendor_id -- Added + , cast(null as string) as device_advertising_id -- Added + , 'en-us' as device_language -- Added + , 'No' as device_is_limited_ad_tracking -- Added + , null as device_time_zone_offset_seconds -- Added + , cast(null as string) as device_browser -- Added + , cast(null as string) as device_browser_version -- Added + , 'Chrome' as device_web_info_browser -- Added + , '89.0.4389.114' as device_web_info_browser_version -- Added + , 'www.velir.com' as device_web_info_hostname -- Added + , 'Asia' as geo_continent -- Added + , 'Vietnam' as geo_country -- Added + , 'Ho Chi Minh City' as geo_region -- Added + , 'Ho Chi Minh City' as geo_city -- Added + , 'Southeast Asia' as geo_sub_continent -- Added + , '(not set)' as geo_metro -- Added + , cast(null as string) as app_info_id -- Added + , cast(null as string) as app_info_version -- Added + , cast(null as string) as app_info_install_store -- Added + , cast(null as string) as app_info_firebase_app_id -- Added + , cast(null as string) as app_info_install_source -- Added + , '(direct)' as user_campaign -- Added + , '(none)' as user_medium -- Added + , '(direct)' as user_source -- Added + , '1966637064' as stream_id + , 'WEB' as platform + , null as ecommerce + , null as items + , 12345 as property_id + , 1617691775 as session_id + , 1 as session_number + , 1 as session_engaged + , 30000 as engagement_time_msec -- Added + , 'Velir | Behavior-Driven Testing in Drupal 8' as page_title + , cast(null as string) as event_content -- Added + , cast(null as string) as event_term -- Added + , 0 as is_page_view + , 0 as is_purchase + , 'KXZY+6kA4bpHda1EzHaVvw==' as client_key + , '90mFUqw4xBzU+Xx9/4ycCQ==' as session_key + , '90mFUqw4xBzU+Xx9/4ycCQ==2021-04-06' as session_partition_key + , '7HoiVpTakjsH3DJ1t4HGNw==' as event_key + , 'bing' as event_source -- Added + , cast(null as string) as event_medium -- Added + , cast(null as string) as event_campaign -- Added + , 'test' as query_param_utm_term + , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8?utm_term=test' as original_page_location + , cast(null as string) as original_page_referrer + , '/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_path + , 'https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_location + , cast(null as string) as page_referrer + , 'velir.com' as page_hostname + , cast(null as string) as page_query_string + , '2021-04-06https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8' as page_key + , 'il76Ej2ywBpSKKyYSAkOlQ==' as page_engagement_key + overrides: + vars: + query_parameter_exclusions: ["utm_term"] + query_parameter_extraction: ["utm_term"] diff --git a/models/staging/stg_ga4__page_conversions.sql b/models/staging/stg_ga4__page_conversions.sql index ea5a7f57..538f94fd 100644 --- a/models/staging/stg_ga4__page_conversions.sql +++ 
b/models/staging/stg_ga4__page_conversions.sql @@ -1,5 +1,5 @@ {{ config( - enabled= var('conversion_events', false) != false + enabled= var('conversion_events', false) != false or env_var('GA4_CONVERSION_EVENTS', false) != false ) }} select diff --git a/models/staging/stg_ga4__page_conversions.yml b/models/staging/stg_ga4__page_conversions.yml new file mode 100644 index 00000000..2dfd6d55 --- /dev/null +++ b/models/staging/stg_ga4__page_conversions.yml @@ -0,0 +1,44 @@ +version: 2 + +models: + - name: stg_ga4__page_conversions + description: Model that calculates the number of conversions per page. Conversions are defined as variables in the project configurations. +unit_tests: + - name: test_page_conversion_count + description: Test whether the page-level count of conversions is correct + model: stg_ga4__page_conversions + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + event_name,page_key + page_view,A + page_view,A + page_view,B + expect: + format: csv + rows: | + page_key,page_view_count + A,2 + B,1 + overrides: + vars: {conversion_events: ['page_view']} + - name: test_page_conversion_count_non_event_name + description: Test whether the page-level count of conversions is correct + model: stg_ga4__page_conversions + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + event_name,page_key + page-view,A + page-view,A + page-view,B + expect: + format: csv + rows: | + page_key,page_view_count + A,2 + B,1 + overrides: + vars: {conversion_events: ['page-view']} diff --git a/models/staging/stg_ga4__session_conversions_daily.sql b/models/staging/stg_ga4__session_conversions_daily.sql index 49b0ed85..5ac65a36 100644 --- a/models/staging/stg_ga4__session_conversions_daily.sql +++ b/models/staging/stg_ga4__session_conversions_daily.sql @@ -1,10 +1,14 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} + +{% if is_incremental() %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} +{% endif %} + {{ config( - enabled= var('conversion_events', false) != false, + enabled= var('conversion_events', false) != false or env_var('GA4_CONVERSION_EVENTS', false) != false, materialized = 'incremental', incremental_strategy = 'insert_overwrite', tags = ["incremental"], diff --git a/models/staging/stg_ga4__session_conversions_daily.yml b/models/staging/stg_ga4__session_conversions_daily.yml index 2f26a7c6..edc6f08b 100644 --- a/models/staging/stg_ga4__session_conversions_daily.yml +++ b/models/staging/stg_ga4__session_conversions_daily.yml @@ -9,4 +9,57 @@ models: columns: - name: session_partition_key tests: - - unique \ No newline at end of file + - unique +unit_tests: + - name: test_session_conversion_count + description: Test whether the session-level count of conversions is correct + model: stg_ga4__session_conversions_daily + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + session_key,session_partition_key,event_name,event_date_dt + A,A2022-01-01,page_view,2022-01-01 + A,A2022-01-01,my_conversion,2022-01-01 + A,A2022-01-01,my_conversion,2022-01-01 + B,B2022-01-01,my_conversion,2022-01-01 + C,C2022-01-01,some_other_event,2022-01-01 + A,A2022-01-02,my_conversion,2022-01-02 + expect: + format: csv + rows: | + 
session_key,session_partition_key,session_partition_date,my_conversion_count + A,A2022-01-01,2022-01-01,2 + B,B2022-01-01,2022-01-01,1 + C,C2022-01-01,2022-01-01,0 + A,A2022-01-02,2022-01-02,1 + overrides: + macros: + is_incremental: false + vars: {conversion_events: ['my_conversion']} + - name: test_stg_ga4__session_conversions_daily_non_standard_event_name + description: Test whether the session-level count of conversions is correct + model: stg_ga4__session_conversions_daily + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + session_key,session_partition_key,event_name,event_date_dt + A,A2022-01-01,page_view,2022-01-01 + A,A2022-01-01,my-conversion,2022-01-01 + A,A2022-01-01,my-conversion,2022-01-01 + B,B2022-01-01,my-conversion,2022-01-01 + C,C2022-01-01,some_other_event,2022-01-01 + A,A2022-01-02,my-conversion,2022-01-02 + expect: + format: csv + rows: | + session_key,session_partition_key,session_partition_date,my_conversion_count + A,A2022-01-01,2022-01-01,2 + B,B2022-01-01,2022-01-01,1 + C,C2022-01-01,2022-01-01,0 + A,A2022-01-02,2022-01-02,1 + overrides: + macros: + is_incremental: false + vars: {conversion_events: ['my-conversion']} diff --git a/models/staging/stg_ga4__sessions_traffic_sources.sql b/models/staging/stg_ga4__sessions_traffic_sources.sql index b0f55c40..ffb02f1a 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources.sql @@ -24,12 +24,12 @@ session_source as ( select session_key ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN event_source END) IGNORE NULLS) OVER (session_window), '(direct)') AS session_source - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_medium, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_medium - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(source_category, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_source_category - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_campaign, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_campaign - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_content, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_content - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_term, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_term - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(default_channel_grouping, 'Direct') END) IGNORE NULLS) OVER (session_window), 'Direct') AS session_default_channel_grouping + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_medium, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_medium + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(source_category, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_source_category + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_campaign, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_campaign + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_content, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_content + ,COALESCE(FIRST_VALUE((CASE WHEN 
event_source <> '(direct)' or event_source is null THEN COALESCE(event_term, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_term + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(default_channel_grouping, 'Direct') END) IGNORE NULLS) OVER (session_window), 'Direct') AS session_default_channel_grouping from set_default_channel_grouping WINDOW session_window AS (PARTITION BY session_key ORDER BY event_timestamp ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ) diff --git a/models/staging/stg_ga4__sessions_traffic_sources.yml b/models/staging/stg_ga4__sessions_traffic_sources.yml index fa5a54eb..589f82e6 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources.yml +++ b/models/staging/stg_ga4__sessions_traffic_sources.yml @@ -14,4 +14,78 @@ models: - name: session_source description: First non-null source value of the session tests: - - not_null \ No newline at end of file + - not_null +unit_tests: + - name: test_default_channel_grouping + description: Test whether the defaul_channel_grouping macro is assigning channels correctly + model: stg_ga4__sessions_traffic_sources + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + session_key,event_timestamp,event_name,event_source,event_medium,event_campaign + A,172000000000000,event,(direct),(none), + B,172000000000000,event,(direct),(not set), + C,172000000000000,event,some-source,some-medium,some-cross-network-campaign + D,172000000000000,event,some-source,some-medium,cross-network + E,172000000000000,event,alibaba,cpc, + F,172000000000000,event,some-source,retargeting,shopping + G,172000000000000,event,google,ppc, + H,172000000000000,event,facebook,retargeting, + I,172000000000000,event,youtube.com,paid-something, + J,172000000000000,event,youtube.com,display, + K,172000000000000,event,some-source,cpc, + L,172000000000000,event,Google Shopping,, + M,172000000000000,event,some-source,,some-shopping-campaign + N,172000000000000,event,facebook,, + O,172000000000000,event,some-source,social, + P,172000000000000,event,youtube.com,, + Q,172000000000000,event,some-source,video, + R,172000000000000,event,bing,, + S,172000000000000,event,some-source,organic, + T,172000000000000,event,some-source,referral, + U,172000000000000,event,email,, + V,172000000000000,event,,e mail, + W,172000000000000,event,some-source,affiliate, + X,172000000000000,event,some-source,audio, + Y,172000000000000,event,sms,, + Z,172000000000000,event,,sms, + AA,172000000000000,event,some-source,something-push, + AB,172000000000000,event,some-source,mobile-notification, + AC,172000000000000,event,firebase,, + AD,172000000000000,event,some-source,some-medium,some-campaign + - input: ref('ga4_source_categories') + expect: + format: csv + rows: | + session_default_channel_grouping + Direct + Direct + Cross-network + Cross-network + Paid Shopping + Paid Shopping + Paid Search + Paid Social + Paid Video + Display + Paid Other + Organic Shopping + Organic Shopping + Organic Social + Organic Social + Organic Video + Organic Video + Organic Search + Organic Search + Referral + Email + Email + Affiliates + Audio + SMS + SMS + Mobile Push Notifications + Mobile Push Notifications + Mobile Push Notifications + Unassigned diff --git a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql index 1847d8d8..eaa7c252 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_daily.sql +++ 
b/models/staging/stg_ga4__sessions_traffic_sources_daily.sql @@ -1,5 +1,5 @@ {% set partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} {{ @@ -52,12 +52,12 @@ first_session_source as ( ,session_partition_date ,event_timestamp ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN event_source END) IGNORE NULLS) OVER (session_window), '(direct)') AS session_source - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_medium, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_medium - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(source_category, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_source_category - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_campaign, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_campaign - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_content, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_content - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(event_term, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_term - ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' THEN COALESCE(default_channel_grouping, 'Direct') END) IGNORE NULLS) OVER (session_window), 'Direct') AS session_default_channel_grouping + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_medium, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_medium + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(source_category, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_source_category + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_campaign, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_campaign + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_content, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_content + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(event_term, '(none)') END) IGNORE NULLS) OVER (session_window), '(none)') AS session_term + ,COALESCE(FIRST_VALUE((CASE WHEN event_source <> '(direct)' or event_source is null THEN COALESCE(default_channel_grouping, 'Direct') END) IGNORE NULLS) OVER (session_window), 'Direct') AS session_default_channel_grouping from set_default_channel_grouping WINDOW session_window AS (PARTITION BY session_partition_key ORDER BY event_timestamp ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ), diff --git a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql index 5c7fc69f..38298312 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql +++ b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.sql @@ -1,7 +1,11 @@ {% set 
partitions_to_replace = ['current_date'] %} -{% for i in range(var('static_incremental_days')) %} + +{% if is_incremental() %} +{% for i in range(env_var('GA4_INCREMENTAL_DAYS')|int if env_var('GA4_INCREMENTAL_DAYS', false) else var('static_incremental_days')) %} {% set partitions_to_replace = partitions_to_replace.append('date_sub(current_date, interval ' + (i+1)|string + ' day)') %} {% endfor %} +{% endif %} + {{ config( materialized = 'incremental', @@ -35,7 +39,7 @@ with last_non_direct_session_partition_key as ( last_value(non_direct_session_partition_key ignore nulls) over( partition by client_key order by - session_partition_timestamp range between {{var('session_attribution_lookback_window_days', 30 ) * 24 * 60 * 60 * 1000000 }} preceding + session_partition_timestamp range between {{ var('session_attribution_lookback_window_days', 30 ) * 24 * 60 * 60 * 1000000 }} preceding and current row -- lookback window ) ELSE non_direct_session_partition_key @@ -44,7 +48,7 @@ with last_non_direct_session_partition_key as ( {{ref('stg_ga4__sessions_traffic_sources_daily')}} {% if is_incremental() %} -- Add 30 to static_incremental_days to include the session attribution lookback window - where session_partition_date >= date_sub(current_date, interval ({{var('static_incremental_days',3) + var('session_attribution_lookback_window_days', 30 )}} ) day) + where session_partition_date >= date_sub(current_date, interval ({{ var('static_incremental_days',3) + var('session_attribution_lookback_window_days', 30 ) }} ) day) {% endif %} ) ,join_last_non_direct_session_source as ( @@ -68,7 +72,7 @@ with last_non_direct_session_partition_key as ( ,coalesce(last_non_direct_source.session_term, '(none)') as last_non_direct_term ,coalesce(last_non_direct_source.session_default_channel_grouping, 'Direct') as last_non_direct_default_channel_grouping from last_non_direct_session_partition_key - left join {{ref('stg_ga4__sessions_traffic_sources_daily')}} last_non_direct_source on + left join {{ ref('stg_ga4__sessions_traffic_sources_daily') }} last_non_direct_source on last_non_direct_session_partition_key.session_partition_key_last_non_direct = last_non_direct_source.session_partition_key {% if is_incremental() %} -- Only keep the records in the partitions we wish to replace (as opposed to the whole 30 day lookback window) diff --git a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml index 0b34832b..26981cd1 100644 --- a/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml +++ b/models/staging/stg_ga4__sessions_traffic_sources_last_non_direct_daily.yml @@ -21,4 +21,29 @@ models: - name: last_non_direct_default_channel_grouping description: The the most recent non-direct channel grouping within a 30-day lookback window. 
tests: - - not_null \ No newline at end of file + - not_null +# unit_tests: +# - name: test_stg_ga4__sessions_traffic_sources_last_non_direct_daily +# description: Test pulling the last non direct session parameters per client_key +# model: stg_ga4__sessions_traffic_sources_last_non_direct_daily +# given: +# - input : ref('stg_ga4__sessions_traffic_sources_daily') +# format: csv +# rows: | +# client_key,session_partition_key,session_partition_date,session_partition_timestamp,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,non_direct_session_partition_key +# A,A,20230505,1683321359,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A +# A,B,20230506,1683407759,(direct),,,,,,, +# A,C,20230507,1683494159,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C +# A,D,20230508,1683580559,(direct),,,,,,, +# expect: +# format: csv +# rows: +# client_key,session_partition_key,session_partition_date,session_source,session_medium,session_source_category,session_campaign,session_content,session_term,session_default_channel_grouping,session_partition_key_last_non_direct,last_non_direct_source,last_non_direct_medium,last_non_direct_source_category,last_non_direct_campaign,last_non_direct_content,last_non_direct_term,last_non_direct_default_channel_grouping +# A,A,20230505,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a +# A,B,20230506,(direct),,,,,,,A,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a +# A,C,20230507,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a +# A,D,20230508,(direct),,,,,,,C,source_a,medium_a,source_category_a,campaign_a,content_a,term_a,default_channel_grouping_a +# overrides: +# macros: +# is_incremental: false +# vars: {session_attribution_lookback_window_days: 30} diff --git a/models/staging/stg_ga4__user_id_mapping.yml b/models/staging/stg_ga4__user_id_mapping.yml index bdca0579..311376e7 100644 --- a/models/staging/stg_ga4__user_id_mapping.yml +++ b/models/staging/stg_ga4__user_id_mapping.yml @@ -9,4 +9,27 @@ models: tests: - not_null - unique - +unit_tests: + - name: test_user_id_mapping + description: Test whether the latest client_key to user_id mapping logic is correct + model: stg_ga4__user_id_mapping + given: + - input: ref('stg_ga4__events') + format: csv + rows: | + client_key,user_id,event_timestamp + a1,,100 + a1,A,101 + b1,B,102 + c1,C,103 + c2,C,104 + c2,,105 + d1,,100 + expect: + format: csv + rows: | + last_seen_user_id,client_key,last_seen_user_id_timestamp + A,a1,101 + B,b1,102 + C,c1,103 + C,c2,104 diff --git a/models/staging/stg_ga4__user_properties.sql b/models/staging/stg_ga4__user_properties.sql index c9deaba5..ff531c8e 100644 --- a/models/staging/stg_ga4__user_properties.sql +++ b/models/staging/stg_ga4__user_properties.sql @@ -1,5 +1,5 @@ {{ config( - enabled = true if var('user_properties', false) else false, + enabled = true if var('user_properties', false) or env_var('GA4_DERIVED_USER_PROPERTIES', false) else false, materialized = "table" ) }} diff --git a/profiles.yml b/profiles.yml new file mode 100644 index 00000000..3649cad7 --- /dev/null +++ b/profiles.yml @@ -0,0 +1,10 @@ 
+default: + target: bigquery + outputs: + bigquery: + type: bigquery + method: service-account + keyfile: "{{ env_var('BIGQUERY_KEYFILE') }}" + project: "{{ env_var('BIGQUERY_PROJECT') }}" + dataset: "{{ env_var('BIGQUERY_DATASET') }}" + timeout_seconds: 300 diff --git a/unit_tests/test_macro_default_channel_grouping.py b/unit_tests/test_macro_default_channel_grouping.py deleted file mode 100644 index ac300c95..00000000 --- a/unit_tests/test_macro_default_channel_grouping.py +++ /dev/null @@ -1,302 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -traffic_data_with_expected_channels = [ - # Direct: Source exactly matches "(direct)" AND Medium is one of ("(not set)", "(none)") - { - "source": "(direct)", - "medium": "(none)", - "campaign": "", - "expected_channel": "Direct" - }, - { - "source": "(direct)", - "medium": "(not set)", - "campaign": "", - "expected_channel": "Direct" - }, - # Cross-network: Campaign Name contains "cross-network" - { - "source": "some-source", - "medium": "some-medium", - "campaign": "some-cross-network-campaign", - "expected_channel": "Cross-network" - }, - { - "source": "some-source", - "medium": "some-medium", - "campaign": "cross-network", - "expected_channel": "Cross-network" - }, - # Paid Shopping: - # (Source matches a list of shopping sites - # OR - # Campaign Name matches regex ^(.*(([^a-df-z]|^)shop|shopping).*)$) - # AND - # Medium matches regex ^(.*cp.*|ppc|retargeting|paid.*)$ - { - "source": "alibaba", - "medium": "", - "campaign": "", - "expected_channel": "Paid Shopping" - }, - { - "source": "some-source", - "medium": "retargeting", - "campaign": "shopping", - "expected_channel": "Paid Shopping" - }, - # Paid Search: - # Source matches a list of search sites - # AND - # Medium matches regex ^(.*cp.*|ppc|retargeting|paid.*)$ - { - "source": "google", - "medium": "ppc", - "campaign": "", - "expected_channel": "Paid Search" - }, - # Paid Social: - # Source matches a regex list of social sites - # AND - # Medium matches regex ^(.*cp.*|ppc|retargeting|paid.*)$ - { - "source": "facebook", - "medium": "retargeting", - "campaign": "", - "expected_channel": "Paid Social" - }, - # Paid Video: - # Source matches a list of video sites - # AND - # Medium matches regex ^(.*cp.*|ppc|retargeting|paid.*)$ - { - "source": "youtube.com", - "medium": "paid-something", - "campaign": "", - "expected_channel": "Paid Video" - }, - # Display: - # Medium is one of (“display”, “banner”, “expandable”, “interstitial”, “cpm”) - { - "source": "youtube.com", - "medium": "display", - "campaign": "", - "expected_channel": "Display" - }, - # Paid Other: - # Medium matches regex ^(.*cp.*|ppc|retargeting|paid.*)$ - { - "source": "some-source", - "medium": "cpc", - "campaign": "", - "expected_channel": "Paid Other" - }, - # Organic Shopping: - # Source matches a list of shopping sites - # OR - # Campaign name matches regex ^(.*(([^a-df-z]|^)shop|shopping).*)$ - { - "source": "Google Shopping", - "medium": "", - "campaign": "", - "expected_channel": "Organic Shopping" - }, - { - "source": "some-source", - "medium": "", - "campaign": "some-shopping-campaign", - "expected_channel": "Organic Shopping" - }, - # Organic Social: - # Source matches a regex list of social sites - # OR - # Medium is one of (“social”, “social-network”, “social-media”, “sm”, “social network”, “social media”) - { - "source": "facebook", - "medium": "", - "campaign": "", - "expected_channel": "Organic Social" - }, - { - "source": "some-source", - "medium": "social", - 
"campaign": "", - "expected_channel": "Organic Social" - }, - # Organic Video: - # Source matches a list of video sites - # OR - # Medium matches regex ^(.*video.*)$ - { - "source": "youtube.com", - "medium": "", - "campaign": "", - "expected_channel": "Organic Video" - }, - { - "source": "some-source", - "medium": "video", - "campaign": "", - "expected_channel": "Organic Video" - }, - # Organic Search: - # Source matches a list of search sites - # OR - # Medium exactly matches organic - { - "source": "bing", - "medium": "", - "campaign": "", - "expected_channel": "Organic Search" - }, - { - "source": "some-source", - "medium": "organic", - "campaign": "", - "expected_channel": "Organic Search" - }, - # Referral: - # Medium is one of ("referral", "app", or "link") - { - "source": "some-source", - "medium": "referral", - "campaign": "", - "expected_channel": "Referral" - }, - # Email: - # Source = email|e-mail|e_mail|e mail - # OR - # Medium = email|e-mail|e_mail|e mail - { - "source": "email", - "medium": "", - "campaign": "", - "expected_channel": "Email" - }, - { - "source": "", - "medium": "e mail", - "campaign": "", - "expected_channel": "Email" - }, - # Affiliates: - # Medium = affiliate - { - "source": "some-source", - "medium": "affiliate", - "campaign": "", - "expected_channel": "Affiliates" - }, - # Audio: - # Medium exactly matches audio - { - "source": "some-source", - "medium": "audio", - "campaign": "", - "expected_channel": "Audio" - }, - # SMS: - # Source exactly matches sms - # OR - # Medium exactly matches sms - { - "source": "sms", - "medium": "", - "campaign": "", - "expected_channel": "SMS" - }, - { - "source": "", - "medium": "sms", - "campaign": "", - "expected_channel": "SMS" - }, - # Mobile Push Notifications: - # Medium ends with "push" - # OR - # Medium contains "mobile" or "notification" - # OR - # Source exactly matches "firebase" - { - "source": "some-source", - "medium": "something-push", - "campaign": "", - "expected_channel": "Mobile Push Notifications" - }, - { - "source": "some-source", - "medium": "mobile-notification", - "campaign": "", - "expected_channel": "Mobile Push Notifications" - }, - { - "source": "firebase", - "medium": "", - "campaign": "", - "expected_channel": "Mobile Push Notifications" - }, - # Unassigned is the value Analytics uses when there are no other channel rules that match the event data. 
- { - "source": "some-source", - "medium": "some-medium", - "campaign": "some-campaign", - "expected_channel": "Unassigned" - }, -] - -# Generate the input CSV content and the expected CSV content -csv_header = "source,medium,campaign" -expected_header = "default_channel_grouping" - -traffic_input_lines = [csv_header] + [ - f"{row['source']},{row['medium']},{row['campaign']}" for row in traffic_data_with_expected_channels -] - -expected_csv_lines = [expected_header] + [ - row['expected_channel'] for row in traffic_data_with_expected_channels -] - -# Join the lines into a single string for input and expected CSV -traffic_input = "\n".join(traffic_input_lines) -expected_csv = "\n".join(expected_csv_lines) - - -actual = """ -with input as ( - select * from {{ref('traffic_input')}} - left join {{ref('source_category_mapping')}} using (source) -) -select -{{default_channel_grouping('source', 'medium', 'source_category','campaign')}} as default_channel_grouping -from input -""" - -class TestDefaultChannelGrouping(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "source_category_mapping.csv": read_file('../seeds/ga4_source_categories.csv'), - "traffic_input.csv": traffic_input, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "macro_to_test.sql": read_file('../macros/default_channel_grouping.sql'), - } - - def test_mock_run_and_check(self, project): - #breakpoint() - run_dbt(["build"]) - check_relations_equal(project.adapter, ["actual", "expected"]) \ No newline at end of file diff --git a/unit_tests/test_macro_exclude_query_parameters.py b/unit_tests/test_macro_exclude_query_parameters.py deleted file mode 100644 index fb30f7ce..00000000 --- a/unit_tests/test_macro_exclude_query_parameters.py +++ /dev/null @@ -1,52 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -urls_to_test_csv = """url -www.website.com/?param_to_exclude=1234 -www.website.com/?param_to_exclude= -www.website.com/?foo=bar¶m_to_exclude=1234 -www.website.com/?foo=bar¶m_to_exclude=1234&another=parameter -www.website.com/?foo=bar¶m_to_exclude=1234&another=parameter&exclude=nope -""".lstrip() - -expected_csv = """url -www.website.com/ -www.website.com/ -www.website.com/?foo=bar -www.website.com/?foo=bar&another=parameter -www.website.com/?foo=bar&another=parameter&exclude=nope -""".lstrip() - -actual = """ -select -{{remove_query_parameters('url', ['param_to_exclude'])}} as url -from {{ref('urls_to_test')}} -""" - -class TestUsersFirstLastEvents(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "urls_to_test.csv": urls_to_test_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "macro_to_test.sql": read_file('../macros/url_parsing.sql'), - } - - def test_mock_run_and_check(self, project): - run_dbt(["build"]) - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git 
a/unit_tests/test_macro_extract_query_parameter_value.py b/unit_tests/test_macro_extract_query_parameter_value.py deleted file mode 100644 index 473b5367..00000000 --- a/unit_tests/test_macro_extract_query_parameter_value.py +++ /dev/null @@ -1,54 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -urls_to_test_csv = """url -www.website.com/?param1=A -www.website.com/?param1=A¶m2=B -www.website.com/?param1=A¶m2=B¶m3=C -www.website.com/ -www.website.com/? -""".lstrip() - -expected_csv = """param1,param2,param3 -A,, -A,B, -A,B,C -,, -,, -""".lstrip() - -actual = """ - select - {{ extract_query_parameter_value( 'url' , 'param1' ) }} as param1, - {{ extract_query_parameter_value( 'url' , 'param2' ) }} as param2, - {{ extract_query_parameter_value( 'url' , 'param3' ) }} as param3 - from {{ref('urls_to_test')}} -""" - -class TestUsersFirstLastEvents(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "urls_to_test.csv": urls_to_test_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "macro_to_test.sql": read_file('../macros/url_parsing.sql'), - } - - def test_mock_run_and_check(self, project): - run_dbt(["build"]) - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_stg_Ga4__user_id_mapping.py b/unit_tests/test_stg_Ga4__user_id_mapping.py deleted file mode 100644 index bcb291f1..00000000 --- a/unit_tests/test_stg_Ga4__user_id_mapping.py +++ /dev/null @@ -1,43 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -mock_stg_ga4__events_csv = """client_key,user_id,event_timestamp -a1,,100 -a1,A,101 -b1,B,102 -c1,C,103 -c2,C,104 -c2,,105 -d1,,100 -""".lstrip() - -expected_csv = """last_seen_user_id,client_key,last_seen_user_id_timestamp -A,a1,101 -B,b1,102 -C,c1,103 -C,c2,104 -""".lstrip() - -actual = read_file('../models/staging/stg_ga4__user_id_mapping.sql') - -class TestUserIdMapping(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build"]) - #breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_stg_ga4__derived_session_properties.py b/unit_tests/test_stg_ga4__derived_session_properties.py deleted file mode 100644 index 16c960eb..00000000 --- a/unit_tests/test_stg_ga4__derived_session_properties.py +++ /dev/null @@ -1,74 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -mock_stg_ga4__events_json = """ -{ "session_key": "AAA", "event_timestamp": "1617691790431476", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { "string_value": null, "int_value": 1, "float_value": null, "double_value": null }}], "user_properties": [{ "key": "my_property", "value": { 
"string_value": "value1", "int_value": null, "float_value": null, "double_value": null }}]} -{ "session_key": "AAA", "event_timestamp": "1617691790431477", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { "string_value": null, "int_value": 2, "float_value": null, "double_value": null }}]} -{ "session_key": "BBB", "event_timestamp": "1617691790431477", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { "string_value": null, "int_value": 1, "float_value": null, "double_value": null }}], "user_properties": [{ "key": "my_property", "value": { "string_value": "value2", "int_value": null, "float_value": null, "double_value": null }}]} -""".lstrip() - -expected_csv = """session_key,my_derived_property,my_derived_property2 -AAA,2,value1 -BBB,1,value2 -""".lstrip() - -models__config_yml = """ -version: 2 -sources: - - name: fixture - schema: "{{ target.schema }}" - tables: - - name: mock_stg_ga4__events_json -""" - -class TestDerivedSessionProperties(): - # Update project name to ga4 so we can call macros with ga4.macro_name - @pytest.fixture(scope="class") - def project_config_update(self): - return { - "name": "ga4" - } - - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "config.yml": models__config_yml, - "stg_ga4__events.sql": "select * from {{source('fixture','mock_stg_ga4__events_json')}}", - "actual.sql": read_file('../models/staging/stg_ga4__derived_session_properties.sql') - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "unnest_key.sql": read_file('../macros/unnest_key.sql'), - } - - def upload_json_fixture(self, project, file_name, json, table_name): - local_file_path = file_name - with open(local_file_path, "w") as outfile: - outfile.write(json) - project.adapter.upload_file( - local_file_path = local_file_path, - database = project.database, - table_schema = project.test_schema, - table_name = table_name, - kwargs = { - "source_format": "NEWLINE_DELIMITED_JSON", - "autodetect":"true" - } - ) - - def test_mock_run_and_check(self, project): - self.upload_json_fixture(project, "source.json", mock_stg_ga4__events_json, "mock_stg_ga4__events_json" ) - run_dbt(["build", "--vars", "derived_session_properties: [{'event_parameter':'my_param','session_property_name':'my_derived_property','value_type':'int_value'},{'user_property':'my_property','session_property_name':'my_derived_property2','value_type':'string_value'}]"]) - #breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_stg_ga4__derived_user_properties.py b/unit_tests/test_stg_ga4__derived_user_properties.py deleted file mode 100644 index 2c04c34b..00000000 --- a/unit_tests/test_stg_ga4__derived_user_properties.py +++ /dev/null @@ -1,74 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -mock_stg_ga4__events_json = """ -{ "client_key": "AAA", "event_timestamp": "1617691790431476", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { "string_value": null, "int_value": 1, "float_value": null, "double_value": null }}]} -{ "client_key": "AAA", "event_timestamp": "1617691790431477", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { 
"string_value": null, "int_value": 2, "float_value": null, "double_value": null }}]} -{ "client_key": "BBB", "event_timestamp": "1617691790431477", "event_name": "first_visit", "event_params": [{ "key": "my_param", "value": { "string_value": null, "int_value": 1, "float_value": null, "double_value": null }}]} -""".lstrip() - -expected_csv = """client_key,my_derived_property -AAA,2 -BBB,1 -""".lstrip() - -models__config_yml = """ -version: 2 -sources: - - name: fixture - schema: "{{ target.schema }}" - tables: - - name: mock_stg_ga4__events_json -""" - -class TestDerivedUserProperties(): - # Update project name to ga4 so we can call macros with ga4.macro_name - @pytest.fixture(scope="class") - def project_config_update(self): - return { - "name": "ga4" - } - - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "config.yml": models__config_yml, - "stg_ga4__events.sql": "select * from {{source('fixture','mock_stg_ga4__events_json')}}", - "actual.sql": read_file('../models/staging/stg_ga4__derived_user_properties.sql') - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "unnest_key.sql": read_file('../macros/unnest_key.sql'), - } - - def upload_json_fixture(self, project, file_name, json, table_name): - local_file_path = file_name - with open(local_file_path, "w") as outfile: - outfile.write(json) - project.adapter.upload_file( - local_file_path = local_file_path, - database = project.database, - table_schema = project.test_schema, - table_name = table_name, - kwargs = { - "source_format": "NEWLINE_DELIMITED_JSON", - "autodetect":"true" - } - ) - - def test_mock_run_and_check(self, project): - self.upload_json_fixture(project, "source.json", mock_stg_ga4__events_json, "mock_stg_ga4__events_json" ) - run_dbt(["build", "--vars", "derived_user_properties: [{'event_parameter':'my_param','user_property_name':'my_derived_property','value_type':'int_value'}]"]) - #breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_stg_ga4__event_to_query_string_params.py b/unit_tests/test_stg_ga4__event_to_query_string_params.py deleted file mode 100644 index ab10eeeb..00000000 --- a/unit_tests/test_stg_ga4__event_to_query_string_params.py +++ /dev/null @@ -1,45 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - - -PARAMS_CSV = """event_key,page_query_string -aaa,param1=value1¶m2=value2 -bbb,param1 -ccc,param1= -""".lstrip() - -EXPECTED_CSV = """event_key,param,value -aaa,param1,value1 -aaa,param2,value2 -bbb,param1, -ccc,param1, -""".lstrip() - -actual = read_file('../models/staging/stg_ga4__event_to_query_string_params.sql').replace( - "ref('stg_ga4__events')", - "ref('params')" -) - - - -class TestEventToQueryStringParams(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "params.csv": PARAMS_CSV, - "expected.csv": EXPECTED_CSV, - - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual - } - - def test_mock_run_and_check(self, project): - #self.upload_json_fixture(project, "source.json", SOURCE_JSON, "SOURCE_JSON" ) - run_dbt(["build"]) - 
check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_stg_ga4__events.example b/unit_tests/test_stg_ga4__events.example deleted file mode 100644 index d0733e26..00000000 --- a/unit_tests/test_stg_ga4__events.example +++ /dev/null @@ -1,45 +0,0 @@ -# This test doesn't quite work because the key columns are of type BYTE, but the JSON uploads the data as STRING. -# Keeping this file for now as an example of using JSON for both the input and expected output - -import pytest -from base_unit_test import BaseUnitTestModel -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -SOURCE_JSON = """ -{ "event_date_dt": "2021-04-06", "event_timestamp": "1617691790431476", "event_name": "first_visit", "event_params": [{ "key": "ga_session_number", "value": { "string_value": null, "int_value": "1", "float_value": null, "double_value": null } }, { "key": "engaged_session_event", "value": { "string_value": null, "int_value": "1", "float_value": null, "double_value": null } }, { "key": "ga_session_id", "value": { "string_value": null, "int_value": "1617691775", "float_value": null, "double_value": null } }, { "key": "page_title", "value": { "string_value": "Velir | Behavior-Driven Testing in Drupal 8", "int_value": null, "float_value": null, "double_value": null } }, { "key": "page_location", "value": { "string_value": "https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8", "int_value": null, "float_value": null, "double_value": null } }, { "key": "session_engaged", "value": { "string_value": null, "int_value": "1", "float_value": null, "double_value": null } }], "event_previous_timestamp": null, "event_value_in_usd": null, "event_bundle_sequence_id": "948327668", "event_server_timestamp_offset": null, "user_id": null, "client_id": "1166526666.1617691776", "privacy_info": null, "user_properties": [], "user_first_touch_timestamp": "1617691790431476", "user_ltv": { "revenue": "0.0", "currency": "USD" }, "device": { "category": "desktop", "mobile_brand_name": null, "mobile_model_name": null, "mobile_marketing_name": null, "mobile_os_hardware_model": null, "operating_system": "Windows", "operating_system_version": "Windows 10", "vendor_id": null, "advertising_id": null, "language": "en-us", "is_limited_ad_tracking": "No", "time_zone_offset_seconds": null, "browser": null, "browser_version": null, "web_info": { "browser": "Chrome", "browser_version": "89.0.4389.114", "hostname": "www.velir.com" } }, "geo": { "continent": "Asia", "country": "Vietnam", "region": "Ho Chi Minh City", "city": "Ho Chi Minh City", "sub_continent": "Southeast Asia", "metro": "(not set)" }, "app_info": null, "traffic_source": { "name": "(direct)", "medium": "(none)", "source": "(direct)" }, "stream_id": "1966637064", "platform": "WEB", "ecommerce": null, "items": [], "ga_session_id": "1617691775", "page_location": "https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8", "ga_session_number": "1", "session_engaged": "1", "page_title": "Velir | Behavior-Driven Testing in Drupal 8", "page_referrer": null, "is_page_view": "0", "is_purchase": "0"} -""".lstrip() -EXPECTED_JSON = """ -{ "event_date_dt": "2021-04-06", "event_timestamp": "1617691790431476", "event_name": "first_visit", "event_params": [{ "key": "ga_session_number", "value": { "string_value": null, "int_value": "1", "float_value": null, "double_value": null } }, { "key": "engaged_session_event", "value": { "string_value": null, "int_value": "1", "float_value": null, "double_value": null } }, { 
"key": "ga_session_id", "value": { "string_value": null, "int_value": "1617691775", "float_value": null, "double_value": null } }, { "key": "page_title", "value": { "string_value": "Velir | Behavior-Driven Testing in Drupal 8", "int_value": null, "float_value": null, "double_value": null } }, { "key": "page_location", "value": { "string_value": "https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8", "int_value": null, "float_value": null, "double_value": null } }, { "key": "session_engaged", "value": { "string_value": null, "int_value": "1", "float_value": null, "double_value": null } }], "event_previous_timestamp": null, "event_value_in_usd": null, "event_bundle_sequence_id": "948327668", "event_server_timestamp_offset": null, "user_id": null, "client_id": "1166526666.1617691776", "privacy_info": null, "user_properties": [], "user_first_touch_timestamp": "1617691790431476", "user_ltv": { "revenue": "0.0", "currency": "USD" }, "device": { "category": "desktop", "mobile_brand_name": null, "mobile_model_name": null, "mobile_marketing_name": null, "mobile_os_hardware_model": null, "operating_system": "Windows", "operating_system_version": "Windows 10", "vendor_id": null, "advertising_id": null, "language": "en-us", "is_limited_ad_tracking": "No", "time_zone_offset_seconds": null, "browser": null, "browser_version": null, "web_info": { "browser": "Chrome", "browser_version": "89.0.4389.114", "hostname": "www.velir.com" } }, "geo": { "continent": "Asia", "country": "Vietnam", "region": "Ho Chi Minh City", "city": "Ho Chi Minh City", "sub_continent": "Southeast Asia", "metro": "(not set)" }, "app_info": null, "traffic_source": { "name": "(direct)", "medium": "(none)", "source": "(direct)" }, "stream_id": "1966637064", "platform": "WEB", "ecommerce": null, "items": [], "ga_session_id": "1617691775", "page_location": "https://www.velir.com/blog/2016/08/25/behavior-driven-testing-drupal-8", "ga_session_number": "1", "session_engaged": "1", "page_title": "Velir | Behavior-Driven Testing in Drupal 8", "page_referrer": null, "is_page_view": "0", "is_purchase": "0", "session_key": "TAp7hHaymXXA/Way5byPBw\u003d\u003d", "session_event_number": "1", "event_key": "DGb378zSx/aIZs76gM4aTQ\u003d\u003d", "page_hostname": "velir.com", "page_query_string": null} -""".lstrip() - -models__config_yml = """ -version: 2 -sources: - - name: fixture - schema: "{{ target.schema }}" - tables: - - name: SOURCE_JSON - - name: EXPECTED_OUTPUT -""" - -actual = read_file('../models/staging/stg_ga4__events.sql').replace( - "ref('base_ga4__events')", - "source('fixture', 'SOURCE_JSON')" -) - -class TestStgGa4Events(BaseUnitTestModel): - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "config.yml": models__config_yml, - "actual.sql": actual, - "expected.sql": "select * from {{ source('fixture', 'EXPECTED_OUTPUT') }}" - } - - def test_mock_run_and_check(self, project): - self.upload_json_fixture(project, "source.json", SOURCE_JSON, "SOURCE_JSON" ) - self.upload_json_fixture(project, "expected.json", EXPECTED_JSON, "EXPECTED_OUTPUT" ) - run_dbt(["run"]) - breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_stg_ga4__events.todo b/unit_tests/test_stg_ga4__events.todo deleted file mode 100644 index a1d76e9c..00000000 --- a/unit_tests/test_stg_ga4__events.todo +++ /dev/null @@ -1,43 +0,0 @@ -# Test test currently fails because the event_key depends on the event_params nested field. 
Cannot mock that using CSV. - -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -mock_base_ga4__events_csv = """user_id,event_name,event_timestamp,client_key,ga_session_id,stream_id,page_location,page_referrer,source,medium,campaign -user_id_1,pageview,12345,client_key_1,ga_session_id_1,stream_id_1,http://www.website.com/?foo=bar,http://www.cnn.com/,google,organic,(organic) -""".lstrip() - -expected_csv = """user_id,event_name,event_timestamp,client_key,ga_session_id,stream_id,source,user_key,session_key,event_key,medium,campaign,original_page_location,original_page_referrer,page_location,page_referrer,page_hostname,page_query_string -user_id_1,pageview,12345,client_key_1,ga_session_id_1,stream_id_1,google,c/nWU/GWhlWiLU0S6R/rwg==,9fDgaCrbd4ieAj1QpcWDjw==,70B/o+ww2nOTa32ASF/ulw==,organic,(organic),http://www.website.com/?foo=bar,http://www.cnn.com/,http://www.website.com/?foo=bar,http://www.cnn.com/,website.com,foo=bar -""" - -actual = read_file('../models/staging/stg_ga4__events.sql') - -class TestStgEvents(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "base_ga4__events.csv": mock_base_ga4__events_csv, - "expected.csv": expected_csv - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "macros.sql": read_file('../macros/url_parsing.sql'), - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual - } - - def test_mock_run_and_check(self, project): - run_dbt(["build"]) - breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_stg_ga4__page_conversions.py b/unit_tests/test_stg_ga4__page_conversions.py deleted file mode 100644 index 6d3cd7da..00000000 --- a/unit_tests/test_stg_ga4__page_conversions.py +++ /dev/null @@ -1,85 +0,0 @@ -import pytest -from dbt.tests.util import check_relations_equal, read_file, run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -mock_stg_ga4__events_csv = """event_name,page_key -page_view,A -page_view,A -page_view,B -""".lstrip() - -mock_stg_ga4__nonstandard_events_csv = """event_name,page_key -page-view,A -page-view,A -page-view,B -""".lstrip() - -expected_csv = """page_key,page_view_count -A,2 -B,1 -""".lstrip() - -actual = read_file("../models/staging/stg_ga4__page_conversions.sql") - - -class TestPageConversions: - # Update project name to ga4 so we can call macros with ga4.macro_name - @pytest.fixture(scope="class") - def project_config_update(self): - return {"name": "ga4"} - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "valid_column_name.sql": read_file("../macros/valid_column_name.sql"), - } - - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build", "--vars", "conversion_events: ['page_view']"]) - # breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) - - -class TestPageConversionsNonStandardEventName: - # everything that 
goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__nonstandard_events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "valid_column_name.sql": read_file("../macros/valid_column_name.sql"), - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build", "--vars", "conversion_events: ['page-view']"]) - # breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_stg_ga4__session_conversions_daily.py b/unit_tests/test_stg_ga4__session_conversions_daily.py deleted file mode 100644 index 8ad1e7ae..00000000 --- a/unit_tests/test_stg_ga4__session_conversions_daily.py +++ /dev/null @@ -1,93 +0,0 @@ -import pytest -from dbt.tests.util import check_relations_equal, read_file, run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -mock_stg_ga4__events_csv = """session_key,session_partition_key,event_name,event_date_dt -A,A2022-01-01,page_view,2022-01-01 -A,A2022-01-01,my_conversion,2022-01-01 -A,A2022-01-01,my_conversion,2022-01-01 -B,B2022-01-01,my_conversion,2022-01-01 -C,C2022-01-01,some_other_event,2022-01-01 -A,A2022-01-02,my_conversion,2022-01-02 -""".lstrip() - -mock_stg_ga4__nonstandard_events_csv = """session_key,session_partition_key,event_name,event_date_dt -A,A2022-01-01,page_view,2022-01-01 -A,A2022-01-01,my-conversion,2022-01-01 -A,A2022-01-01,my-conversion,2022-01-01 -B,B2022-01-01,my-conversion,2022-01-01 -C,C2022-01-01,some_other_event,2022-01-01 -A,A2022-01-02,my-conversion,2022-01-02 -""".lstrip() - -expected_csv = """session_key,session_partition_key,session_partition_date,my_conversion_count -A,A2022-01-01,2022-01-01,2 -B,B2022-01-01,2022-01-01,1 -C,C2022-01-01,2022-01-01,0 -A,A2022-01-02,2022-01-02,1 -""".lstrip() - -actual = read_file("../models/staging/stg_ga4__session_conversions_daily.sql") - - -class TestUsersFirstLastEvents: - # Update project name to ga4 so we can call macros with ga4.macro_name - @pytest.fixture(scope="class") - def project_config_update(self): - return {"name": "ga4", "vars": {"static_incremental_days": 3}} - - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "valid_column_name.sql": read_file("../macros/valid_column_name.sql"), - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build", "--vars", "conversion_events: ['my_conversion']"]) - # breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) - - -class TestUsersNonStandardEventName: - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__nonstandard_events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "macros" - @pytest.fixture(scope="class") - def macros(self): - return { - "valid_column_name.sql": 
read_file("../macros/valid_column_name.sql"), - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build", "--vars", "conversion_events: ['my-conversion']"]) - # breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"]) diff --git a/unit_tests/test_stg_ga4__users_first_last_events.py b/unit_tests/test_stg_ga4__users_first_last_events.py deleted file mode 100644 index 7880aaf3..00000000 --- a/unit_tests/test_stg_ga4__users_first_last_events.py +++ /dev/null @@ -1,35 +0,0 @@ -import pytest -from dbt.tests.util import read_file,check_relations_equal,run_dbt - -# Define mocks via CSV (seeds) or SQL (models) -mock_stg_ga4__events_csv = """stream_id,client_key,event_key,event_timestamp,geo_continent,geo_country,geo_region,geo_city,geo_sub_continent,geo_metro,device_category,device_mobile_brand_name,device_mobile_model_name,device_mobile_marketing_name,device_mobile_os_hardware_model,device_operating_system,device_operating_system_version,device_vendor_id,device_advertising_id,device_language,device_is_limited_ad_tracking,device_time_zone_offset_seconds,device_browser,device_browser_version,device_web_info_browser,device_web_info_browser_version,device_web_info_hostname,user_campaign,user_medium,user_source -1,IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,1661339279816517,Asia,India,Maharashtra,Mumbai,Southern Asia,(not set),desktop,Google,Chrome,,,Windows,Windows 10,,,en-us,No,,,,Chrome,104.0.0.0,www.velir.com,,, -1,IX+OyYJBgjwqML19GB/XIQ==,gt1SoAtrxDv33uDGwVeMVA==,1661339279816518,USA,Massachusetts,Maharashtra,Mumbai,Southern Asia,(not set),mobile,Google,Chrome,,,Windows,Windows 10,,,en-us,No,,,,Chrome,104.0.0.0,www.velir.com,,, -""".lstrip() - -expected_csv = """client_key,first_event,last_event,stream_id,first_geo_continent,first_geo_country,first_geo_region,first_geo_city,first_geo_sub_continent,first_geo_metro,first_device_category,first_device_mobile_brand_name,first_device_mobile_model_name,first_device_mobile_marketing_name,first_device_mobile_os_hardware_model,first_device_operating_system,first_device_operating_system_version,first_device_vendor_id,first_device_advertising_id,first_device_language,first_device_is_limited_ad_tracking,first_device_time_zone_offset_seconds,first_device_browser,first_device_browser_version,first_device_web_info_browser,first_device_web_info_browser_version,first_device_web_info_hostname,first_user_campaign,first_user_medium,first_user_source,last_geo_continent,last_geo_country,last_geo_region,last_geo_city,last_geo_sub_continent,last_geo_metro,last_device_category,last_device_mobile_brand_name,last_device_mobile_model_name,last_device_mobile_marketing_name,last_device_mobile_os_hardware_model,last_device_operating_system,last_device_operating_system_version,last_device_vendor_id,last_device_advertising_id,last_device_language,last_device_is_limited_ad_tracking,last_device_time_zone_offset_seconds,last_device_browser,last_device_browser_version,last_device_web_info_browser,last_device_web_info_browser_version,last_device_web_info_hostname,last_user_campaign,last_user_medium,last_user_source -IX+OyYJBgjwqML19GB/XIQ==,H06dLW6OhNJJ6SoEPFsSyg==,gt1SoAtrxDv33uDGwVeMVA==,1,Asia,India,Maharashtra,Mumbai,Southern Asia,(not set),desktop,Google,Chrome,,,Windows,Windows 10,,,en-us,No,,,,Chrome,104.0.0.0,www.velir.com,,,,USA,Massachusetts,Maharashtra,Mumbai,Southern Asia,(not 
set),mobile,Google,Chrome,,,Windows,Windows 10,,,en-us,No,,,,Chrome,104.0.0.0,www.velir.com,,, -""".lstrip() - -actual = read_file('../models/staging/stg_ga4__client_key_first_last_events.sql') - -class TestUsersFirstLastEvents(): - # everything that goes in the "seeds" directory (= CSV format) - @pytest.fixture(scope="class") - def seeds(self): - return { - "stg_ga4__events.csv": mock_stg_ga4__events_csv, - "expected.csv": expected_csv, - } - - # everything that goes in the "models" directory (= SQL) - @pytest.fixture(scope="class") - def models(self): - return { - "actual.sql": actual, - } - - def test_mock_run_and_check(self, project): - run_dbt(["build"]) - #breakpoint() - check_relations_equal(project.adapter, ["actual", "expected"])
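
For context on where the removed `pytest` suites are headed, the scenario from the deleted `test_stg_ga4__page_conversions.py` can be sketched as a dbt-native unit test in the same YAML style as the `test_user_id_mapping` test added in this patch. This is an illustrative sketch only, not part of the patch: the input and expected columns are taken from the old pytest fixtures, and the `conversion_events` override (plus any other overrides the current `stg_ga4__page_conversions` model needs) would have to be confirmed before committing it.
```
# Illustrative sketch — mirrors the deleted pytest fixtures; verify columns and
# overrides against the current stg_ga4__page_conversions model before use.
unit_tests:
  - name: test_stg_ga4__page_conversions
    description: Test that page_view conversions are counted per page_key
    model: stg_ga4__page_conversions
    given:
      - input: ref('stg_ga4__events')
        format: csv
        rows: |
          event_name,page_key
          page_view,A
          page_view,A
          page_view,B
    expect:
      format: csv
      rows: |
        page_key,page_view_count
        A,2
        B,1
    overrides:
      vars: {conversion_events: ['page_view']}
```
If added to the model's YAML file, a test like this would be picked up automatically by the `dbt test -s test_type:unit` step in the new `run_dbt_unit_tests` workflow job.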