Skip to content

Commit

Permalink
DENG - Added clients daily aggregate by default browser lifecycle stage table (#6736)
Browse files Browse the repository at this point in the history

* Added clients last seen aggregate tables

* Fixed schema description

* Fixed profile column

* Fixed formatting

* Changed to use clients daily

* Updated agg table name

* Fixed formatting
  • Loading branch information
wwyc authored Dec 31, 2024
1 parent 62d941e commit 67fbf88
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- User-facing view: exposes every column of the derived aggregate table
-- telemetry_derived.clients_daily_agg_by_default_browser_lifecycle_stage_v1
-- unchanged, so consumers do not query the versioned table directly.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.telemetry.clients_daily_agg_by_default_browser_lifecycle_stage`
AS
SELECT
  *
FROM
  `moz-fx-data-shared-prod.telemetry_derived.clients_daily_agg_by_default_browser_lifecycle_stage_v1`
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
friendly_name: Clients Daily Aggregates By Default Browser Lifecycle Stage
description: |-
  Aggregate table for clients daily by default browser lifecycle stage
# NOTE(review): "[email protected]" is the residue of email obfuscation, not a
# real address. It is quoted below only so that this file parses as YAML.
# Restore the real owner address before use; the generated metadata shows
# `owner1: wichan`, which suggests the local part is "wichan" - confirm.
owners:
- "[email protected]"
labels:
  incremental: true
  owner1: "[email protected]"
  table_type: aggregate
  shredder_mitigation: true
scheduling:
  dag_name: bqetl_default_browser_aggregates
bigquery:
  # One partition per submission_date; queries must filter on the partition.
  time_partitioning:
    type: day
    field: submission_date
    require_partition_filter: true
    expiration_days: null
  range_partitioning: null
  clustering:
    fields:
    - normalized_os_version
references: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
-- Counts clients_daily rows for the @submission_date partition, grouped by
-- default-browser status, OS, OS version, channel, country and the profile's
-- lifecycle stage.
SELECT
  submission_date,
  is_default_browser,
  -- os is listed before normalized_os_version so that the output column order
  -- matches schema.yaml and the GROUP BY list below.
  os,
  normalized_os_version,
  normalized_channel,
  country,
  -- Lifecycle stage is the profile's age in days on submission_date:
  --   0 days     -> new_profile
  --   1-27 days  -> repeat_user
  --   28+ days   -> existing_user
  -- 'Unknown' is reached when first_seen_date is NULL or later than
  -- submission_date, since none of the comparisons above is then true.
  CASE
    WHEN first_seen_date = submission_date
      THEN 'new_profile'
    WHEN DATE_DIFF(submission_date, first_seen_date, DAY)
      BETWEEN 1
      AND 27
      THEN 'repeat_user'
    WHEN DATE_DIFF(submission_date, first_seen_date, DAY) >= 28
      THEN 'existing_user'
    ELSE 'Unknown'
  END AS lifecycle_stage,
  COUNT(*) AS row_count,
FROM
  `moz-fx-data-shared-prod.telemetry.clients_daily`
WHERE
  submission_date = @submission_date
GROUP BY
  submission_date,
  is_default_browser,
  os,
  normalized_os_version,
  normalized_channel,
  country,
  lifecycle_stage
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Column schema for clients_daily_agg_by_default_browser_lifecycle_stage_v1.
# The first seven fields are the grouping dimensions; row_count is the measure.
fields:
- name: submission_date
  type: DATE
  mode: NULLABLE
  description: Submission Date
- name: is_default_browser
  type: BOOLEAN
  mode: NULLABLE
  description: Is Default Browser Indicator
- name: os
  type: STRING
  mode: NULLABLE
  description: Operating System
- name: normalized_os_version
  type: STRING
  mode: NULLABLE
  description: Normalized OS Version
- name: normalized_channel
  type: STRING
  mode: NULLABLE
  description: Normalized Channel
- name: country
  type: STRING
  mode: NULLABLE
  description: Country
- name: lifecycle_stage
  type: STRING
  mode: NULLABLE
  description: >-
    Indicates the life stage of a client profile on the submission date.
    One of new_profile (first seen on the submission date), repeat_user
    (first seen 1 to 27 days earlier), existing_user (first seen 28 or more
    days earlier) or Unknown (first seen date missing or after the
    submission date).
- name: row_count
  type: INTEGER
  mode: NULLABLE
  description: The total number of rows aggregated.

1 comment on commit 67fbf88

@dataops-ci-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Integration report for "DENG - Added clients daily aggregate by default browser lifecycle stage table (#6736)"

sql.diff

Click to expand!
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_default_browser_aggregates.py /tmp/workspace/generated-sql/dags/bqetl_default_browser_aggregates.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_default_browser_aggregates.py	2024-12-31 16:25:31.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_default_browser_aggregates.py	2024-12-31 16:27:02.000000000 +0000
@@ -64,6 +64,19 @@
         pool="DATA_ENG_EXTERNALTASKSENSOR",
     )
 
+    wait_for_telemetry_derived__clients_daily_joined__v1 = ExternalTaskSensor(
+        task_id="wait_for_telemetry_derived__clients_daily_joined__v1",
+        external_dag_id="bqetl_main_summary",
+        external_task_id="telemetry_derived__clients_daily_joined__v1",
+        execution_delta=datetime.timedelta(seconds=72000),
+        check_existence=True,
+        mode="reschedule",
+        poke_interval=datetime.timedelta(minutes=5),
+        allowed_states=ALLOWED_STATES,
+        failed_states=FAILED_STATES,
+        pool="DATA_ENG_EXTERNALTASKSENSOR",
+    )
+
     default_browser_agent_derived__default_browser_agg__v1 = bigquery_etl_query(
         task_id="default_browser_agent_derived__default_browser_agg__v1",
         destination_table="default_browser_agg_v1",
@@ -97,6 +110,17 @@
         depends_on_past=False,
     )
 
+    telemetry_derived__clients_daily_agg_by_default_browser_lifecycle_stage__v1 = bigquery_etl_query(
+        task_id="telemetry_derived__clients_daily_agg_by_default_browser_lifecycle_stage__v1",
+        destination_table="clients_daily_agg_by_default_browser_lifecycle_stage_v1",
+        dataset_id="telemetry_derived",
+        project_id="moz-fx-data-shared-prod",
+        owner="[email protected]",
+        email=["[email protected]", "[email protected]"],
+        date_partition_parameter="submission_date",
+        depends_on_past=False,
+    )
+
     default_browser_agent_derived__default_browser_agg__v1.set_upstream(
         wait_for_copy_deduplicate_all
     )
@@ -108,3 +132,7 @@
     firefox_desktop_background_defaultagent_derived__default_agent_agg__v1.set_upstream(
         wait_for_copy_deduplicate_all
     )
+
+    telemetry_derived__clients_daily_agg_by_default_browser_lifecycle_stage__v1.set_upstream(
+        wait_for_telemetry_derived__clients_daily_joined__v1
+    )
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py /tmp/workspace/generated-sql/dags/bqetl_main_summary.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_main_summary.py	2024-12-31 16:25:31.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_main_summary.py	2024-12-31 16:26:56.000000000 +0000
@@ -453,6 +453,13 @@
         )
 
         ExternalTaskMarker(
+            task_id="bqetl_default_browser_aggregates__wait_for_telemetry_derived__clients_daily_joined__v1",
+            external_dag_id="bqetl_default_browser_aggregates",
+            external_task_id="wait_for_telemetry_derived__clients_daily_joined__v1",
+            execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=14400)).isoformat() }}",
+        )
+
+        ExternalTaskMarker(
             task_id="bqetl_desktop_retention_model__wait_for_telemetry_derived__clients_daily_joined__v1",
             external_dag_id="bqetl_desktop_retention_model",
             external_task_id="wait_for_telemetry_derived__clients_daily_joined__v1",
Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry: clients_daily_agg_by_default_browser_lifecycle_stage
Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived: clients_daily_agg_by_default_browser_lifecycle_stage_v1
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/apple_ads_external/ios_app_campaign_stats_v1/bigconfig.yml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/apple_ads_external/ios_app_campaign_stats_v1/bigconfig.yml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/apple_ads_external/ios_app_campaign_stats_v1/bigconfig.yml	2024-12-31 16:19:53.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/apple_ads_external/ios_app_campaign_stats_v1/bigconfig.yml	2024-12-31 16:23:42.000000000 +0000
@@ -1,7 +1,6 @@
 type: BIGCONFIG_FILE
-
 tag_deployments:
-  - collection:
+- collection:
       name: Operational Checks
       notification_channels:
         - slack: '#de-bigeye-triage'
@@ -24,10 +23,10 @@
         metrics:
           - saved_metric_id: is_not_null
             lookback:
-              lookback_type: DATA_TIME
               lookback_window:
                 interval_type: DAYS
                 interval_value: 28
+        lookback_type: DATA_TIME
             rct_overrides:
               - date
       - column_selectors:
@@ -35,10 +34,10 @@
         metrics:
           - saved_metric_id: is_2_char_len
             lookback:
-              lookback_type: DATA_TIME
               lookback_window:
                 interval_type: DAYS
                 interval_value: 28
+        lookback_type: DATA_TIME
             rct_overrides:
               - date
       - column_selectors:
@@ -46,17 +45,17 @@
         metrics:
           - saved_metric_id: volume
             lookback:
-              lookback_type: DATA_TIME
               lookback_window:
                 interval_type: DAYS
                 interval_value: 28
+        lookback_type: DATA_TIME
             rct_overrides:
               - date
           - saved_metric_id: freshness
             lookback:
-              lookback_type: DATA_TIME
               lookback_window:
                 interval_type: DAYS
                 interval_value: 28
+        lookback_type: DATA_TIME
             rct_overrides:
               - date
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/android_app_campaign_stats_v1/bigconfig.yml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/android_app_campaign_stats_v1/bigconfig.yml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/android_app_campaign_stats_v1/bigconfig.yml	2024-12-31 16:19:53.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_ads_derived/android_app_campaign_stats_v1/bigconfig.yml	2024-12-31 16:23:38.000000000 +0000
@@ -1,7 +1,6 @@
 type: BIGCONFIG_FILE
-
 tag_deployments:
-  - collection:
+- collection:
       name: Operational Checks
       notification_channels:
         - slack: '#de-bigeye-triage'
@@ -23,10 +22,10 @@
         metrics:
           - saved_metric_id: is_not_null
             lookback:
-              lookback_type: DATA_TIME
               lookback_window:
                 interval_type: DAYS
                 interval_value: 28
+        lookback_type: DATA_TIME
             rct_overrides:
               - date
       - column_selectors:
@@ -34,10 +33,10 @@
         metrics:
           - saved_metric_id: is_2_char_len
             lookback:
-              lookback_type: DATA_TIME
               lookback_window:
                 interval_type: DAYS
                 interval_value: 28
+        lookback_type: DATA_TIME
             rct_overrides:
               - date
       - column_selectors:
@@ -45,17 +44,17 @@
         metrics:
           - saved_metric_id: volume
             lookback:
-              lookback_type: DATA_TIME
               lookback_window:
                 interval_type: DAYS
                 interval_value: 28
+        lookback_type: DATA_TIME
             rct_overrides:
               - date
           - saved_metric_id: freshness
             lookback:
-              lookback_type: DATA_TIME
               lookback_window:
                 interval_type: DAYS
                 interval_value: 28
+        lookback_type: DATA_TIME
             rct_overrides:
               - date
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry/clients_daily_agg_by_default_browser_lifecycle_stage/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry/clients_daily_agg_by_default_browser_lifecycle_stage/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry/clients_daily_agg_by_default_browser_lifecycle_stage/metadata.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry/clients_daily_agg_by_default_browser_lifecycle_stage/metadata.yaml	2024-12-31 16:22:08.000000000 +0000
@@ -0,0 +1,14 @@
+friendly_name: Clients Daily Agg By Default Browser Lifecycle Stage
+description: |-
+  Please provide a description for the query
+owners: []
+labels: {}
+bigquery: null
+workgroup_access:
+- role: roles/bigquery.dataViewer
+  members:
+  - workgroup:dataops-managed/taar
+  - workgroup:mozilla-confidential
+references:
+  view.sql:
+  - moz-fx-data-shared-prod.telemetry_derived.clients_daily_agg_by_default_browser_lifecycle_stage_v1
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry/clients_daily_agg_by_default_browser_lifecycle_stage/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry/clients_daily_agg_by_default_browser_lifecycle_stage/view.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry/clients_daily_agg_by_default_browser_lifecycle_stage/view.sql	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry/clients_daily_agg_by_default_browser_lifecycle_stage/view.sql	2024-12-31 16:19:57.000000000 +0000
@@ -0,0 +1,7 @@
+CREATE OR REPLACE VIEW
+  `moz-fx-data-shared-prod.telemetry.clients_daily_agg_by_default_browser_lifecycle_stage`
+AS
+SELECT
+  *
+FROM
+  `moz-fx-data-shared-prod.telemetry_derived.clients_daily_agg_by_default_browser_lifecycle_stage_v1`
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/metadata.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/metadata.yaml	2024-12-31 16:22:10.000000000 +0000
@@ -0,0 +1,28 @@
+friendly_name: Clients Daily Aggregates By Default Browser Lifecycle Stage
+description: |-
+  Aggregate table for clients daily by default browser lifecycle stage
+owners:
+- [email protected]
+labels:
+  incremental: true
+  owner1: wichan
+  table_type: aggregate
+  shredder_mitigation: true
+  dag: bqetl_default_browser_aggregates
+scheduling:
+  dag_name: bqetl_default_browser_aggregates
+bigquery:
+  time_partitioning:
+    type: day
+    field: submission_date
+    require_partition_filter: true
+    expiration_days: null
+  range_partitioning: null
+  clustering:
+    fields:
+    - normalized_os_version
+workgroup_access:
+- role: roles/bigquery.dataViewer
+  members:
+  - workgroup:mozilla-confidential
+references: {}
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/query.sql	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/query.sql	2024-12-31 16:19:57.000000000 +0000
@@ -0,0 +1,31 @@
+SELECT
+  submission_date,
+  is_default_browser,
+  normalized_os_version,
+  os,
+  normalized_channel,
+  country,
+  CASE
+    WHEN first_seen_date = submission_date
+      THEN 'new_profile'
+    WHEN DATE_DIFF(submission_date, first_seen_date, DAY)
+      BETWEEN 1
+      AND 27
+      THEN 'repeat_user'
+    WHEN DATE_DIFF(submission_date, first_seen_date, DAY) >= 28
+      THEN 'existing_user'
+    ELSE 'Unknown'
+  END AS lifecycle_stage,
+  COUNT(*) AS row_count,
+FROM
+  `moz-fx-data-shared-prod.telemetry.clients_daily`
+WHERE
+  submission_date = @submission_date
+GROUP BY
+  submission_date,
+  is_default_browser,
+  os,
+  normalized_os_version,
+  normalized_channel,
+  country,
+  lifecycle_stage
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/schema.yaml	1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_agg_by_default_browser_lifecycle_stage_v1/schema.yaml	2024-12-31 16:19:57.000000000 +0000
@@ -0,0 +1,33 @@
+fields:
+- name: submission_date
+  type: DATE
+  mode: NULLABLE
+  description: Submission Date
+- name: is_default_browser
+  type: BOOLEAN
+  mode: NULLABLE
+  description: Is Default Browser Indicator
+- name: os
+  type: STRING
+  mode: NULLABLE
+  description: Operating System
+- name: normalized_os_version
+  type: STRING
+  mode: NULLABLE
+  description: Normalized OS Version
+- name: normalized_channel
+  type: STRING
+  mode: NULLABLE
+  description: Normalized Channel
+- name: country
+  type: STRING
+  mode: NULLABLE
+  description: country
+- name: lifecycle_stage
+  type: STRING
+  mode: NULLABLE
+  description: Indicates the life stage of a client profile
+- name: row_count
+  type: INTEGER
+  mode: NULLABLE
+  description: The total number of rows aggregated.

Link to full diff

Please sign in to comment.