From 69f07b3ccd3e22193419a22ecfa28a206fe9c9d0 Mon Sep 17 00:00:00 2001 From: pnadolny13 Date: Thu, 20 Jul 2023 16:40:45 -0400 Subject: [PATCH] use schedules data from the deployments table --- .../telemetry/base/cloud_executions_base.sql | 4 +- .../models/staging/dynamodb/schema.yml | 3 + .../stg_dynamodb__project_deployments.sql | 56 +++----------- ...stg_dynamodb__project_deployments_base.sql | 47 ++++++++++++ .../stg_dynamodb__project_schedules_table.sql | 73 +++++-------------- 5 files changed, 81 insertions(+), 102 deletions(-) create mode 100644 data/transform/models/staging/dynamodb/stg_dynamodb__project_deployments_base.sql diff --git a/data/transform/models/marts/telemetry/base/cloud_executions_base.sql b/data/transform/models/marts/telemetry/base/cloud_executions_base.sql index bedfbbb7..e9046ca9 100644 --- a/data/transform/models/marts/telemetry/base/cloud_executions_base.sql +++ b/data/transform/models/marts/telemetry/base/cloud_executions_base.sql @@ -27,8 +27,8 @@ SELECT project_dim.project_lifespan_days AS oss_project_lifespan_days, project_dim.project_first_event_at AS oss_project_first_event_at, project_dim.project_last_event_at AS oss_project_last_event_at, - stg_dynamodb__project_schedules_table.interval AS schedule_interval, - stg_dynamodb__project_schedules_table.is_enabled AS schedule_is_enabled, + stg_dynamodb__project_schedules_table.schedule_interval, + stg_dynamodb__project_schedules_table.schedule_is_enabled, stg_dynamodb__workload_metadata_table.started_ts, stg_dynamodb__workload_metadata_table.finished_ts, stg_dynamodb__workload_metadata_table.command_text_hash, diff --git a/data/transform/models/staging/dynamodb/schema.yml b/data/transform/models/staging/dynamodb/schema.yml index 5bc4619d..d6b34337 100644 --- a/data/transform/models/staging/dynamodb/schema.yml +++ b/data/transform/models/staging/dynamodb/schema.yml @@ -46,6 +46,9 @@ models: - name: cloud_schedule_name_hash tests: - not_null + - name: deployment_surrogate_key + tests: + - not_null - name: stg_dynamodb__projects_table columns: - name: project_surrogate_key diff --git a/data/transform/models/staging/dynamodb/stg_dynamodb__project_deployments.sql b/data/transform/models/staging/dynamodb/stg_dynamodb__project_deployments.sql index 6af95eb6..999df250 100644 --- a/data/transform/models/staging/dynamodb/stg_dynamodb__project_deployments.sql +++ b/data/transform/models/staging/dynamodb/stg_dynamodb__project_deployments.sql @@ -1,46 +1,10 @@ -WITH source AS ( - - SELECT - *, - SPLIT_PART( - "TENANT_RESOURCE_KEY::PROJECT_ID", '::', 1 -- noqa: RF05 - ) AS tenant_resource_key, - SPLIT_PART( - "TENANT_RESOURCE_KEY::PROJECT_ID", '::', 2 -- noqa: RF05 - ) AS cloud_project_id - FROM {{ source('tap_dynamodb', 'project_deployments_table') }} -), - -clean_source AS ( - SELECT - *, - {{ dbt_utils.surrogate_key( - ['tenant_resource_key', 'cloud_project_id', 'deployment_name'] - ) }} AS deployment_surrogate_key, - ROW_NUMBER() OVER ( - PARTITION BY deployment_surrogate_key - ORDER BY CAST(_sdc_batched_at AS TIMESTAMP_NTZ) DESC - ) AS row_num - FROM source -), - -renamed AS ( - - SELECT - deployment_surrogate_key, - tenant_resource_key, - cloud_project_id, - git_rev, - git_rev_hash, - SHA2_HEX(deployment_name) AS cloud_deployment_name_hash, - SHA2_HEX(environment_name) AS cloud_environment_name_hash, - CAST( - last_deployed_timestamp AS TIMESTAMP_NTZ - ) AS last_deployed_timestamp - FROM clean_source - WHERE row_num = 1 - -) - -SELECT * -FROM renamed +SELECT + deployment_surrogate_key, + tenant_resource_key, + cloud_project_id, + git_rev, + git_rev_hash, + cloud_deployment_name_hash, + cloud_environment_name_hash, + last_deployed_timestamp +FROM {{ ref('stg_dynamodb__project_deployments_base') }} diff --git a/data/transform/models/staging/dynamodb/stg_dynamodb__project_deployments_base.sql b/data/transform/models/staging/dynamodb/stg_dynamodb__project_deployments_base.sql new file mode 100644 index 00000000..62281aa6 --- /dev/null +++ b/data/transform/models/staging/dynamodb/stg_dynamodb__project_deployments_base.sql @@ -0,0 +1,47 @@ +WITH source AS ( + + SELECT + *, + SPLIT_PART( + "TENANT_RESOURCE_KEY::PROJECT_ID", '::', 1 -- noqa: RF05 + ) AS tenant_resource_key, + SPLIT_PART( + "TENANT_RESOURCE_KEY::PROJECT_ID", '::', 2 -- noqa: RF05 + ) AS cloud_project_id + FROM {{ source('tap_dynamodb', 'project_deployments_table') }} +), + +clean_source AS ( + SELECT + *, + {{ dbt_utils.surrogate_key( + ['tenant_resource_key', 'cloud_project_id', 'deployment_name'] + ) }} AS deployment_surrogate_key, + ROW_NUMBER() OVER ( + PARTITION BY deployment_surrogate_key + ORDER BY CAST(_sdc_batched_at AS TIMESTAMP_NTZ) DESC + ) AS row_num + FROM source +), + +renamed AS ( + + SELECT + deployment_surrogate_key, + tenant_resource_key, + cloud_project_id, + git_rev, + git_rev_hash, + schedules AS schedules_obj, + SHA2_HEX(deployment_name) AS cloud_deployment_name_hash, + SHA2_HEX(environment_name) AS cloud_environment_name_hash, + CAST( + last_deployed_timestamp AS TIMESTAMP_NTZ + ) AS last_deployed_timestamp + FROM clean_source + WHERE row_num = 1 + +) + +SELECT * +FROM renamed diff --git a/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql b/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql index 4c18e504..a4a3f742 100644 --- a/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql +++ b/data/transform/models/staging/dynamodb/stg_dynamodb__project_schedules_table.sql @@ -1,54 +1,19 @@ -WITH source AS ( - - SELECT - *, - SPLIT_PART( - "DEPLOYMENT_NAME::SCHEDULE_NAME", '::', 1 -- noqa: RF05 - ) AS cloud_deployment_name, - REPLACE( - SPLIT_PART( - "DEPLOYMENT_NAME::SCHEDULE_NAME", -- noqa: RF05 - '::', - 2 - ), '-', '_' - ) AS cloud_schedule_name, - SPLIT_PART( - "TENANT_RESOURCE_KEY::PROJECT_ID", '::', 1 -- noqa: RF05 - ) AS tenant_resource_key, - SPLIT_PART( - "TENANT_RESOURCE_KEY::PROJECT_ID", '::', 2 -- noqa: RF05 - ) AS cloud_project_id - FROM {{ source('tap_dynamodb', 'project_schedules_table') }} -), - -clean_source AS ( - SELECT - *, - {{ dbt_utils.surrogate_key( - ['tenant_resource_key', 'cloud_project_id', 'cloud_deployment_name', 'cloud_schedule_name'] - ) }} AS schedule_surrogate_key, - ROW_NUMBER() OVER ( - PARTITION BY schedule_surrogate_key - ORDER BY CAST(_sdc_batched_at AS TIMESTAMP_NTZ) DESC - ) AS row_num - FROM source -), - -renamed AS ( - - SELECT - schedule_surrogate_key, - interval, - enabled AS is_enabled, - tenant_resource_key, - cloud_project_id, - eventbridge_name, - SHA2_HEX(cloud_deployment_name) AS cloud_deployment_name_hash, - SHA2_HEX(cloud_schedule_name) AS cloud_schedule_name_hash - FROM clean_source - WHERE row_num = 1 - -) - -SELECT * -FROM renamed +SELECT + {{ dbt_utils.surrogate_key( + [ + 'stg_dynamodb__project_deployments_base.deployment_surrogate_key', + 'CAST(schedule_elem.value:schedule_name AS STRING)' + ] + ) }} AS schedule_surrogate_key, + stg_dynamodb__project_deployments_base.deployment_surrogate_key, + CAST(schedule_elem.value:interval AS STRING) AS schedule_interval, + CAST(schedule_elem.value:enabled AS BOOLEAN) AS schedule_is_enabled, + stg_dynamodb__project_deployments_base.tenant_resource_key, + stg_dynamodb__project_deployments_base.cloud_project_id, + CAST(schedule_elem.value:eventbridge_name AS STRING) AS eventbridge_name, + stg_dynamodb__project_deployments_base.cloud_deployment_name_hash, + SHA2_HEX( + CAST(schedule_elem.value:schedule_name AS STRING) + ) AS cloud_schedule_name_hash +FROM {{ ref('stg_dynamodb__project_deployments_base') }}, + LATERAL FLATTEN(input => schedules_obj) AS schedule_elem