Skip to content

Commit

Permalink
refactor: use dynamodb schedules from the deployments table (#668)
Browse files Browse the repository at this point in the history
  • Loading branch information
pnadolny13 authored Jul 20, 2023
1 parent 02c5443 commit d79519a
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 102 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ SELECT
project_dim.project_lifespan_days AS oss_project_lifespan_days,
project_dim.project_first_event_at AS oss_project_first_event_at,
project_dim.project_last_event_at AS oss_project_last_event_at,
stg_dynamodb__project_schedules_table.interval AS schedule_interval,
stg_dynamodb__project_schedules_table.is_enabled AS schedule_is_enabled,
stg_dynamodb__project_schedules_table.schedule_interval,
stg_dynamodb__project_schedules_table.schedule_is_enabled,
stg_dynamodb__workload_metadata_table.started_ts,
stg_dynamodb__workload_metadata_table.finished_ts,
stg_dynamodb__workload_metadata_table.command_text_hash,
Expand Down
3 changes: 3 additions & 0 deletions data/transform/models/staging/dynamodb/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ models:
- name: cloud_schedule_name_hash
tests:
- not_null
- name: deployment_surrogate_key
tests:
- not_null
- name: stg_dynamodb__projects_table
columns:
- name: project_surrogate_key
Expand Down
Original file line number Diff line number Diff line change
@@ -1,46 +1,10 @@
-- Staging model over the tap_dynamodb project_deployments_table source.
-- Produces one row per deployment_surrogate_key and exposes hashed
-- deployment and environment names instead of the raw values.
WITH source AS (

SELECT
*,
-- The composite column "TENANT_RESOURCE_KEY::PROJECT_ID" is split on '::':
-- part 1 becomes tenant_resource_key, part 2 becomes cloud_project_id.
SPLIT_PART(
"TENANT_RESOURCE_KEY::PROJECT_ID", '::', 1 -- noqa: RF05
) AS tenant_resource_key,
SPLIT_PART(
"TENANT_RESOURCE_KEY::PROJECT_ID", '::', 2 -- noqa: RF05
) AS cloud_project_id
FROM {{ source('tap_dynamodb', 'project_deployments_table') }}
),

-- Adds the surrogate key and ranks the rows sharing a key, newest batch first.
clean_source AS (
SELECT
*,
-- Key is built from tenant, project and the raw deployment_name.
{{ dbt_utils.surrogate_key(
['tenant_resource_key', 'cloud_project_id', 'deployment_name']
) }} AS deployment_surrogate_key,
-- PARTITION BY refers to the deployment_surrogate_key alias defined just
-- above in this same SELECT list.
-- NOTE(review): the ORDER BY has a single key, so rows sharing the same
-- _sdc_batched_at have no tiebreaker; which one gets row_num = 1 is not
-- fixed by this query.
ROW_NUMBER() OVER (
PARTITION BY deployment_surrogate_key
ORDER BY CAST(_sdc_batched_at AS TIMESTAMP_NTZ) DESC
) AS row_num
FROM source
),

-- Keeps only the top-ranked row per key and projects the published columns.
renamed AS (

SELECT
deployment_surrogate_key,
tenant_resource_key,
cloud_project_id,
git_rev,
git_rev_hash,
-- Only SHA2_HEX digests of the deployment and environment names are
-- selected; the raw names are not part of the output.
SHA2_HEX(deployment_name) AS cloud_deployment_name_hash,
SHA2_HEX(environment_name) AS cloud_environment_name_hash,
CAST(
last_deployed_timestamp AS TIMESTAMP_NTZ
) AS last_deployed_timestamp
FROM clean_source
WHERE row_num = 1

)

SELECT *
FROM renamed
-- Project deployments staging model: selects the eight deployment columns
-- listed below, unchanged, from the stg_dynamodb__project_deployments_base
-- model.
SELECT
    deployments_base.deployment_surrogate_key,
    deployments_base.tenant_resource_key,
    deployments_base.cloud_project_id,
    deployments_base.git_rev,
    deployments_base.git_rev_hash,
    deployments_base.cloud_deployment_name_hash,
    deployments_base.cloud_environment_name_hash,
    deployments_base.last_deployed_timestamp
FROM {{ ref('stg_dynamodb__project_deployments_base') }} AS deployments_base
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
-- Base staging model over the tap_dynamodb project_deployments_table source.
-- Emits one row per deployment_surrogate_key (the most recently batched
-- record for that key) and carries the raw `schedules` attribute through as
-- `schedules_obj`.
WITH split_keys AS (

    -- Break the composite "TENANT_RESOURCE_KEY::PROJECT_ID" column into its
    -- two '::'-separated parts.
    SELECT
        *,
        SPLIT_PART(
            "TENANT_RESOURCE_KEY::PROJECT_ID", '::', 1 -- noqa: RF05
        ) AS tenant_resource_key,
        SPLIT_PART(
            "TENANT_RESOURCE_KEY::PROJECT_ID", '::', 2 -- noqa: RF05
        ) AS cloud_project_id
    FROM {{ source('tap_dynamodb', 'project_deployments_table') }}

),

keyed AS (

    -- Attach the surrogate key built from tenant, project and deployment name.
    SELECT
        *,
        {{ dbt_utils.surrogate_key(
            ['tenant_resource_key', 'cloud_project_id', 'deployment_name']
        ) }} AS deployment_surrogate_key
    FROM split_keys

),

latest_per_deployment AS (

    -- Project the published columns and keep, for every key, only the row
    -- ranked first when ordered by _sdc_batched_at descending.
    SELECT
        deployment_surrogate_key,
        tenant_resource_key,
        cloud_project_id,
        git_rev,
        git_rev_hash,
        schedules AS schedules_obj,
        SHA2_HEX(deployment_name) AS cloud_deployment_name_hash,
        SHA2_HEX(environment_name) AS cloud_environment_name_hash,
        CAST(
            last_deployed_timestamp AS TIMESTAMP_NTZ
        ) AS last_deployed_timestamp
    FROM keyed
    QUALIFY ROW_NUMBER() OVER (
        PARTITION BY deployment_surrogate_key
        ORDER BY CAST(_sdc_batched_at AS TIMESTAMP_NTZ) DESC
    ) = 1

)

SELECT *
FROM latest_per_deployment
Original file line number Diff line number Diff line change
@@ -1,54 +1,19 @@
-- Staging model over the tap_dynamodb project_schedules_table source.
-- Produces one row per schedule_surrogate_key and exposes hashed deployment
-- and schedule names instead of the raw values.
WITH source AS (

SELECT
*,
-- "DEPLOYMENT_NAME::SCHEDULE_NAME" is split on '::': part 1 is the deployment
-- name, part 2 is the schedule name.
SPLIT_PART(
"DEPLOYMENT_NAME::SCHEDULE_NAME", '::', 1 -- noqa: RF05
) AS cloud_deployment_name,
-- Hyphens in the schedule name are replaced with underscores, so both the
-- surrogate key and the name hash below are computed on the replaced form.
REPLACE(
SPLIT_PART(
"DEPLOYMENT_NAME::SCHEDULE_NAME", -- noqa: RF05
'::',
2
), '-', '_'
) AS cloud_schedule_name,
-- "TENANT_RESOURCE_KEY::PROJECT_ID" is split the same way into tenant and
-- project identifiers.
SPLIT_PART(
"TENANT_RESOURCE_KEY::PROJECT_ID", '::', 1 -- noqa: RF05
) AS tenant_resource_key,
SPLIT_PART(
"TENANT_RESOURCE_KEY::PROJECT_ID", '::', 2 -- noqa: RF05
) AS cloud_project_id
FROM {{ source('tap_dynamodb', 'project_schedules_table') }}
),

-- Adds the surrogate key and ranks the rows sharing a key, newest batch first.
clean_source AS (
SELECT
*,
-- Key is built from tenant, project, deployment name and schedule name.
{{ dbt_utils.surrogate_key(
['tenant_resource_key', 'cloud_project_id', 'cloud_deployment_name', 'cloud_schedule_name']
) }} AS schedule_surrogate_key,
-- PARTITION BY refers to the schedule_surrogate_key alias defined just above
-- in this same SELECT list.
-- NOTE(review): the ORDER BY has a single key, so rows sharing the same
-- _sdc_batched_at have no tiebreaker; which one gets row_num = 1 is not
-- fixed by this query.
ROW_NUMBER() OVER (
PARTITION BY schedule_surrogate_key
ORDER BY CAST(_sdc_batched_at AS TIMESTAMP_NTZ) DESC
) AS row_num
FROM source
),

-- Keeps only the top-ranked row per key and projects the published columns.
renamed AS (

SELECT
schedule_surrogate_key,
interval,
-- The source attribute `enabled` is published as `is_enabled`.
enabled AS is_enabled,
tenant_resource_key,
cloud_project_id,
eventbridge_name,
-- Only SHA2_HEX digests of the deployment and schedule names are selected.
SHA2_HEX(cloud_deployment_name) AS cloud_deployment_name_hash,
SHA2_HEX(cloud_schedule_name) AS cloud_schedule_name_hash
FROM clean_source
WHERE row_num = 1

)

SELECT *
FROM renamed
-- Project schedules staging model: one row per element of `schedules_obj`
-- on each row of the stg_dynamodb__project_deployments_base model.
WITH exploded_schedules AS (

    -- FLATTEN yields one row per element of schedules_obj; each element's
    -- fields are read by path and cast to concrete types.
    SELECT
        deployments.deployment_surrogate_key,
        deployments.tenant_resource_key,
        deployments.cloud_project_id,
        deployments.cloud_deployment_name_hash,
        CAST(schedule_elem.value:schedule_name AS STRING) AS schedule_name,
        CAST(schedule_elem.value:interval AS STRING) AS schedule_interval,
        CAST(schedule_elem.value:enabled AS BOOLEAN) AS schedule_is_enabled,
        CAST(
            schedule_elem.value:eventbridge_name AS STRING
        ) AS eventbridge_name
    FROM {{ ref('stg_dynamodb__project_deployments_base') }} AS deployments,
        LATERAL FLATTEN(input => deployments.schedules_obj) AS schedule_elem

)

-- The schedule key combines the parent deployment key with the schedule name;
-- the schedule name itself is published only as a SHA2_HEX digest.
SELECT
    {{ dbt_utils.surrogate_key(
        ['deployment_surrogate_key', 'schedule_name']
    ) }} AS schedule_surrogate_key,
    deployment_surrogate_key,
    schedule_interval,
    schedule_is_enabled,
    tenant_resource_key,
    cloud_project_id,
    eventbridge_name,
    cloud_deployment_name_hash,
    SHA2_HEX(schedule_name) AS cloud_schedule_name_hash
FROM exploded_schedules

0 comments on commit d79519a

Please sign in to comment.