Skip to content

Commit

Permalink
chose to break out the weekly full refresh into its own DAG so that …
Browse files Browse the repository at this point in the history
…the original DAG is preserved if it needs to be used
  • Loading branch information
charlie-costanzo committed Feb 7, 2024
1 parent 862afb8 commit 1555c00
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 1 deletion.
2 changes: 1 addition & 1 deletion airflow/dags/transform_warehouse_full_refresh/METADATA.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
description: "Runs dbt with --full-refresh; use this to refresh/rebuild/backfill incremental models."
schedule_interval: "0 12 * * 0"
schedule_interval: Null
tags:
- all_gusty_features
default_args:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# DAG-level metadata for the scheduled dbt full-refresh DAG.
# NOTE(review): leading indentation was lost in this capture. The keys from
# `owner` through `retry_delay` are presumably nested under `default_args`;
# whether `concurrency` and the commented-out `sla` belong there or at the
# top level cannot be determined from here - confirm against the repository.
description: "Runs dbt with --full-refresh; use this to refresh/rebuild/backfill incremental models."
# Cron expression: 12:00 every Sunday (timezone presumably UTC - TODO confirm).
schedule_interval: "0 12 * * 0"
tags:
- all_gusty_features
default_args:
owner: airflow
depends_on_past: False
# `!days_ago` is a custom YAML tag, presumably resolved by the DAG loader
# into a date relative to parse time - confirm.
start_date: !days_ago 1
# NOTE(review): the addresses below appear to have been obfuscated by the
# page's e-mail protection; the real recipients are not visible here.
email:
- "[email protected]"
- "[email protected]"
- "[email protected]"
email_on_failure: True
email_on_retry: False
# With zero retries, `retry_delay` has no effect unless retries is raised.
retries: 0
# `!timedelta` is a custom YAML tag taking a 'unit: value' string.
retry_delay: !timedelta 'minutes: 2'
concurrency: 50
#sla: !timedelta 'hours: 2'
latest_only: True
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Task spec: runs the warehouse dbt project with --full-refresh inside a
# Kubernetes pod, then saves artifacts, deploys docs and syncs Metabase
# according to the flags passed to run_and_upload.py.
operator: 'operators.PodOperator'
name: 'dbt-run-and-upload-artifacts'
image: 'ghcr.io/cal-itp/data-infra/warehouse:{{ image_tag() }}'

cmds:
  - python3
# Arguments are handed verbatim to the script; order matters for the
# `--select` / `--exclude` flag-value pairs.
arguments:
  - '/app/scripts/run_and_upload.py'
  - 'run'
  - '--full-refresh'
  - '--dbt-docs'
  - '--save-artifacts'
  - '--deploy-docs'
  - '--sync-metabase'
  - '--select'
  # Optional selector supplied through the DAG run conf; renders as an empty
  # string when the run was not triggered with `dbt_select_statement`.
  - "{{ dag_run.conf.get('dbt_select_statement', '') }}"
  - '--exclude'
  # Trailing `+` is dbt graph syntax: the named model plus its descendants.
  - 'gtfs_rt_external_tables+'

# Pod lifecycle and target cluster.
is_delete_operator_pod: true
get_logs: true
is_gke: true
pod_location: us-west1
cluster_name: data-infra-apps
namespace: airflow-jobs
priority_class_name: dbt-high-priority

# Environment passed to the container; templated values are rendered
# before the pod is created.
env_vars:
  AIRFLOW_ENV: "{{ env_var('AIRFLOW_ENV') }}"
  CALITP_BUCKET__DBT_ARTIFACTS: "{{ env_var('CALITP_BUCKET__DBT_ARTIFACTS') }}"
  # NOTE(review): this path uses `service_account.json` (underscore) while the
  # volume secret below names the key `service-account.json` (hyphen) -
  # confirm which file name is actually mounted.
  BIGQUERY_KEYFILE_LOCATION: /secrets/jobs-data/service_account.json
  DBT_PROJECT_DIR: /app
  DBT_PROFILE_DIR: /app
  DBT_DATABASE: "{{ get_project_id() }}"
  DBT_TARGET: "{{ env_var('DBT_TARGET') }}"
  MB_HOST: dashboards.calitp.org
  NETLIFY_SITE_ID: cal-itp-dbt-docs
  SENTRY_DSN: "{{ env_var('SENTRY_DSN') }}"
  SENTRY_ENVIRONMENT: "{{ env_var('SENTRY_ENVIRONMENT') }}"

# Kubernetes secrets: one mounted as a volume, the rest exposed as
# environment variables named by `deploy_target`.
secrets:
  - deploy_type: volume
    deploy_target: /secrets/jobs-data/
    secret: jobs-data
    key: service-account.json
  - deploy_type: env
    deploy_target: MB_USER
    secret: jobs-data
    key: metabase-user
  - deploy_type: env
    deploy_target: MB_PASSWORD
    secret: jobs-data
    key: metabase-password
  - deploy_type: env
    deploy_target: NETLIFY_AUTH_TOKEN
    secret: jobs-data
    key: netlify-auth-token

k8s_resources:
  request_memory: 2.0Gi
  request_cpu: 1

# Toleration and node affinity together pin the pod to nodes labelled and
# tainted with pod-role=computetask.
tolerations:
  - key: pod-role
    operator: Equal
    value: computetask
    effect: NoSchedule

affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            - key: pod-role
              operator: In
              values:
                - computetask
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Task spec: runs dbt tests (no seed, no run) inside a Kubernetes pod after
# the dbt run task has finished.
operator: 'operators.PodOperator'
name: 'dbt-test'
image: 'ghcr.io/cal-itp/data-infra/warehouse:{{ image_tag() }}'

cmds:
  - python3
# Arguments are handed verbatim to the script; `--select` must be followed
# immediately by its value.
arguments:
  - '/app/scripts/run_and_upload.py'
  - 'run'
  - '--no-dbt-seed'
  - '--no-dbt-run'
  - '--dbt-test'
  - '--select'
  # Optional selector supplied through the DAG run conf; renders as an empty
  # string when the run was not triggered with `dbt_select_statement`.
  - "{{ dag_run.conf.get('dbt_select_statement', '') }}"

# Runs downstream of the dbt run task. `all_done` fires once the upstream
# task has finished in any state, so tests still run after a failed run.
dependencies:
  - dbt_run_and_upload_artifacts
trigger_rule: all_done

# Pod lifecycle and target cluster.
is_delete_operator_pod: true
get_logs: true
is_gke: true
pod_location: us-west1
cluster_name: data-infra-apps
namespace: airflow-jobs

# Environment passed to the container; templated values are rendered
# before the pod is created.
env_vars:
  AIRFLOW_ENV: "{{ env_var('AIRFLOW_ENV') }}"
  CALITP_BUCKET__DBT_ARTIFACTS: "{{ env_var('CALITP_BUCKET__DBT_ARTIFACTS') }}"
  # NOTE(review): this path uses `service_account.json` (underscore) while the
  # volume secret below names the key `service-account.json` (hyphen) -
  # confirm which file name is actually mounted.
  BIGQUERY_KEYFILE_LOCATION: /secrets/jobs-data/service_account.json
  DBT_PROJECT_DIR: /app
  DBT_PROFILE_DIR: /app
  DBT_TARGET: "{{ env_var('DBT_TARGET') }}"
  SENTRY_DSN: "{{ env_var('SENTRY_DSN') }}"
  SENTRY_ENVIRONMENT: "{{ env_var('SENTRY_ENVIRONMENT') }}"

# Kubernetes secret mounted as a volume at `deploy_target`.
secrets:
  - deploy_type: volume
    deploy_target: /secrets/jobs-data/
    secret: jobs-data
    key: service-account.json

k8s_resources:
  request_memory: 2.0Gi
  request_cpu: 1

# Toleration and node affinity together pin the pod to nodes labelled and
# tainted with pod-role=computetask.
tolerations:
  - key: pod-role
    operator: Equal
    value: computetask
    effect: NoSchedule

affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            - key: pod-role
              operator: In
              values:
                - computetask

0 comments on commit 1555c00

Please sign in to comment.