Skip to content

Commit

Permalink
add variable to disable removing sql source files for ingestion workfl…
Browse files Browse the repository at this point in the history
…ows.
  • Loading branch information
madewithkode committed Apr 30, 2024
1 parent 1d875c8 commit 477cdc6
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
12 changes: 9 additions & 3 deletions catalog/dags/providers/provider_api_scripts/inaturalist.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,10 @@ def create_preingestion_tasks():
python_callable=INaturalistDataIngester.load_catalog_of_life_names,
doc_md="Load vernacular taxon names from Catalog of Life",
op_kwargs={
"remove_api_files": "{{params.sql_rm_source_data_after_ingesting}}"
"remove_api_files": any(
"{{ params.sql_rm_source_data_after_ingesting }}",
"{{ var.json.SQL_RM_SOURCE_DATA_AFTER_INGESTION}}",
)
},
execution_timeout=timedelta(minutes=15),
)
Expand All @@ -347,8 +350,11 @@ def create_postingestion_tasks():
check_drop_parameter = ShortCircuitOperator(
task_id="check_drop_parameter",
doc_md="Skip post-ingestion if NOT sql_rm_source_data_after_ingesting.",
op_args=["{{ params.sql_rm_source_data_after_ingesting }}"],
python_callable=(lambda x: x),
op_args=[
"{{ params.sql_rm_source_data_after_ingesting }}",
"{{ var.json.SQL_RM_SOURCE_DATA_AFTER_INGESTION}}",
],
python_callable=(lambda *x: any(x)),
trigger_rule=TriggerRule.NONE_SKIPPED,
# just skip the drop steps, not the final reporting step in the dag
ignore_downstream_trigger_rules=False,
Expand Down
4 changes: 4 additions & 0 deletions catalog/env.template
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,7 @@ AIRFLOW_VAR_AIRFLOW_RDS_SNAPSHOTS_TO_RETAIN=7
# Whether to toggle production CloudWatch alarms when running a data refresh DAG.
# Used to prevent requiring AWS credentials when running locally.
AIRFLOW_VAR_TOGGLE_CLOUDWATCH_ALARMS=false

# Whether to delete source data from Airflow and the DB once ingestion is complete.
# This is used to support data quality testing in SQL-only DAGs like iNaturalist.
AIRFLOW_VAR_SQL_RM_SOURCE_DATA_AFTER_INGESTION=false

0 comments on commit 477cdc6

Please sign in to comment.