Rename the template to scd1_upsert
sbuldeev committed Nov 29, 2024
1 parent 9161cba commit f30595e
Showing 14 changed files with 77 additions and 7 deletions.
@@ -35,7 +35,6 @@ def run(job_input: IJobInput):
- copy the data from staging to target table
3. Copying the data:
- truncate target table and insert the data from staging table
- 4. Drop staging table
"""

job_arguments = job_input.get_arguments()
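The staging flow listed in the docstring above can be sketched as a sequence of SQL statements. This is an illustrative approximation only — the real template generates its SQL internally, and the schema, view, and staging-table names below are hypothetical:

```python
def build_scd1_statements(source_schema, source_view, target_schema, target_table):
    # Hypothetical staging-table name; the real template derives its own.
    staging = f"{target_schema}.stg_{target_table}"
    return [
        # 1. Create an empty staging table with the target's structure
        f"CREATE TABLE {staging} AS SELECT * FROM {target_schema}.{target_table} WHERE 1 = 0",
        # 2. Copy the source view into staging
        f"INSERT INTO {staging} SELECT * FROM {source_schema}.{source_view}",
        # 3. Truncate the target (DELETE FROM in Trino) and reload it from staging
        f"DELETE FROM {target_schema}.{target_table}",
        f"INSERT INTO {target_schema}.{target_table} SELECT * FROM {staging}",
    ]
```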
@@ -1,19 +1,19 @@
### Purpose:

- This template can be used to load raw data from a database to target table in a database.
+ This template can be used to load raw data from a database into a target 'Slowly Changing Dimension Type 1' (SCD1) table.
In summary, it upserts the target table with the source data.

### Template Name (template_name):

- "scd_upsert"
- "scd1_upsert"

### Template Parameters (template_args):

- target_schema - database schema, where target data is loaded
- target_table - database table where target data is loaded
- source_schema - database schema, where source raw data is loaded from
- source_view - database view, where source raw data is loaded from
- - id_colum - column that will be used for tracking which row should be updated and which inserted
+ - id_column - column that will be used for tracking which row should be updated and which inserted
- check - (Optional) Callback function responsible for checking the quality of the data. Takes in a table name as a parameter which will be used for data validation
- staging_schema - (Optional) Schema where the checks will be executed. If not provided target_schema will be used as default

@@ -44,6 +44,6 @@ def run(job_input):
'target_table': 'dim_sddc',
'id_column': 'dim_sddc_id'
}
job_input.execute_template("scd_upsert", template_args, database="trino")
job_input.execute_template("scd1_upsert", template_args, database="trino")
# . . .
```
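The optional `check` parameter documented above is not exercised in the README's example. The sketch below shows one plausible way to pass a data-quality callback — the callback body and all schema/table names are hypothetical, and the callback's exact contract is defined by the template, not by this sketch:

```python
def run(job_input):
    # Hypothetical data-quality callback: the template passes it the name of
    # the table to validate and expects a truthy result when the data is OK.
    def check_non_empty(table_name):
        rows = job_input.execute_query(f"SELECT COUNT(*) FROM {table_name}")
        return rows[0][0] > 0

    template_args = {
        'source_schema': 'history',
        'source_view': 'vw_dim_sddc',
        'target_schema': 'history',
        'target_table': 'dim_sddc',
        'id_column': 'dim_sddc_id',
        'check': check_non_empty,  # optional validation hook
    }
    job_input.execute_template("scd1_upsert", template_args, database="trino")
```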
@@ -76,8 +76,8 @@ def initialize_job(self, context: JobContext):
)

context.templates.add_template(
"scd_upsert",
pathlib.Path(get_job_path("load/dimension/scd_upsert")),
"scd1_upsert",
pathlib.Path(get_job_path("load/dimension/scd1_upsert")),
connection_name,
)

40 changes: 40 additions & 0 deletions test-new-template/20_python_step.py
@@ -0,0 +1,40 @@
# Copyright 2023-2024 Broadcom
# SPDX-License-Identifier: Apache-2.0
import logging

from vdk.api.job_input import IJobInput

log = logging.getLogger(__name__)


def run(job_input: IJobInput):
"""
Function named `run` is required in order for a python script to be recognized as a Data Job Python step and executed.
VDK provides to every python step an object - job_input - that has methods for:
* executing queries to OLAP Database;
* ingesting data into a database;
* processing data into a database.
See IJobInput documentation for more details.
"""
log.info(f"Starting job step {__name__}")

def sample_check(random_table_name):
return True

# Write your python code inside here ... for example:
job_input.execute_template(
template_name='scd1_upsert',
template_args={
'source_schema': 'starshot_internal_dw_stg',
'source_view': 'sbuldeev_vw_template_test',
'target_schema': 'starshot_internal_dw_stg',
'target_table': 'sbuldeev_dw_template_test',
'id_column': 'org_id',
'check': sample_check
},
database="trino",
)

job_input.execute_query("SELECT 1")
28 changes: 28 additions & 0 deletions test-new-template/config.ini
@@ -0,0 +1,28 @@
; Supported format: https://docs.python.org/3/library/configparser.html#supported-ini-file-structure

; This is the only file required to deploy a Data Job.
; Read more to understand what each option means:

; Information about the owner of the Data Job
[owner]

; Team is a way to group Data Jobs that belonged to the same team.
team = my-team

; Configuration related to running data jobs
[job]
; For format see https://en.wikipedia.org/wiki/Cron
; The cron expression is evaluated in UTC time.
; If it is time for a new job run and the previous job run hasn’t finished yet,
; the cron job waits until the previous execution has finished.
schedule_cron = 11 23 5 8 1

[vdk]
db_default_type=trino
trino_user=sb004367
trino_password=<redacted>
trino_host=trino.broadcom.net
trino_port=443
trino_schema=sc_hms
trino_use_ssl=True
trino_catalog=sc_hms
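A config.ini in this format can be read with Python's standard `configparser`, which is what the supported-format comment at the top of the file refers to. The snippet below is a minimal sketch using placeholder values only (real credentials such as `trino_password` should come from a secret store, not the repository):

```python
import configparser

# Minimal sketch: parsing a VDK-style config.ini. All values are placeholders.
sample = """
[owner]
team = my-team

[job]
schedule_cron = 11 23 5 8 1

[vdk]
db_default_type = trino
trino_host = trino.example.net
trino_port = 443
trino_use_ssl = True
"""

config = configparser.ConfigParser()
config.read_string(sample)

team = config.get("owner", "team")
port = config.getint("vdk", "trino_port")           # typed read: int
use_ssl = config.getboolean("vdk", "trino_use_ssl")  # typed read: bool
```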
3 changes: 3 additions & 0 deletions test-new-template/requirements.txt
@@ -0,0 +1,3 @@
# Python jobs can specify extra library dependencies in requirements.txt file.
# See https://pip.readthedocs.io/en/stable/user_guide/#requirements-files
# The file is optional and can be deleted if no extra library dependencies are necessary.
