From 4941d5bab37a0bdc1e5873ce8e7288483703751f Mon Sep 17 00:00:00 2001
From: Constantin M Adam
Date: Fri, 15 Nov 2024 10:46:43 -0500
Subject: [PATCH] Cleanup main entry point and local implementation of spark transforms

Signed-off-by: Constantin M Adam
---
 transforms/universal/fdedup/spark/Dockerfile                | 6 +-----
 .../universal/fdedup/spark/src/data_cleaning_local_spark.py | 4 ++++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/transforms/universal/fdedup/spark/Dockerfile b/transforms/universal/fdedup/spark/Dockerfile
index 772dfef79..b04994d46 100644
--- a/transforms/universal/fdedup/spark/Dockerfile
+++ b/transforms/universal/fdedup/spark/Dockerfile
@@ -32,11 +32,7 @@ RUN pip3 install -r requirements.txt
 RUN pip install --no-cache-dir -e .
 
 # copy the main() entry point to the image
-COPY ./src/signature_calc_spark.py .
-
-# copy some of the samples in
-COPY src/signature_calc_transform_spark.py fdedup_transform_spark.py
-COPY src/signature_calc_spark.py local/fdedup_local_spark.py
+COPY ./src/fdedup_transform_spark.py .
 
 # copy test
 COPY test/ test/
diff --git a/transforms/universal/fdedup/spark/src/data_cleaning_local_spark.py b/transforms/universal/fdedup/spark/src/data_cleaning_local_spark.py
index 9c14c67d8..eb1e61845 100644
--- a/transforms/universal/fdedup/spark/src/data_cleaning_local_spark.py
+++ b/transforms/universal/fdedup/spark/src/data_cleaning_local_spark.py
@@ -14,6 +14,10 @@
 import sys
 
 import polars as pl
+from data_cleaning_transform import (
+    document_id_column_cli_param,
+    duplicate_list_location_cli_param,
+)
 from data_cleaning_transform_spark import DataCleaningSparkTransformConfiguration
 from data_processing.utils import ParamsUtils
 from data_processing_spark.runtime.spark import SparkTransformLauncher