diff --git a/dpypelines/pipeline/dataset_ingress_v1.py b/dpypelines/pipeline/dataset_ingress_v1.py index 9388f91..c03720b 100644 --- a/dpypelines/pipeline/dataset_ingress_v1.py +++ b/dpypelines/pipeline/dataset_ingress_v1.py @@ -98,17 +98,31 @@ def dataset_ingress_v1(files_dir: str, pipeline_config: dict): de_notifier.failure() raise err + + skip_data_upload = os.environ.get("SKIP_DATA_UPLOAD", False) + + if skip_data_upload is not False: + try: + skip_data_upload = str_to_bool(skip_data_upload) + except Exception as err: + logger.error( + "Unable to cast SKIP_DATA_UPLOAD to boolean", + err, + data={"value": skip_data_upload}, + ) + de_notifier.failure() + raise err + # Get Upload Service URL from environment variable - try: - upload_url = os.environ.get("UPLOAD_SERVICE_URL", None) - assert ( - upload_url is not None - ), "UPLOAD_SERVICE_URL environment variable not set." - logger.info("Got Upload Service URL", data={"upload_url": upload_url}) - except Exception as err: - logger.error("Error occurred when getting Upload Service URL", err) - de_notifier.failure() - raise err + if skip_data_upload is False: + try: + upload_url = os.environ.get("UPLOAD_SERVICE_URL", None) + assert (upload_url is not None), "UPLOAD_SERVICE_URL environment variable not set" + logger.info("Got Upload Service URL", data={"upload_url": upload_url}) + except Exception as err: + logger.error("Error occurred when getting Upload Service URL", err) + de_notifier.failure() + raise err # Extract the patterns for required files from the pipeline configuration try: @@ -354,18 +368,6 @@ def dataset_ingress_v1(files_dir: str, pipeline_config: dict): # Allow DE's to skip the upload to s3 part of the pipeline while # developing code locally. - skip_data_upload = os.environ.get("SKIP_DATA_UPLOAD", False) - if skip_data_upload is not False: - try: - skip_data_upload = str_to_bool(skip_data_upload) - except Exception as err: - logger.error( - "Unable to cast SKIP_DATA_UPLOAD to boolean", - err, - data={"value": skip_data_upload}, - ) - de_notifier.failure() - raise err logger.info( "skip_data_upload set from SKIP_DATA_UPLOAD env var", diff --git a/dpypelines/pipeline/generic_file_ingress_v1.py b/dpypelines/pipeline/generic_file_ingress_v1.py index 37b4b60..81cf0f5 100644 --- a/dpypelines/pipeline/generic_file_ingress_v1.py +++ b/dpypelines/pipeline/generic_file_ingress_v1.py @@ -80,16 +80,29 @@ def generic_file_ingress_v1(files_dir: str, pipeline_config: dict): raise err # Get Upload Service URL from environment variable - try: - upload_url = os.environ.get("UPLOAD_SERVICE_URL", None) - assert ( - upload_url is not None - ), "UPLOAD_SERVICE_URL environment variable not set." - logger.info("Got Upload Service URL", data={"upload_url": upload_url}) - except Exception as err: - logger.error("Error occurred when getting Upload Service URL", err) - notifier.failure() - raise err + skip_data_upload = os.environ.get("SKIP_DATA_UPLOAD", False) + + if skip_data_upload is not False: + try: + skip_data_upload = str_to_bool(skip_data_upload) + except Exception as err: + logger.error( + "Unable to cast SKIP_DATA_UPLOAD to boolean", + err, + data={"value": skip_data_upload}, + ) + notifier.failure() + raise err + + if skip_data_upload is False: + try: + upload_url = os.environ.get("UPLOAD_SERVICE_URL", None) + assert (upload_url is not None), "UPLOAD_SERVICE_URL environment variable not set" + logger.info("Got Upload Service URL", data={"upload_url": upload_url}) + except Exception as err: + logger.error("Error occurred when getting Upload Service URL", err) + notifier.failure() + raise err # Extract the patterns for required files from the pipeline configuration try: @@ -148,18 +161,6 @@ def generic_file_ingress_v1(files_dir: str, pipeline_config: dict): # Allow DE's to skip the upload to s3 part of the pipeline while # developing code locally. - skip_data_upload = os.environ.get("SKIP_DATA_UPLOAD", False) - if skip_data_upload is not False: - try: - skip_data_upload = str_to_bool(skip_data_upload) - except Exception as err: - logger.error( - "Unable to cast SKIP_DATA_UPLOAD to boolean", - err, - data={"value": skip_data_upload}, - ) - notifier.failure() - raise err logger.info( "skip_data_upload set from SKIP_DATA_UPLOAD env var", diff --git a/features/environment.py b/features/environment.py index a6af897..c1b6fd0 100644 --- a/features/environment.py +++ b/features/environment.py @@ -32,9 +32,6 @@ def before_all(context): context.upload_service_url = os.environ.get("UPLOAD_SERVICE_URL", None) os.environ["UPLOAD_SERVICE_URL"] = "http://127.0.0.1:5001/upload-new" - context.upload_service_s3_bucket = os.environ.get("UPLOAD_SERVICE_S3_BUCKET", None) - os.environ["UPLOAD_SERVICE_S3_BUCKET"] = "my-bucket/my.tar" - context.service_token_for_upload = os.environ.get("SERVICE_TOKEN_FOR_UPLOAD", None) os.environ["SERVICE_TOKEN_FOR_UPLOAD"] = "not-a-real-token" @@ -133,9 +130,6 @@ def after_all(context): if context.upload_service_url is not None: os.environ["UPLOAD_SERVICE_URL"] = context.upload_service_url - if context.upload_service_s3_bucket is not None: - os.environ["UPLOAD_SERVICE_S3_BUCKET"] = context.upload_service_s3_bucket - if context.service_token_for_upload is not None: os.environ["SERVICE_TOKEN_FOR_UPLOAD"] = context.service_token_for_upload