From 25d8023f518bc2075bbfb701f1eb825c2d2449ee Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Tue, 27 Aug 2024 13:57:00 +0200 Subject: [PATCH] Automatically detect whether code download is necessary (#2946) * Automatically detect whether code download is necessary * Cleanup unused variables * Make sure /app dir exists in azureml entrypoint * Re-Add config path env variable --- src/zenml/constants.py | 1 - .../base_entrypoint_configuration.py | 57 ++++++++++++++++--- .../step_entrypoint_configuration.py | 7 ++- .../azureml_orchestrator_entrypoint_config.py | 1 + .../utils/pipeline_docker_image_builder.py | 12 +--- .../test_pipeline_docker_image_builder.py | 3 - 6 files changed, 55 insertions(+), 26 deletions(-) diff --git a/src/zenml/constants.py b/src/zenml/constants.py index cf73ce691f7..ca790da7ba1 100644 --- a/src/zenml/constants.py +++ b/src/zenml/constants.py @@ -162,7 +162,6 @@ def handle_int_env_var(var: str, default: int = 0) -> int: ) ENV_ZENML_DISABLE_WORKSPACE_WARNINGS = "ZENML_DISABLE_WORKSPACE_WARNINGS" ENV_ZENML_SKIP_IMAGE_BUILDER_DEFAULT = "ZENML_SKIP_IMAGE_BUILDER_DEFAULT" -ENV_ZENML_REQUIRES_CODE_DOWNLOAD = "ZENML_REQUIRES_CODE_DOWNLOAD" ENV_ZENML_SERVER = "ZENML_SERVER" ENV_ZENML_LOCAL_SERVER = "ZENML_LOCAL_SERVER" ENV_ZENML_ENFORCE_TYPE_ANNOTATIONS = "ZENML_ENFORCE_TYPE_ANNOTATIONS" diff --git a/src/zenml/entrypoints/base_entrypoint_configuration.py b/src/zenml/entrypoints/base_entrypoint_configuration.py index e35ead1cedb..2785f778f62 100644 --- a/src/zenml/entrypoints/base_entrypoint_configuration.py +++ b/src/zenml/entrypoints/base_entrypoint_configuration.py @@ -17,15 +17,11 @@ import os import sys from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Set +from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Optional, Set from uuid import UUID from zenml.client import Client from zenml.code_repositories import BaseCodeRepository -from zenml.constants import ( - ENV_ZENML_REQUIRES_CODE_DOWNLOAD, - handle_bool_env_var, -) from zenml.logger import get_logger from zenml.utils import ( code_repository_utils, @@ -194,22 +190,28 @@ def load_deployment(self) -> "PipelineDeploymentResponse": return Client().zen_store.get_deployment(deployment_id=deployment_id) def download_code_if_necessary( - self, deployment: "PipelineDeploymentResponse" + self, + deployment: "PipelineDeploymentResponse", + step_name: Optional[str] = None, ) -> None: """Downloads user code if necessary. Args: deployment: The deployment for which to download the code. + step_name: Name of the step to be run. This will be used to + determine whether code download is necessary. If not given, + the DockerSettings of the pipeline will be used to make that + decision instead. Raises: RuntimeError: If the current environment requires code download but the deployment does not have a reference to any code. """ - requires_code_download = handle_bool_env_var( - ENV_ZENML_REQUIRES_CODE_DOWNLOAD + should_download_code = self._should_download_code( + deployment=deployment, step_name=step_name ) - if not requires_code_download: + if not should_download_code: return if code_reference := deployment.code_reference: @@ -284,6 +286,43 @@ def download_code_from_artifact_store(self, code_path: str) -> None: sys.path.insert(0, extract_dir) os.chdir(extract_dir) + def _should_download_code( + self, + deployment: "PipelineDeploymentResponse", + step_name: Optional[str] = None, + ) -> bool: + """Checks whether code should be downloaded. + + Args: + deployment: The deployment to check. + step_name: Name of the step to be run. This will be used to + determine whether code download is necessary. If not given, + the DockerSettings of the pipeline will be used to make that + decision instead. + + Returns: + Whether code should be downloaded. + """ + docker_settings = ( + deployment.step_configurations[step_name].config.docker_settings + if step_name + else deployment.pipeline_configuration.docker_settings + ) + + if ( + deployment.code_reference + and docker_settings.allow_download_from_code_repository + ): + return True + + if ( + deployment.code_path + and docker_settings.allow_download_from_artifact_store + ): + return True + + return False + @abstractmethod def run(self) -> None: """Runs the entrypoint configuration.""" diff --git a/src/zenml/entrypoints/step_entrypoint_configuration.py b/src/zenml/entrypoints/step_entrypoint_configuration.py index cf1f09bf2a9..047dfc11d54 100644 --- a/src/zenml/entrypoints/step_entrypoint_configuration.py +++ b/src/zenml/entrypoints/step_entrypoint_configuration.py @@ -151,9 +151,12 @@ def run(self) -> None: # and stack component flavors are registered. integration_registry.activate_integrations() - self.download_code_if_necessary(deployment=deployment) - step_name = self.entrypoint_args[STEP_NAME_OPTION] + + self.download_code_if_necessary( + deployment=deployment, step_name=step_name + ) + pipeline_name = deployment.pipeline_configuration.name step = deployment.step_configurations[step_name] diff --git a/src/zenml/integrations/azure/orchestrators/azureml_orchestrator_entrypoint_config.py b/src/zenml/integrations/azure/orchestrators/azureml_orchestrator_entrypoint_config.py index 3c9b45b0d82..89431cd0feb 100644 --- a/src/zenml/integrations/azure/orchestrators/azureml_orchestrator_entrypoint_config.py +++ b/src/zenml/integrations/azure/orchestrators/azureml_orchestrator_entrypoint_config.py @@ -69,6 +69,7 @@ def run(self) -> None: # Azure automatically changes the working directory, we have to set it # back to /app before running the step. + os.makedirs("/app", exist_ok=True) os.chdir("/app") # Run the step diff --git a/src/zenml/utils/pipeline_docker_image_builder.py b/src/zenml/utils/pipeline_docker_image_builder.py index 319569c48b6..c0295ffc4f9 100644 --- a/src/zenml/utils/pipeline_docker_image_builder.py +++ b/src/zenml/utils/pipeline_docker_image_builder.py @@ -38,7 +38,6 @@ ENV_ZENML_CONFIG_PATH, ENV_ZENML_ENABLE_REPO_INIT_WARNINGS, ENV_ZENML_LOGGING_COLORS_DISABLED, - ENV_ZENML_REQUIRES_CODE_DOWNLOAD, handle_bool_env_var, ) from zenml.enums import OperatingSystemType @@ -55,10 +54,7 @@ logger = get_logger(__name__) DOCKER_IMAGE_WORKDIR = "/app" -DOCKER_IMAGE_ZENML_CONFIG_DIR = ".zenconfig" -DOCKER_IMAGE_ZENML_CONFIG_PATH = ( - f"{DOCKER_IMAGE_WORKDIR}/{DOCKER_IMAGE_ZENML_CONFIG_DIR}" -) +DOCKER_IMAGE_ZENML_CONFIG_PATH = f"{DOCKER_IMAGE_WORKDIR}/.zenconfig" DEFAULT_DOCKER_PARENT_IMAGE = ( f"zenmldocker/zenml:{zenml.__version__}-" @@ -334,7 +330,6 @@ def build_docker_image( dockerfile = self._generate_zenml_pipeline_dockerfile( parent_image=parent_image, docker_settings=docker_settings, - download_files=download_files, requirements_files=requirements_files, apt_packages=apt_packages, entrypoint=entrypoint, @@ -563,7 +558,6 @@ def gather_requirements_files( def _generate_zenml_pipeline_dockerfile( parent_image: str, docker_settings: DockerSettings, - download_files: bool, requirements_files: Sequence[Tuple[str, str, List[str]]] = (), apt_packages: Sequence[str] = (), entrypoint: Optional[str] = None, @@ -573,7 +567,6 @@ def _generate_zenml_pipeline_dockerfile( Args: parent_image: The image to use as parent for the Dockerfile. docker_settings: Docker settings for this image build. - download_files: Whether to download files in the build context. requirements_files: List of tuples that contain three items: - the name of a requirements file, - the content of that file, @@ -641,9 +634,6 @@ def _generate_zenml_pipeline_dockerfile( ) lines.append(f"ENV {ENV_ZENML_ENABLE_REPO_INIT_WARNINGS}=False") - if download_files: - lines.append(f"ENV {ENV_ZENML_REQUIRES_CODE_DOWNLOAD}=True") - lines.append( f"ENV {ENV_ZENML_CONFIG_PATH}={DOCKER_IMAGE_ZENML_CONFIG_PATH}" ) diff --git a/tests/unit/utils/test_pipeline_docker_image_builder.py b/tests/unit/utils/test_pipeline_docker_image_builder.py index 171d4a3b9fe..515e889db0d 100644 --- a/tests/unit/utils/test_pipeline_docker_image_builder.py +++ b/tests/unit/utils/test_pipeline_docker_image_builder.py @@ -31,7 +31,6 @@ def test_check_user_is_set(): PipelineDockerImageBuilder._generate_zenml_pipeline_dockerfile( "image:tag", docker_settings, - download_files=False, ) ) assert "USER" not in generated_dockerfile @@ -41,7 +40,6 @@ def test_check_user_is_set(): PipelineDockerImageBuilder._generate_zenml_pipeline_dockerfile( "image:tag", docker_settings, - download_files=False, ) ) assert "USER test_user" in generated_dockerfile @@ -148,7 +146,6 @@ def test_python_package_installer_args(): PipelineDockerImageBuilder._generate_zenml_pipeline_dockerfile( "image:tag", docker_settings, - download_files=False, requirements_files=requirements_files, ) )