Skip to content

Commit

Permalink
Automatically detect whether code download is necessary (#2946)
Browse files Browse the repository at this point in the history
* Automatically detect whether code download is necessary

* Cleanup unused variables

* Make sure /app dir exists in azureml entrypoint

* Re-Add config path env variable
  • Loading branch information
schustmi authored Aug 27, 2024
1 parent 8ffd9a7 commit 25d8023
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 26 deletions.
1 change: 0 additions & 1 deletion src/zenml/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ def handle_int_env_var(var: str, default: int = 0) -> int:
)
ENV_ZENML_DISABLE_WORKSPACE_WARNINGS = "ZENML_DISABLE_WORKSPACE_WARNINGS"
ENV_ZENML_SKIP_IMAGE_BUILDER_DEFAULT = "ZENML_SKIP_IMAGE_BUILDER_DEFAULT"
ENV_ZENML_REQUIRES_CODE_DOWNLOAD = "ZENML_REQUIRES_CODE_DOWNLOAD"
ENV_ZENML_SERVER = "ZENML_SERVER"
ENV_ZENML_LOCAL_SERVER = "ZENML_LOCAL_SERVER"
ENV_ZENML_ENFORCE_TYPE_ANNOTATIONS = "ZENML_ENFORCE_TYPE_ANNOTATIONS"
Expand Down
57 changes: 48 additions & 9 deletions src/zenml/entrypoints/base_entrypoint_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,11 @@
import os
import sys
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Set
from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Optional, Set
from uuid import UUID

from zenml.client import Client
from zenml.code_repositories import BaseCodeRepository
from zenml.constants import (
ENV_ZENML_REQUIRES_CODE_DOWNLOAD,
handle_bool_env_var,
)
from zenml.logger import get_logger
from zenml.utils import (
code_repository_utils,
Expand Down Expand Up @@ -194,22 +190,28 @@ def load_deployment(self) -> "PipelineDeploymentResponse":
return Client().zen_store.get_deployment(deployment_id=deployment_id)

def download_code_if_necessary(
self, deployment: "PipelineDeploymentResponse"
self,
deployment: "PipelineDeploymentResponse",
step_name: Optional[str] = None,
) -> None:
"""Downloads user code if necessary.
Args:
deployment: The deployment for which to download the code.
step_name: Name of the step to be run. This will be used to
determine whether code download is necessary. If not given,
the DockerSettings of the pipeline will be used to make that
decision instead.
Raises:
RuntimeError: If the current environment requires code download
but the deployment does not have a reference to any code.
"""
requires_code_download = handle_bool_env_var(
ENV_ZENML_REQUIRES_CODE_DOWNLOAD
should_download_code = self._should_download_code(
deployment=deployment, step_name=step_name
)

if not requires_code_download:
if not should_download_code:
return

if code_reference := deployment.code_reference:
Expand Down Expand Up @@ -284,6 +286,43 @@ def download_code_from_artifact_store(self, code_path: str) -> None:
sys.path.insert(0, extract_dir)
os.chdir(extract_dir)

def _should_download_code(
self,
deployment: "PipelineDeploymentResponse",
step_name: Optional[str] = None,
) -> bool:
"""Checks whether code should be downloaded.
Args:
deployment: The deployment to check.
step_name: Name of the step to be run. This will be used to
determine whether code download is necessary. If not given,
the DockerSettings of the pipeline will be used to make that
decision instead.
Returns:
Whether code should be downloaded.
"""
docker_settings = (
deployment.step_configurations[step_name].config.docker_settings
if step_name
else deployment.pipeline_configuration.docker_settings
)

if (
deployment.code_reference
and docker_settings.allow_download_from_code_repository
):
return True

if (
deployment.code_path
and docker_settings.allow_download_from_artifact_store
):
return True

return False

@abstractmethod
def run(self) -> None:
"""Runs the entrypoint configuration."""
7 changes: 5 additions & 2 deletions src/zenml/entrypoints/step_entrypoint_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,12 @@ def run(self) -> None:
# and stack component flavors are registered.
integration_registry.activate_integrations()

self.download_code_if_necessary(deployment=deployment)

step_name = self.entrypoint_args[STEP_NAME_OPTION]

self.download_code_if_necessary(
deployment=deployment, step_name=step_name
)

pipeline_name = deployment.pipeline_configuration.name

step = deployment.step_configurations[step_name]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def run(self) -> None:

# Azure automatically changes the working directory, we have to set it
# back to /app before running the step.
os.makedirs("/app", exist_ok=True)
os.chdir("/app")

# Run the step
Expand Down
12 changes: 1 addition & 11 deletions src/zenml/utils/pipeline_docker_image_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
ENV_ZENML_CONFIG_PATH,
ENV_ZENML_ENABLE_REPO_INIT_WARNINGS,
ENV_ZENML_LOGGING_COLORS_DISABLED,
ENV_ZENML_REQUIRES_CODE_DOWNLOAD,
handle_bool_env_var,
)
from zenml.enums import OperatingSystemType
Expand All @@ -55,10 +54,7 @@
logger = get_logger(__name__)

DOCKER_IMAGE_WORKDIR = "/app"
DOCKER_IMAGE_ZENML_CONFIG_DIR = ".zenconfig"
DOCKER_IMAGE_ZENML_CONFIG_PATH = (
f"{DOCKER_IMAGE_WORKDIR}/{DOCKER_IMAGE_ZENML_CONFIG_DIR}"
)
DOCKER_IMAGE_ZENML_CONFIG_PATH = f"{DOCKER_IMAGE_WORKDIR}/.zenconfig"

DEFAULT_DOCKER_PARENT_IMAGE = (
f"zenmldocker/zenml:{zenml.__version__}-"
Expand Down Expand Up @@ -334,7 +330,6 @@ def build_docker_image(
dockerfile = self._generate_zenml_pipeline_dockerfile(
parent_image=parent_image,
docker_settings=docker_settings,
download_files=download_files,
requirements_files=requirements_files,
apt_packages=apt_packages,
entrypoint=entrypoint,
Expand Down Expand Up @@ -563,7 +558,6 @@ def gather_requirements_files(
def _generate_zenml_pipeline_dockerfile(
parent_image: str,
docker_settings: DockerSettings,
download_files: bool,
requirements_files: Sequence[Tuple[str, str, List[str]]] = (),
apt_packages: Sequence[str] = (),
entrypoint: Optional[str] = None,
Expand All @@ -573,7 +567,6 @@ def _generate_zenml_pipeline_dockerfile(
Args:
parent_image: The image to use as parent for the Dockerfile.
docker_settings: Docker settings for this image build.
download_files: Whether to download files in the build context.
requirements_files: List of tuples that contain three items:
- the name of a requirements file,
- the content of that file,
Expand Down Expand Up @@ -641,9 +634,6 @@ def _generate_zenml_pipeline_dockerfile(
)

lines.append(f"ENV {ENV_ZENML_ENABLE_REPO_INIT_WARNINGS}=False")
if download_files:
lines.append(f"ENV {ENV_ZENML_REQUIRES_CODE_DOWNLOAD}=True")

lines.append(
f"ENV {ENV_ZENML_CONFIG_PATH}={DOCKER_IMAGE_ZENML_CONFIG_PATH}"
)
Expand Down
3 changes: 0 additions & 3 deletions tests/unit/utils/test_pipeline_docker_image_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ def test_check_user_is_set():
PipelineDockerImageBuilder._generate_zenml_pipeline_dockerfile(
"image:tag",
docker_settings,
download_files=False,
)
)
assert "USER" not in generated_dockerfile
Expand All @@ -41,7 +40,6 @@ def test_check_user_is_set():
PipelineDockerImageBuilder._generate_zenml_pipeline_dockerfile(
"image:tag",
docker_settings,
download_files=False,
)
)
assert "USER test_user" in generated_dockerfile
Expand Down Expand Up @@ -148,7 +146,6 @@ def test_python_package_installer_args():
PipelineDockerImageBuilder._generate_zenml_pipeline_dockerfile(
"image:tag",
docker_settings,
download_files=False,
requirements_files=requirements_files,
)
)
Expand Down

0 comments on commit 25d8023

Please sign in to comment.