Commit 53fd3ef

Edited config + changed step
htahir1 committed Jan 22, 2024
1 parent 8da2698 commit 53fd3ef
Showing 5 changed files with 138 additions and 136 deletions.
22 changes: 22 additions & 0 deletions llm-finetuning/huggingface/hf_deployment_base_config.py
@@ -0,0 +1,22 @@
from pydantic import BaseModel
from typing import Optional, Dict


class HuggingFaceBaseConfig(BaseModel):
endpoint_name: str
repository: str
framework: str
accelerator: str
instance_size: str
instance_type: str
region: str
vendor: str
token: str
account_id: Optional[str] = None
min_replica: Optional[int] = 0
max_replica: Optional[int] = 1
revision: Optional[str] = None
task: Optional[str] = None
custom_image: Optional[Dict] = None
namespace: Optional[str] = None
endpoint_type: str = "public"
llm-finetuning/huggingface/hf_deployment_service.py
@@ -8,6 +8,7 @@
InferenceEndpoint,
)
from huggingface_hub import create_inference_endpoint, get_inference_endpoint
from huggingface.hf_deployment_base_config import HuggingFaceBaseConfig

from pydantic import Field

@@ -16,26 +17,8 @@
POLLING_TIMEOUT = 1200


class HuggingFaceDeploymentConfig(ServiceConfig):
"""Base class for all ZenML model deployer configurations."""

endpoint_name: str
repository: str
framework: str
accelerator: str
instance_size: str
instance_type: str
region: str
vendor: str
token: str
account_id: Optional[str] = None
min_replica: Optional[int] = 0
max_replica: Optional[int] = 1
revision: Optional[str] = None
task: Optional[str] = None
custom_image: Optional[Dict] = None
namespace: Optional[str] = None
endpoint_type: str = "public"
class HuggingFaceServiceConfig(HuggingFaceBaseConfig, ServiceConfig):
"""Base class for Huggingface configurations."""


class HuggingFaceServiceStatus(ServiceStatus):
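
The config refactor above relies on Pydantic multiple inheritance: HuggingFaceServiceConfig now gets its endpoint fields from HuggingFaceBaseConfig and its pipeline metadata from ZenML's ServiceConfig. A minimal sketch of how the field sets merge, using stand-in classes rather than the repo's code:

from pydantic import BaseModel

class EndpointFields(BaseModel):    # stand-in for HuggingFaceBaseConfig
    endpoint_name: str

class ServiceFields(BaseModel):     # stand-in for ZenML's ServiceConfig
    pipeline_name: str = ""

class CombinedConfig(EndpointFields, ServiceFields):
    pass

# Fields from both parents are available on the subclass.
print(sorted(CombinedConfig(endpoint_name="demo").dict()))
# ['endpoint_name', 'pipeline_name']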
@@ -51,12 +34,12 @@ class HuggingFaceDeploymentService(BaseDeploymentService):
flavor="hfendpoint",
description="Huggingface inference endpoint service",
)
config: HuggingFaceDeploymentConfig
config: HuggingFaceServiceConfig
status: HuggingFaceServiceStatus = Field(
default_factory=lambda: HuggingFaceServiceStatus()
)

def __init__(self, config: HuggingFaceDeploymentConfig, **attrs: Any):
def __init__(self, config: HuggingFaceServiceConfig, **attrs: Any):
"""_summary_."""
super().__init__(config=config, **attrs)

@@ -73,6 +56,27 @@ def hf_endpoint(self) -> InferenceEndpoint:
namespace=self.config.namespace,
)

@property
def prediction_url(self) -> Optional[str]:
"""The prediction URI exposed by the prediction service.
Returns:
The prediction URI exposed by the prediction service, or None if
the service is not yet ready.
"""
if not self.is_running:
return None
return self.hf_endpoint.url

@property
def inference_client(self) -> InferenceClient:
"""_summary_.
Returns:
InferenceClient: _description_
"""
return self.hf_endpoint.client

def provision(self) -> None:
"""_summary_."""

@@ -105,22 +109,14 @@ def provision(self) -> None:
"Failed to start huggingface inference endpoint service..."
)

def _get_client(self) -> InferenceClient:
"""_summary_.
Returns:
InferenceClient: _description_
"""
return self.hf_endpoint.client

def check_status(self) -> Tuple[ServiceState, str]:
"""_summary_.
Returns:
Tuple[ServiceState, str]: _description_
"""
try:
_ = self._get_client()
_ = self.inference_client
except InferenceEndpointError:
return (ServiceState.INACTIVE, "")

@@ -170,9 +166,8 @@ def predict(self, data: "Any", max_new_tokens: int) -> "Any":
"Please start the service before making predictions."
)
if self.hf_endpoint.prediction_url is not None:
client = self._get_client()
if self.hf_endpoint.task == "text-generation":
result = client.task_generation(
result = self.inference_client.task_generation(
data, max_new_tokens=max_new_tokens
)
else:
@@ -181,18 +176,6 @@ def predict(self, data: "Any", max_new_tokens: int) -> "Any":
)
return result

@property
def prediction_url(self) -> Optional[str]:
"""The prediction URI exposed by the prediction service.
Returns:
The prediction URI exposed by the prediction service, or None if
the service is not yet ready.
"""
if not self.is_running:
return None
return self.hf_endpoint.url

def get_logs(
self, follow: bool = False, tail: int = None
) -> Generator[str, bool, None]:
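
With inference_client promoted from the private _get_client helper to a public property next to prediction_url, callers no longer need to reach into service internals. A hedged usage sketch — the config values are placeholders, and text_generation is the standard huggingface_hub InferenceClient method, not something this module defines:

from huggingface.hf_deployment_service import (
    HuggingFaceDeploymentService,
    HuggingFaceServiceConfig,
)

service_config = HuggingFaceServiceConfig(
    endpoint_name="llm-endpoint",
    repository="gpt2",
    framework="pytorch",
    accelerator="gpu",
    instance_size="medium",
    instance_type="g5.2xlarge",
    region="us-east-1",
    vendor="aws",
    token="hf_xxx",              # placeholder token
    task="text-generation",
)

service = HuggingFaceDeploymentService(config=service_config)
service.start(timeout=1200)  # provision the endpoint and wait for it

if service.is_running:
    print(service.prediction_url)      # endpoint URL once ready
    client = service.inference_client  # huggingface_hub InferenceClient
    print(client.text_generation("Hello", max_new_tokens=16))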
102 changes: 63 additions & 39 deletions llm-finetuning/huggingface/hf_model_deployer.py
@@ -5,10 +5,11 @@

from typing import List, Optional, cast, ClassVar, Type, Dict
from zenml.services import BaseService, ServiceConfig
from huggingface.hf_deployment import (
from huggingface.hf_deployment_service import (
HuggingFaceDeploymentService,
HuggingFaceDeploymentConfig,
HuggingFaceServiceConfig,
)
from huggingface.hf_model_deployer_flavor import (
    HuggingFaceModelDeployerSettings,
    HuggingFaceModelDeployerConfig,
)
from zenml.model_deployers.base_model_deployer import (
DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
BaseModelDeployerFlavor,
@@ -25,6 +26,60 @@ class HuggingFaceModelDeployer(BaseModelDeployer):
Type[BaseModelDeployerFlavor]
] = HuggingFaceModelDeployerFlavor

@property
def config(self) -> HuggingFaceModelDeployerConfig:
"""Config class for the Huggingface Model deployer settings class.
Returns:
The configuration.
"""
return cast(HuggingFaceModelDeployerConfig, self._config)

@property
def settings_class(self) -> Type[HuggingFaceModelDeployerSettings]:
"""Settings class for the Huggingface Model deployer settings class.
Returns:
The settings class.
"""
return HuggingFaceModelDeployerSettings

def _create_new_service(
self, timeout: int, config: HuggingFaceServiceConfig
) -> HuggingFaceDeploymentService:
"""Creates a new HuggingFaceDeploymentService.
Args:
timeout: the timeout in seconds to wait for the Huggingface inference endpoint
to be provisioned and successfully started or updated.
    config: the configuration of the model to be deployed with the Huggingface model deployer.
Returns:
        The HuggingFaceDeploymentService object that can be used to interact
        with the Huggingface inference endpoint.
"""
# create a new service for the new model
service = HuggingFaceDeploymentService(config)
service.start(timeout=timeout)
return service

def _clean_up_existing_service(
self,
timeout: int,
force: bool,
existing_service: HuggingFaceDeploymentService,
) -> None:
"""_summary_
Args:
timeout (int): _description_
force (bool): _description_
existing_service (HuggingFaceDeploymentService): _description_
"""
# stop the older service
existing_service.stop(timeout=timeout, force=force)


def deploy_model(
self,
config: ServiceConfig,
@@ -46,14 +101,18 @@ def deploy_model(
Returns:
    BaseService: the deployment service for the deployed model server.
"""
config = cast(HuggingFaceDeploymentConfig, config)
config = cast(HuggingFaceServiceConfig, config)

# Add zenml prefix
if not config.endpoint_name.startswith("zenml-"):
config.endpoint_name = "zenml-" + config.endpoint_name

# if replace is True, remove all existing services
if replace is True:
existing_services = self.find_model_server(
pipeline_name=config.pipeline_name,
pipeline_step_name=config.pipeline_step_name,
model_name=config.model_name,
model_name=config.repository,
)

for existing_service in existing_services:
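
deploy_model now normalizes the endpoint name with a zenml- prefix and, when replace=True, matches existing services on config.repository instead of a model_name attribute. A sketch of the calling side, assuming ZenML's get_active_model_deployer lookup on the base deployer and reusing service_config from the earlier sketch:

from huggingface.hf_model_deployer import HuggingFaceModelDeployer

deployer = HuggingFaceModelDeployer.get_active_model_deployer()
service = deployer.deploy_model(
    config=service_config,  # a HuggingFaceServiceConfig
    replace=True,           # stop matching existing services first
    timeout=1200,
)
# endpoint_name "llm-endpoint" is deployed as "zenml-llm-endpoint"
print(service.prediction_url)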
@@ -92,41 +151,6 @@ def deploy_model(

return cast(BaseService, service)

def _create_new_service(
self, timeout: int, config: HuggingFaceDeploymentConfig
) -> HuggingFaceDeploymentService:
"""Creates a new HuggingFaceDeploymentService.
Args:
timeout: the timeout in seconds to wait for the Huggingface inference endpoint
to be provisioned and successfully started or updated.
config: the configuration of the model to be deployed with Hugginface model deployer.
Returns:
The HuggingFaceDeploymentConfig object that can be used to interact
with the Huggingface inference endpoint.
"""
# create a new service for the new model
service = HuggingFaceDeploymentService(config)
service.start(timeout=timeout)
return service

def _clean_up_existing_service(
self,
timeout: int,
force: bool,
existing_service: HuggingFaceDeploymentService,
) -> None:
"""_summary_
Args:
timeout (int): _description_
force (bool): _description_
existing_service (HuggingFaceDeploymentService): _description_
"""
# stop the older service
existing_service.stop(timeout=timeout, force=force)

def find_model_server(
self,
running: bool,
13 changes: 11 additions & 2 deletions llm-finetuning/huggingface/hf_model_deployer_flavor.py
@@ -4,6 +4,9 @@
BaseModelDeployerFlavor,
BaseModelDeployerConfig,
)
from zenml.config.base_settings import BaseSettings
from huggingface.hf_deployment_base_config import HuggingFaceBaseConfig
from pydantic import BaseModel
from zenml.utils.secret_utils import SecretField

if TYPE_CHECKING:
@@ -13,12 +16,18 @@
HUGGINGFACE_MODEL_DEPLOYER_FLAVOR = "hfendpoint"


class HuggingFaceModelDeployerConfig(BaseModelDeployerConfig):
"""Configuration for the Huggingface model deployer."""
class HuggingFaceModelDeployerSettings(HuggingFaceBaseConfig, BaseSettings):
"""Settings for the Huggingface model deployer."""

token: str = SecretField()


class HuggingFaceModelDeployerConfig(
BaseModelDeployerConfig, HuggingFaceModelDeployerSettings
):
"""Configuration for the Huggingface model deployer."""


class HuggingFaceModelDeployerFlavor(BaseModelDeployerFlavor):
"""Huggingface Endpoint model deployer flavor."""

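
Because token is declared as a SecretField, the settings can reference a ZenML secret rather than embed a literal token. A hedged sketch, assuming a ZenML secret named hf with a token key and its {{secret.key}} reference syntax; all other values are placeholders:

from huggingface.hf_model_deployer_flavor import HuggingFaceModelDeployerSettings

settings = HuggingFaceModelDeployerSettings(
    endpoint_name="llm-endpoint",
    repository="gpt2",
    framework="pytorch",
    accelerator="gpu",
    instance_size="medium",
    instance_type="g5.2xlarge",
    region="us-east-1",
    vendor="aws",
    token="{{hf.token}}",  # ZenML secret reference, resolved at runtime
)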
