Commit 53fd3ef

Edited config + changed step
htahir1 committed Jan 22, 2024
1 parent 8da2698 commit 53fd3ef
Showing 5 changed files with 138 additions and 136 deletions.
22 changes: 22 additions & 0 deletions llm-finetuning/huggingface/hf_deployment_base_config.py
@@ -0,0 +1,22 @@
from pydantic import BaseModel
from typing import Optional, Dict


class HuggingFaceBaseConfig(BaseModel):
endpoint_name: str
repository: str
framework: str
accelerator: str
instance_size: str
instance_type: str
region: str
vendor: str
token: str
account_id: Optional[str] = None
min_replica: Optional[int] = 0
max_replica: Optional[int] = 1
revision: Optional[str] = None
task: Optional[str] = None
custom_image: Optional[Dict] = None
namespace: Optional[str] = None
endpoint_type: str = "public"
llm-finetuning/huggingface/hf_deployment_service.py
@@ -8,6 +8,7 @@
InferenceEndpoint,
)
from huggingface_hub import create_inference_endpoint, get_inference_endpoint
from huggingface.hf_deployment_base_config import HuggingFaceBaseConfig

from pydantic import Field

@@ -16,26 +17,8 @@
POLLING_TIMEOUT = 1200


class HuggingFaceDeploymentConfig(ServiceConfig):
"""Base class for all ZenML model deployer configurations."""

endpoint_name: str
repository: str
framework: str
accelerator: str
instance_size: str
instance_type: str
region: str
vendor: str
token: str
account_id: Optional[str] = None
min_replica: Optional[int] = 0
max_replica: Optional[int] = 1
revision: Optional[str] = None
task: Optional[str] = None
custom_image: Optional[Dict] = None
namespace: Optional[str] = None
endpoint_type: str = "public"
class HuggingFaceServiceConfig(HuggingFaceBaseConfig, ServiceConfig):
"""Base class for Huggingface configurations."""


class HuggingFaceServiceStatus(ServiceStatus):
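
The config refactor above relies on Pydantic multiple inheritance: HuggingFaceServiceConfig now gets its endpoint fields from HuggingFaceBaseConfig and its pipeline metadata from ZenML's ServiceConfig. A minimal sketch of how the field sets merge, using stand-in classes rather than the repo's code:

from pydantic import BaseModel

class EndpointFields(BaseModel):    # stand-in for HuggingFaceBaseConfig
    endpoint_name: str

class ServiceFields(BaseModel):     # stand-in for ZenML's ServiceConfig
    pipeline_name: str = ""

class CombinedConfig(EndpointFields, ServiceFields):
    pass

# Fields from both parents are available on the subclass.
print(sorted(CombinedConfig(endpoint_name="demo").dict()))
# ['endpoint_name', 'pipeline_name']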
@@ -51,12 +34,12 @@ class HuggingFaceDeploymentService(BaseDeploymentService):
flavor="hfendpoint",
description="Huggingface inference endpoint service",
)
config: HuggingFaceDeploymentConfig
config: HuggingFaceServiceConfig
status: HuggingFaceServiceStatus = Field(
default_factory=lambda: HuggingFaceServiceStatus()
)

def __init__(self, config: HuggingFaceDeploymentConfig, **attrs: Any):
def __init__(self, config: HuggingFaceServiceConfig, **attrs: Any):
"""_summary_."""
super().__init__(config=config, **attrs)

@@ -73,6 +56,27 @@ def hf_endpoint(self) -> InferenceEndpoint:
namespace=self.config.namespace,
)

@property
def prediction_url(self) -> Optional[str]:
"""The prediction URI exposed by the prediction service.
Returns:
The prediction URI exposed by the prediction service, or None if
the service is not yet ready.
"""
if not self.is_running:
return None
return self.hf_endpoint.url

@property
def inference_client(self) -> InferenceClient:
"""_summary_.
Returns:
InferenceClient: _description_
"""
return self.hf_endpoint.client

def provision(self) -> None:
"""_summary_."""

@@ -105,22 +109,14 @@ def provision(self) -> None:
"Failed to start huggingface inference endpoint service..."
)

def _get_client(self) -> InferenceClient:
"""_summary_.
Returns:
InferenceClient: _description_
"""
return self.hf_endpoint.client

def check_status(self) -> Tuple[ServiceState, str]:
"""_summary_.
Returns:
Tuple[ServiceState, str]: _description_
"""
try:
_ = self._get_client()
_ = self.inference_client
except InferenceEndpointError:
return (ServiceState.INACTIVE, "")

@@ -170,9 +166,8 @@ def predict(self, data: "Any", max_new_tokens: int) -> "Any":
"Please start the service before making predictions."
)
if self.hf_endpoint.prediction_url is not None:
client = self._get_client()
if self.hf_endpoint.task == "text-generation":
result = client.task_generation(
result = self.inference_client.task_generation(
data, max_new_tokens=max_new_tokens
)
else:
@@ -181,18 +176,6 @@ def predict(self, data: "Any", max_new_tokens: int) -> "Any":
)
return result

@property
def prediction_url(self) -> Optional[str]:
"""The prediction URI exposed by the prediction service.
Returns:
The prediction URI exposed by the prediction service, or None if
the service is not yet ready.
"""
if not self.is_running:
return None
return self.hf_endpoint.url

def get_logs(
self, follow: bool = False, tail: int = None
) -> Generator[str, bool, None]:
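
With inference_client promoted from the private _get_client helper to a public property next to prediction_url, callers no longer need to reach into service internals. A hedged usage sketch — the config values are placeholders, and text_generation is the standard huggingface_hub InferenceClient method, not something this module defines:

from huggingface.hf_deployment_service import (
    HuggingFaceDeploymentService,
    HuggingFaceServiceConfig,
)

service_config = HuggingFaceServiceConfig(
    endpoint_name="llm-endpoint",
    repository="gpt2",
    framework="pytorch",
    accelerator="gpu",
    instance_size="medium",
    instance_type="g5.2xlarge",
    region="us-east-1",
    vendor="aws",
    token="hf_xxx",              # placeholder token
    task="text-generation",
)

service = HuggingFaceDeploymentService(config=service_config)
service.start(timeout=1200)  # provision the endpoint and wait for it

if service.is_running:
    print(service.prediction_url)      # endpoint URL once ready
    client = service.inference_client  # huggingface_hub InferenceClient
    print(client.text_generation("Hello", max_new_tokens=16))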
102 changes: 63 additions & 39 deletions llm-finetuning/huggingface/hf_model_deployer.py
@@ -5,10 +5,11 @@

from typing import List, Optional, cast, ClassVar, Type, Dict
from zenml.services import BaseService, ServiceConfig
from huggingface.hf_deployment import (
from huggingface.hf_deployment_service import (
HuggingFaceDeploymentService,
HuggingFaceDeploymentConfig,
HuggingFaceServiceConfig,
)
from huggingface.hf_model_deployer_flavor import (
    HuggingFaceModelDeployerSettings,
    HuggingFaceModelDeployerConfig,
)
from zenml.model_deployers.base_model_deployer import (
DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT,
BaseModelDeployerFlavor,
@@ -25,6 +26,60 @@ class HuggingFaceModelDeployer(BaseModelDeployer):
Type[BaseModelDeployerFlavor]
] = HuggingFaceModelDeployerFlavor

@property
def config(self) -> HuggingFaceModelDeployerConfig:
"""Config class for the Huggingface Model deployer settings class.
Returns:
The configuration.
"""
return cast(HuggingFaceModelDeployerConfig, self._config)

@property
def settings_class(self) -> Type[HuggingFaceModelDeployerSettings]:
"""Settings class for the Huggingface Model deployer settings class.
Returns:
The settings class.
"""
return HuggingFaceModelDeployerSettings

def _create_new_service(
self, timeout: int, config: HuggingFaceServiceConfig
) -> HuggingFaceDeploymentService:
"""Creates a new HuggingFaceDeploymentService.
Args:
timeout: the timeout in seconds to wait for the Huggingface inference endpoint
to be provisioned and successfully started or updated.
    config: the configuration of the model to be deployed with the Huggingface model deployer.
Returns:
        The HuggingFaceDeploymentService object that can be used to interact
        with the Huggingface inference endpoint.
"""
# create a new service for the new model
service = HuggingFaceDeploymentService(config)
service.start(timeout=timeout)
return service

def _clean_up_existing_service(
self,
timeout: int,
force: bool,
existing_service: HuggingFaceDeploymentService,
) -> None:
"""_summary_
Args:
timeout (int): _description_
force (bool): _description_
existing_service (HuggingFaceDeploymentService): _description_
"""
# stop the older service
existing_service.stop(timeout=timeout, force=force)


def deploy_model(
self,
config: ServiceConfig,
@@ -46,14 +101,18 @@ def deploy_model(
Returns:
    BaseService: the deployment service for the deployed model server.
"""
config = cast(HuggingFaceDeploymentConfig, config)
config = cast(HuggingFaceServiceConfig, config)

# Add zenml prefix
if not config.endpoint_name.startswith("zenml-"):
config.endpoint_name = "zenml-" + config.endpoint_name

# if replace is True, remove all existing services
if replace is True:
existing_services = self.find_model_server(
pipeline_name=config.pipeline_name,
pipeline_step_name=config.pipeline_step_name,
model_name=config.model_name,
model_name=config.repository,
)

for existing_service in existing_services:
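
deploy_model now normalizes the endpoint name with a zenml- prefix and, when replace=True, matches existing services on config.repository instead of a model_name attribute. A sketch of the calling side, assuming ZenML's get_active_model_deployer lookup on the base deployer and reusing service_config from the earlier sketch:

from huggingface.hf_model_deployer import HuggingFaceModelDeployer

deployer = HuggingFaceModelDeployer.get_active_model_deployer()
service = deployer.deploy_model(
    config=service_config,  # a HuggingFaceServiceConfig
    replace=True,           # stop matching existing services first
    timeout=1200,
)
# endpoint_name "llm-endpoint" is deployed as "zenml-llm-endpoint"
print(service.prediction_url)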
@@ -92,41 +151,6 @@ def deploy_model(

return cast(BaseService, service)

def _create_new_service(
self, timeout: int, config: HuggingFaceDeploymentConfig
) -> HuggingFaceDeploymentService:
"""Creates a new HuggingFaceDeploymentService.
Args:
timeout: the timeout in seconds to wait for the Huggingface inference endpoint
to be provisioned and successfully started or updated.
config: the configuration of the model to be deployed with Hugginface model deployer.
Returns:
The HuggingFaceDeploymentConfig object that can be used to interact
with the Huggingface inference endpoint.
"""
# create a new service for the new model
service = HuggingFaceDeploymentService(config)
service.start(timeout=timeout)
return service

def _clean_up_existing_service(
self,
timeout: int,
force: bool,
existing_service: HuggingFaceDeploymentService,
) -> None:
"""_summary_
Args:
timeout (int): _description_
force (bool): _description_
existing_service (HuggingFaceDeploymentService): _description_
"""
# stop the older service
existing_service.stop(timeout=timeout, force=force)

def find_model_server(
self,
running: bool,
13 changes: 11 additions & 2 deletions llm-finetuning/huggingface/hf_model_deployer_flavor.py
@@ -4,6 +4,9 @@
BaseModelDeployerFlavor,
BaseModelDeployerConfig,
)
from zenml.config.base_settings import BaseSettings
from huggingface.hf_deployment_base_config import HuggingFaceBaseConfig
from pydantic import BaseModel
from zenml.utils.secret_utils import SecretField

if TYPE_CHECKING:
@@ -13,12 +16,18 @@
HUGGINGFACE_MODEL_DEPLOYER_FLAVOR = "hfendpoint"


class HuggingFaceModelDeployerConfig(BaseModelDeployerConfig):
"""Configuration for the Huggingface model deployer."""
class HuggingFaceModelDeployerSettings(HuggingFaceBaseConfig, BaseSettings):
"""Settings for the Huggingface model deployer."""

token: str = SecretField()


class HuggingFaceModelDeployerConfig(
BaseModelDeployerConfig, HuggingFaceModelDeployerSettings
):
"""Configuration for the Huggingface model deployer."""


class HuggingFaceModelDeployerFlavor(BaseModelDeployerFlavor):
"""Huggingface Endpoint model deployer flavor."""

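
Because token is declared as a SecretField, the settings can reference a ZenML secret rather than embed a literal token. A hedged sketch, assuming a ZenML secret named hf with a token key and its {{secret.key}} reference syntax; all other values are placeholders:

from huggingface.hf_model_deployer_flavor import HuggingFaceModelDeployerSettings

settings = HuggingFaceModelDeployerSettings(
    endpoint_name="llm-endpoint",
    repository="gpt2",
    framework="pytorch",
    accelerator="gpu",
    instance_size="medium",
    instance_type="g5.2xlarge",
    region="us-east-1",
    vendor="aws",
    token="{{hf.token}}",  # ZenML secret reference, resolved at runtime
)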
