Skip to content

Commit

Permalink
Enhance local deployment pipeline with chat interface integration and…
Browse files Browse the repository at this point in the history
… Kubernetes configuration updates
  • Loading branch information
safoinme committed Dec 10, 2024
1 parent dd6ab06 commit dfe2f01
Show file tree
Hide file tree
Showing 9 changed files with 369 additions and 17 deletions.
2 changes: 2 additions & 0 deletions llm-complete-guide/pipelines/local_deployment.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from steps.bento_builder import bento_builder
from steps.bento_deployment import bento_deployment
from steps.visualize_chat import create_chat_interface
from zenml import pipeline


@pipeline(enable_cache=False)
def local_deployment():
    """Deploy the RAG service locally via BentoML.

    Pipeline steps, in order:
      1. ``bento_builder`` — builds the BentoML bundle for the current model.
      2. ``bento_deployment`` — deploys that bundle as a local BentoML service.
      3. ``create_chat_interface`` — launches the chat UI on top of the
         running deployment.

    Caching is disabled so every run rebuilds and redeploys the service.
    """
    bento = bento_builder()
    bento_deployment(bento)
    create_chat_interface()
24 changes: 21 additions & 3 deletions llm-complete-guide/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
rag_deployment,
llm_index_and_evaluate,
local_deployment,
production_deployment,
)
from structures import Document
from zenml.materializers.materializer_registry import materializer_registry
Expand Down Expand Up @@ -144,6 +145,12 @@
default=None,
help="Path to config",
)
@click.option(
"--env",
"env",
default="local",
help="The environment to use for the completion.",
)
def main(
pipeline: str,
query_text: Optional[str] = None,
Expand All @@ -154,6 +161,7 @@ def main(
use_argilla: bool = False,
use_reranker: bool = False,
config: Optional[str] = None,
env: str = "local",
):
"""Main entry point for the pipeline execution.
Expand All @@ -167,6 +175,7 @@ def main(
use_argilla (bool): If True, Argilla annotations will be used
use_reranker (bool): If True, rerankers will be used
config (Optional[str]): Path to config file
env (str): The environment to use for the deployment (local, huggingface space, k8s etc.)
"""
pipeline_args = {"enable_cache": not no_cache}
embeddings_finetune_args = {
Expand Down Expand Up @@ -259,9 +268,18 @@ def main(
)()

elif pipeline == "deploy":
#rag_deployment.with_options(model=zenml_model, **pipeline_args)()
local_deployment.with_options(model=zenml_model, **pipeline_args)()

if env == "local":
local_deployment.with_options(
model=zenml_model, config_path=config_path, **pipeline_args
)()
elif env == "huggingface":
rag_deployment.with_options(
model=zenml_model, config_path=config_path, **pipeline_args
)()
elif env == "k8s":
production_deployment.with_options(
model=zenml_model, config_path=config_path, **pipeline_args
)()
elif pipeline == "evaluation":
pipeline_args["enable_cache"] = False
llm_eval.with_options(model=zenml_model, config_path=config_path)()
Expand Down
12 changes: 12 additions & 0 deletions llm-complete-guide/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,18 @@
"timeout": 300,
"concurrency": 256,
},
http={
"cors": {
"enabled": True,
"access_control_allow_origins": ["https://cloud.zenml.io"], # Add your allowed origins
"access_control_allow_methods": ["GET", "OPTIONS", "POST", "HEAD", "PUT"],
"access_control_allow_credentials": True,
"access_control_allow_headers": ["*"],
# "access_control_allow_origin_regex": "https://.*\.my_org\.com", # Optional regex
"access_control_max_age": 1200,
"access_control_expose_headers": ["Content-Length"],
}
}
)
class RAGService:
"""RAG service for generating responses using LLM and RAG."""
Expand Down
2 changes: 2 additions & 0 deletions llm-complete-guide/steps/bento_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
)
from zenml.integrations.bentoml.steps import bento_builder_step
from zenml.logger import get_logger
from zenml.orchestrators.utils import get_config_environment_vars
from zenml.utils import source_utils

logger = get_logger(__name__)
Expand Down Expand Up @@ -64,6 +65,7 @@ def bento_builder() -> (
if Client().active_stack.orchestrator.flavor == "local":
model = get_step_context().model
version_to_deploy = Model(name=model.name, version="production")
logger.info(f"Building BentoML bundle for model: {version_to_deploy.name}")
# Build the BentoML bundle
bento = bentos.build(
service="service.py:RAGService",
Expand Down
9 changes: 4 additions & 5 deletions llm-complete-guide/steps/bento_dockerizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

logger = get_logger(__name__)

@step
@step(enable_cache=False)
def bento_dockerizer() -> (
Annotated[
str,
Expand All @@ -40,12 +40,11 @@ def bento_dockerizer() -> (
This step is responsible for dockerizing the BentoML model.
"""
### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ###
zenml_client = Client()
model = get_step_context().model
version_to_deploy = Model(name=model.name, version="production")
bentoml_deployment = version_to_deploy.get_model_artifact(name="bentoml_rag_deployment")
version_to_deploy = Model(name=model.name)
bentoml_deployment = zenml_client.get_artifact_version(name_id_or_prefix="bentoml_rag_deployment")
bento_tag = f'{bentoml_deployment.run_metadata["bento_tag_name"]}:{bentoml_deployment.run_metadata["bento_info_version"]}'

zenml_client = Client()
container_registry = zenml_client.active_stack.container_registry
assert container_registry, "Container registry is not configured."
image_name = f"{container_registry.config.uri}/{bento_tag}"
Expand Down
61 changes: 57 additions & 4 deletions llm-complete-guide/steps/k8s_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
from pathlib import Path
from typing import Dict, Optional
import re
from pathlib import Path
from typing import Dict, Optional, cast

import yaml
from kubernetes import client, config
from kubernetes.client.rest import ApiException
from zenml import get_step_context, step
from zenml.client import Client
from zenml.integrations.bentoml.services.bentoml_local_deployment import (
BentoMLLocalDeploymentConfig,
BentoMLLocalDeploymentService,
)
from zenml.logger import get_logger
from zenml.orchestrators.utils import get_config_environment_vars

logger = get_logger(__name__)

Expand Down Expand Up @@ -93,7 +99,7 @@ def apply_kubernetes_configuration(k8s_configs: list) -> None:
logger.error(f"Error applying {kind} {name}: {e}")
raise e

@step
@step(enable_cache=False)
def k8s_deployment(
docker_image_tag: str,
namespace: str = "default"
Expand All @@ -103,6 +109,17 @@ def k8s_deployment(
# Sanitize the model name
model_name = sanitize_name(raw_model_name)

# Get environment variables
environment_vars = get_config_environment_vars()

# Get current deployment
zenml_client = Client()
model_deployer = zenml_client.active_stack.model_deployer
services = model_deployer.find_model_server(
model_name=model_name,
model_version="production",
)

# Read the K8s template
template_path = Path(__file__).parent / "k8s_template.yaml"
with open(template_path, "r") as f:
Expand All @@ -120,6 +137,23 @@ def k8s_deployment(
if config["kind"] == "Service":
# Update service selector
config["spec"]["selector"]["app"] = model_name

# Update metadata annotations with SSL certificate ARN
config["metadata"]["annotations"] = {
"service.beta.kubernetes.io/aws-load-balancer-ssl-cert": "arn:aws:acm:eu-central-1:339712793861:certificate/0426ace8-5fa3-40dd-bd81-b0fb1064bd85",
"service.beta.kubernetes.io/aws-load-balancer-backend-protocol": "http",
"service.beta.kubernetes.io/aws-load-balancer-ssl-ports": "443",
"service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600"
}

# Update ports
config["spec"]["ports"] = [
{
"name": "https",
"port": 443,
"targetPort": 3000
}
]

elif config["kind"] == "Deployment":
# Update deployment selector and template
Expand All @@ -131,6 +165,12 @@ def k8s_deployment(
for container in containers:
container["name"] = model_name
container["image"] = docker_image_tag

# Add environment variables to the container
env_vars = []
for key, value in environment_vars.items():
env_vars.append({"name": key, "value": value})
container["env"] = env_vars

# Apply the configurations
try:
Expand All @@ -149,9 +189,22 @@ def k8s_deployment(
"namespace": namespace,
"status": deployment_status,
"service_port": 3000,
"configurations": k8s_configs
"configurations": k8s_configs,
"url": "chat-rag.staging.cloudinfra.zenml.io"
}

if services:
bentoml_deployment= cast(BentoMLLocalDeploymentService, services[0])
zenml_client.update_service(
id=bentoml_deployment.uuid,
prediction_url="https://chat-rag.staging.cloudinfra.zenml.io",
health_check_url="https://chat-rag.staging.cloudinfra.zenml.io/healthz",
labels={
"docker_image": docker_image_tag,
"namespace": namespace,
}
)

return deployment_info


Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
apiVersion: v1
kind: Service
metadata:
name: placeholder
labels:
app: placeholder
name: placeholder
annotations:
service.beta.kubernetes.io/aws-load-balancer-ssl-cert: arn:aws:acm:region:account-id:certificate/certificate-id
service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
service.beta.kubernetes.io/aws-load-balancer-ssl-ports: "443"
service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600"
spec:
ports:
- name: http # Changed from 'predict' to 'http' for clarity
port: 80 # External port exposed by LoadBalancer
targetPort: 3000 # Internal container port
selector:
app: placeholder
type: LoadBalancer
ports:
- name: https
port: 443 # External port exposed by LoadBalancer (HTTPS)
targetPort: 3000 # Internal container port
---
apiVersion: apps/v1
kind: Deployment
Expand Down
Loading

0 comments on commit dfe2f01

Please sign in to comment.