Skip to content

Commit

Permalink
Merge pull request #82 from kookmin-sw/jihun
Browse files Browse the repository at this point in the history
Diffusion Inference Template Code 제작
  • Loading branch information
mh3ong authored May 19, 2024
2 parents d4ecf6b + 9233500 commit 463af5c
Show file tree
Hide file tree
Showing 12 changed files with 406 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,9 @@

endpoint = os.environ.get("ENDPOINT_URL")

def create_request_body(text, extras=None):
if extras is None:
extras = {}

def create_request_body(text):
body = {
"text_prompts": [{"text": text}],
"prompt": text,
}
return body

Expand All @@ -22,7 +19,6 @@ def image_generate(endpoint, prompt):
headers = {"Content-Type": "application/json"}
request_body = create_request_body(
text=prompt,
extras={}
)

res = requests.post(endpoint, data=json.dumps(request_body), headers=headers)
Expand Down
4 changes: 4 additions & 0 deletions automation/diffusion_inference_deploy/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
push_aws_ecr.sh
ecr_login.sh
get_kubeconfig.sh
*test*
13 changes: 13 additions & 0 deletions automation/diffusion_inference_deploy/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# AWS Lambda Python 3.11 base image; WORKDIR is ${LAMBDA_TASK_ROOT} = /var/task
# and the runtime invokes the handler named in CMD ("<module>.<function>").
FROM public.ecr.aws/lambda/python:3.11

# awscli: used at runtime to run `aws eks update-kubeconfig` (see main.py);
# requests: used for DB API calls.
RUN pip install awscli requests --no-cache-dir

# x86_64
# Download the latest stable kubectl into the WORKDIR, i.e. /var/task/kubectl —
# exactly the path main.py hard-codes.
RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \
    && chmod +x ./kubectl

COPY main.py ${LAMBDA_TASK_ROOT}

# NOTE(review): chmod of the directory itself (no -R) — presumably intended to
# make /var/task traversable/executable; confirm whether ./kubectl's own
# chmod above already suffices.
RUN chmod +x /var/task

CMD ["main.handler"]
1 change: 1 addition & 0 deletions automation/diffusion_inference_deploy/IaC/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
var.tf
35 changes: 35 additions & 0 deletions automation/diffusion_inference_deploy/IaC/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Change prefix and container_repository to match your deployment
# (these are the representative names of the created resources).

module "diffusion_inference_deploy" {
  source               = "github.com/kookmin-sw/capstone-2024-12//IaC/serverless_api_template"
  prefix               = "diffusion-inference-deploy"
  container_registry   = "694448341573.dkr.ecr.ap-northeast-2.amazonaws.com"
  container_repository = "diffusion-inference-deploy"
  container_image_tag  = "latest"
  lambda_ram_size      = 2048
  # IAM policies attached to the Lambda role: S3 (model artifacts),
  # EC2/EKS (cluster access), SSM read-only.
  attach_s3_policy           = true
  attach_ec2_policy          = true
  attach_eks_policy          = true
  attach_ssm_readonly_policy = true
  region_name                = var.region
  eks_cluster_name           = var.eks_cluster_name
  db_api_url                 = var.db_api_url
}

# Public invoke URL of the deployed Lambda function.
output "diffusion_inference_deploy_function_url" {
  value = module.diffusion_inference_deploy.function_url
}

provider "aws" {
  region  = var.region
  profile = var.awscli_profile
}

# Remote Terraform state in S3, scoped per component by key.
terraform {
  backend "s3" {
    bucket  = "sskai-terraform-state"
    key     = "diffusion_inference_deploy/tf.state"
    region  = "ap-northeast-2"
    encrypt = true
  }
}
19 changes: 19 additions & 0 deletions automation/diffusion_inference_deploy/IaC/var.tf.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Sample variable definitions — copy to var.tf (gitignored) and fill in.

variable "region" {
  type    = string
  default = "ap-northeast-2"
}

# AWS CLI profile used by the provider for authentication.
variable "awscli_profile" {
  type    = string
  default = ""
}

# Name of the EKS cluster the Lambda deploys inference workloads into.
variable "eks_cluster_name" {
  type    = string
  default = ""
}

# Base URL of the database API that stores inference endpoint records.
variable "db_api_url" {
  type    = string
  default = ""
}
182 changes: 182 additions & 0 deletions automation/diffusion_inference_deploy/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import subprocess
import requests
import os
import json
import time

# kubectl binary is baked into the image at /var/task (see Dockerfile);
# /tmp is the only writable filesystem in Lambda, so the kubeconfig goes there.
kubectl = '/var/task/kubectl'
kubeconfig = '/tmp/kubeconfig'

# Deployment configuration injected as environment variables by the IaC
# (serverless_api_template module).
eks_cluster_name = os.getenv('EKS_CLUSTER_NAME')
region = os.getenv("REGION")
db_api_url = os.getenv("DB_API_URL")
ecr_uri = os.getenv("ECR_URI")

# get eks cluster kubernetes configuration by aws cli
# Runs at import time so the kubeconfig is fetched once per (warm) Lambda
# container instead of on every invocation.
result_get_kubeconfig = subprocess.run([
    "aws", "eks", "update-kubeconfig",
    "--name", eks_cluster_name,
    "--region", region,
    "--kubeconfig", kubeconfig
])

def generate_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name):
    """Render the Kubernetes manifests for one diffusion inference endpoint.

    Emits four documents: a Namespace, a 2-replica GPU Deployment running the
    `diffusion-inference` container, a ClusterIP Service on port 8080, and an
    ALB Ingress routing `/{endpoint_uid}` to that service. The combined
    manifest is written to /tmp (the only writable path in Lambda).

    Parameters:
        user_namespace: Kubernetes namespace (one per user).
        endpoint_uid:   unique id used in the name of every resource.
        model_s3_url:   handed to the container via the MODEL_S3_URL env var.
        node_pool_name: Karpenter node pool the pods are pinned to.

    Returns:
        Path of the manifest file written under /tmp.
    """
    # NOTE(review): requests == limits (~4 vCPU / 30 GiB / 1 GPU) — presumably
    # sized to fill one GPU node of the pool; confirm against the instance type.
    content = f"""---
apiVersion: v1
kind: Namespace
metadata:
  name: {user_namespace}
---
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: {user_namespace}
  name: deployment-{endpoint_uid}
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: app-{endpoint_uid}
  replicas: 2
  template:
    metadata:
      labels:
        app.kubernetes.io/name: app-{endpoint_uid}
    spec:
      containers:
      - image: {ecr_uri}/diffusion-inference:latest
        imagePullPolicy: Always
        name: app-{endpoint_uid}
        ports:
        - containerPort: 8080
        env:
        - name: MODEL_S3_URL
          value: {model_s3_url}
        resources:
          requests:
            cpu: 3700m
            memory: 30720M
            nvidia.com/gpu: 1
          limits:
            cpu: 3700m
            memory: 30720M
            nvidia.com/gpu: 1
      nodeSelector:
        karpenter.sh/nodepool: {node_pool_name}
---
apiVersion: v1
kind: Service
metadata:
  namespace: {user_namespace}
  name: service-{endpoint_uid}
spec:
  ports:
  - port: 8080
    targetPort: 8080
    protocol: TCP
  type: ClusterIP
  selector:
    app.kubernetes.io/name: app-{endpoint_uid}
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: {user_namespace}
  name: ingress-{endpoint_uid}
  annotations:
    alb.ingress.kubernetes.io/scheme: internet-facing
    alb.ingress.kubernetes.io/target-type: ip
    alb.ingress.kubernetes.io/group.name: "{user_namespace}"
spec:
  ingressClassName: alb
  rules:
  - http:
      paths:
      - path: /{endpoint_uid}
        pathType: Prefix
        backend:
          service:
            name: service-{endpoint_uid}
            port:
              number: 8080
"""

    filepath = f"/tmp/{endpoint_uid}.yaml"
    with open(filepath, 'w') as f:
        f.write(content)

    return filepath

def apply_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name):
    """Render the endpoint's manifest and `kubectl apply` it.

    Returns the kubectl process return code (0 on success).
    """
    manifest_path = generate_yaml(
        user_namespace, endpoint_uid, model_s3_url, node_pool_name
    )
    proc = subprocess.run(
        [kubectl, "apply", "-f", manifest_path, "--kubeconfig", kubeconfig]
    )
    if proc.returncode != 0:
        print("create resource returncode != 0")
    return proc.returncode

def delete_resource(user_namespace, endpoint_uid):
    """Delete the endpoint's ingress, service and deployment (in that order).

    All three deletes are always attempted. Returns 0 if every delete
    succeeded, 1 if any of them failed.
    """
    failed = False
    for kind, resource_name in (
        ("ingress", f"ingress-{endpoint_uid}"),
        ("service", f"service-{endpoint_uid}"),
        ("deployment", f"deployment-{endpoint_uid}"),
    ):
        proc = subprocess.run([
            kubectl, "-n", user_namespace, "delete", kind, resource_name,
            "--kubeconfig", kubeconfig,
        ])
        if proc.returncode != 0:
            failed = True
    if failed:
        print("delete resource returncode != 0")
        return 1
    return 0

def handler(event, context):
    """Lambda entry point: create or delete a diffusion inference endpoint.

    Expects an API-Gateway style event whose JSON body contains:
        user   -- user id, used (lowercased) as the Kubernetes namespace
        uid    -- endpoint id, used (lowercased) to name the k8s resources
        action -- "create" or "delete"
        model  -- (create only) {"s3_url": ...} location of the model weights

    Returns a dict with 'statusCode' and 'body'.
    """
    body = json.loads(event.get("body", "{}"))
    user_uid = body.get("user")
    endpoint_uid = body.get("uid")
    action = body.get("action")

    # Guard missing fields: the previous code crashed with AttributeError
    # on None.lower() and the caller saw an opaque Lambda error.
    if not user_uid or not endpoint_uid:
        return {
            'statusCode': 500,
            'body': "invalid action"
        }
    user_uid = user_uid.lower()
    endpoint_uid = endpoint_uid.lower()

    if action == "create":
        model_s3_url = body['model']['s3_url']
        node_pool_name = "nodepool-1"
        result = apply_yaml(user_uid, endpoint_uid, model_s3_url, node_pool_name)

        # Give the ALB controller time to provision an address for the ingress.
        time.sleep(10)
        # List ingresses and pick the ADDRESS column (index 4) of the row for
        # this endpoint. List-form args instead of the previous
        # shell=True "... | grep {endpoint_uid}": endpoint_uid comes straight
        # from the request body, so interpolating it into a shell string was
        # a command-injection vector.
        listing = subprocess.run(
            [kubectl, "get", "ingress", "-A", "--kubeconfig", kubeconfig],
            capture_output=True,
        ).stdout.decode('utf-8')
        endpoint_url = None
        for line in listing.splitlines():
            fields = line.split()
            # len check guards the IndexError the old split()[4] hit when the
            # address was not provisioned yet.
            if endpoint_uid in line and len(fields) > 4:
                endpoint_url = fields[4]
                break
        print(f"endpoint_url: {endpoint_url}")
        if endpoint_url is not None:
            update_data = {
                "endpoint": f"http://{endpoint_url}/{endpoint_uid}"
            }
            requests.put(url=f"{db_api_url}/inferences/{endpoint_uid}", json=update_data)
        if result == 0 and endpoint_url is not None:
            return {
                'statusCode': 200,
                'body': "complete create inference endpoint"
            }
        else:
            return {
                'statusCode': 500,
                'body': "error with create inference endpoint"
            }
    elif action == "delete":
        result = delete_resource(user_uid, endpoint_uid)
        if result == 0:
            # Remove the endpoint record only after the k8s resources are gone.
            requests.delete(url=f"{db_api_url}/inferences/{endpoint_uid}")
            return {
                'statusCode': 200,
                'body': "complete delete inference deployment"
            }
        else:
            return {
                'statusCode': 500,
                'body': "error with delete inference endpoint"
            }
    else:
        return {
            'statusCode': 500,
            'body': "invalid action"
        }
7 changes: 7 additions & 0 deletions automation/diffusion_inference_deploy/push_aws_ecr.sh.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh
# Build the deploy Lambda's container image and push it to AWS ECR.
# Copy to push_aws_ecr.sh (gitignored) and set ECR_URI before running.

ECR_URI=""

# Authenticate docker against the private registry, then build and push.
aws ecr get-login-password --region ap-northeast-2 | docker login --username AWS --password-stdin $ECR_URI
docker build -t $ECR_URI/diffusion-inference-deploy:latest .
docker push $ECR_URI/diffusion-inference-deploy:latest
15 changes: 15 additions & 0 deletions inference/template_code/diffusion/Dockerfile.kubernetes_gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# CUDA 12.1 PyTorch runtime image for GPU inference pods on Kubernetes.
FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime

WORKDIR /app

COPY requirements_kubernetes_gpu.txt /app/requirements.txt

RUN pip3 install --no-cache-dir -r requirements.txt

# wget/unzip presumably fetch and unpack model archives at runtime —
# TODO(review): confirm; apt lists are cleaned to keep the layer small.
RUN apt-get update && apt-get install -y apt-utils wget unzip && apt-get clean && rm -rf /var/lib/apt/lists/*

COPY kubernetes_app_diffusion.py /app/app.py

CMD [ "python3", "/app/app.py" ]

# Port the Service/Ingress route to (documentation only; EXPOSE placement
# after CMD is valid — instruction order does not matter for metadata).
EXPOSE 8080
Loading

0 comments on commit 463af5c

Please sign in to comment.