Skip to content

Commit

Permalink
Merge pull request #82 from kookmin-sw/jihun
Browse files Browse the repository at this point in the history
Diffusion Inference Template Code 제작
  • Loading branch information
mh3ong authored May 19, 2024
2 parents d4ecf6b + 9233500 commit 463af5c
Show file tree
Hide file tree
Showing 12 changed files with 406 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,9 @@

endpoint = os.environ.get("ENDPOINT_URL")

def create_request_body(text, extras=None):
if extras is None:
extras = {}

def create_request_body(text):
body = {
"text_prompts": [{"text": text}],
"prompt": text,
}
return body

Expand All @@ -22,7 +19,6 @@ def image_generate(endpoint, prompt):
headers = {"Content-Type": "application/json"}
request_body = create_request_body(
text=prompt,
extras={}
)

res = requests.post(endpoint, data=json.dumps(request_body), headers=headers)
Expand Down
4 changes: 4 additions & 0 deletions automation/diffusion_inference_deploy/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
push_aws_ecr.sh
ecr_login.sh
get_kubeconfig.sh
*test*
13 changes: 13 additions & 0 deletions automation/diffusion_inference_deploy/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# AWS Lambda Python 3.11 base image; WORKDIR is ${LAMBDA_TASK_ROOT} = /var/task
# and the runtime invokes the handler named in CMD ("<module>.<function>").
FROM public.ecr.aws/lambda/python:3.11

# awscli: used at runtime to run `aws eks update-kubeconfig` (see main.py);
# requests: used for DB API calls.
RUN pip install awscli requests --no-cache-dir

# x86_64
# Download the latest stable kubectl into the WORKDIR, i.e. /var/task/kubectl —
# exactly the path main.py hard-codes.
RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \
    && chmod +x ./kubectl

COPY main.py ${LAMBDA_TASK_ROOT}

# NOTE(review): chmod of the directory itself (no -R) — presumably intended to
# make /var/task traversable/executable; confirm whether ./kubectl's own
# chmod above already suffices.
RUN chmod +x /var/task

CMD ["main.handler"]
1 change: 1 addition & 0 deletions automation/diffusion_inference_deploy/IaC/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
var.tf
35 changes: 35 additions & 0 deletions automation/diffusion_inference_deploy/IaC/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Change prefix and container_repository to match your deployment
# (these are the representative names of the created resources).

module "diffusion_inference_deploy" {
  source               = "github.com/kookmin-sw/capstone-2024-12//IaC/serverless_api_template"
  prefix               = "diffusion-inference-deploy"
  container_registry   = "694448341573.dkr.ecr.ap-northeast-2.amazonaws.com"
  container_repository = "diffusion-inference-deploy"
  container_image_tag  = "latest"
  lambda_ram_size      = 2048
  # IAM policies attached to the Lambda role: S3 (model artifacts),
  # EC2/EKS (cluster access), SSM read-only.
  attach_s3_policy           = true
  attach_ec2_policy          = true
  attach_eks_policy          = true
  attach_ssm_readonly_policy = true
  region_name                = var.region
  eks_cluster_name           = var.eks_cluster_name
  db_api_url                 = var.db_api_url
}

# Public invoke URL of the deployed Lambda function.
output "diffusion_inference_deploy_function_url" {
  value = module.diffusion_inference_deploy.function_url
}

provider "aws" {
  region  = var.region
  profile = var.awscli_profile
}

# Remote Terraform state in S3, scoped per component by key.
terraform {
  backend "s3" {
    bucket  = "sskai-terraform-state"
    key     = "diffusion_inference_deploy/tf.state"
    region  = "ap-northeast-2"
    encrypt = true
  }
}
19 changes: 19 additions & 0 deletions automation/diffusion_inference_deploy/IaC/var.tf.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Sample variable definitions — copy to var.tf (gitignored) and fill in.

variable "region" {
  type    = string
  default = "ap-northeast-2"
}

# AWS CLI profile used by the provider for authentication.
variable "awscli_profile" {
  type    = string
  default = ""
}

# Name of the EKS cluster the Lambda deploys inference workloads into.
variable "eks_cluster_name" {
  type    = string
  default = ""
}

# Base URL of the database API that stores inference endpoint records.
variable "db_api_url" {
  type    = string
  default = ""
}
182 changes: 182 additions & 0 deletions automation/diffusion_inference_deploy/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import subprocess
import requests
import os
import json
import time

# kubectl binary is baked into the image at /var/task (see Dockerfile);
# /tmp is the only writable filesystem in Lambda, so the kubeconfig goes there.
kubectl = '/var/task/kubectl'
kubeconfig = '/tmp/kubeconfig'

# Deployment configuration injected as environment variables by the IaC
# (serverless_api_template module).
eks_cluster_name = os.getenv('EKS_CLUSTER_NAME')
region = os.getenv("REGION")
db_api_url = os.getenv("DB_API_URL")
ecr_uri = os.getenv("ECR_URI")

# get eks cluster kubernetes configuration by aws cli
# Runs at import time so the kubeconfig is fetched once per (warm) Lambda
# container instead of on every invocation.
result_get_kubeconfig = subprocess.run([
    "aws", "eks", "update-kubeconfig",
    "--name", eks_cluster_name,
    "--region", region,
    "--kubeconfig", kubeconfig
])

def generate_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name):
    """Render the Kubernetes manifests for one diffusion inference endpoint.

    Emits four documents: a Namespace, a 2-replica GPU Deployment running the
    `diffusion-inference` container, a ClusterIP Service on port 8080, and an
    ALB Ingress routing `/{endpoint_uid}` to that service. The combined
    manifest is written to /tmp (the only writable path in Lambda).

    Parameters:
        user_namespace: Kubernetes namespace (one per user).
        endpoint_uid:   unique id used in the name of every resource.
        model_s3_url:   handed to the container via the MODEL_S3_URL env var.
        node_pool_name: Karpenter node pool the pods are pinned to.

    Returns:
        Path of the manifest file written under /tmp.
    """
    # NOTE(review): requests == limits (~4 vCPU / 30 GiB / 1 GPU) — presumably
    # sized to fill one GPU node of the pool; confirm against the instance type.
    content = f"""---
apiVersion: v1
kind: Namespace
metadata:
  name: {user_namespace}
---
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: {user_namespace}
  name: deployment-{endpoint_uid}
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: app-{endpoint_uid}
  replicas: 2
  template:
    metadata:
      labels:
        app.kubernetes.io/name: app-{endpoint_uid}
    spec:
      containers:
      - image: {ecr_uri}/diffusion-inference:latest
        imagePullPolicy: Always
        name: app-{endpoint_uid}
        ports:
        - containerPort: 8080
        env:
        - name: MODEL_S3_URL
          value: {model_s3_url}
        resources:
          requests:
            cpu: 3700m
            memory: 30720M
            nvidia.com/gpu: 1
          limits:
            cpu: 3700m
            memory: 30720M
            nvidia.com/gpu: 1
      nodeSelector:
        karpenter.sh/nodepool: {node_pool_name}
---
apiVersion: v1
kind: Service
metadata:
  namespace: {user_namespace}
  name: service-{endpoint_uid}
spec:
  ports:
  - port: 8080
    targetPort: 8080
    protocol: TCP
  type: ClusterIP
  selector:
    app.kubernetes.io/name: app-{endpoint_uid}
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: {user_namespace}
  name: ingress-{endpoint_uid}
  annotations:
    alb.ingress.kubernetes.io/scheme: internet-facing
    alb.ingress.kubernetes.io/target-type: ip
    alb.ingress.kubernetes.io/group.name: "{user_namespace}"
spec:
  ingressClassName: alb
  rules:
  - http:
      paths:
      - path: /{endpoint_uid}
        pathType: Prefix
        backend:
          service:
            name: service-{endpoint_uid}
            port:
              number: 8080
"""

    filepath = f"/tmp/{endpoint_uid}.yaml"
    with open(filepath, 'w') as f:
        f.write(content)

    return filepath

def apply_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name):
    """Render the endpoint's manifest and `kubectl apply` it.

    Returns the kubectl process return code (0 on success).
    """
    manifest_path = generate_yaml(
        user_namespace, endpoint_uid, model_s3_url, node_pool_name
    )
    proc = subprocess.run(
        [kubectl, "apply", "-f", manifest_path, "--kubeconfig", kubeconfig]
    )
    if proc.returncode != 0:
        print("create resource returncode != 0")
    return proc.returncode

def delete_resource(user_namespace, endpoint_uid):
    """Delete the endpoint's ingress, service and deployment (in that order).

    All three deletes are always attempted. Returns 0 if every delete
    succeeded, 1 if any of them failed.
    """
    failed = False
    for kind, resource_name in (
        ("ingress", f"ingress-{endpoint_uid}"),
        ("service", f"service-{endpoint_uid}"),
        ("deployment", f"deployment-{endpoint_uid}"),
    ):
        proc = subprocess.run([
            kubectl, "-n", user_namespace, "delete", kind, resource_name,
            "--kubeconfig", kubeconfig,
        ])
        if proc.returncode != 0:
            failed = True
    if failed:
        print("delete resource returncode != 0")
        return 1
    return 0

def handler(event, context):
    """Lambda entry point: create or delete a diffusion inference endpoint.

    Expects an API-Gateway style event whose JSON body contains:
        user   -- user id, used (lowercased) as the Kubernetes namespace
        uid    -- endpoint id, used (lowercased) to name the k8s resources
        action -- "create" or "delete"
        model  -- (create only) {"s3_url": ...} location of the model weights

    Returns a dict with 'statusCode' and 'body'.
    """
    body = json.loads(event.get("body", "{}"))
    user_uid = body.get("user")
    endpoint_uid = body.get("uid")
    action = body.get("action")

    # Guard missing fields: the previous code crashed with AttributeError
    # on None.lower() and the caller saw an opaque Lambda error.
    if not user_uid or not endpoint_uid:
        return {
            'statusCode': 500,
            'body': "invalid action"
        }
    user_uid = user_uid.lower()
    endpoint_uid = endpoint_uid.lower()

    if action == "create":
        model_s3_url = body['model']['s3_url']
        node_pool_name = "nodepool-1"
        result = apply_yaml(user_uid, endpoint_uid, model_s3_url, node_pool_name)

        # Give the ALB controller time to provision an address for the ingress.
        time.sleep(10)
        # List ingresses and pick the ADDRESS column (index 4) of the row for
        # this endpoint. List-form args instead of the previous
        # shell=True "... | grep {endpoint_uid}": endpoint_uid comes straight
        # from the request body, so interpolating it into a shell string was
        # a command-injection vector.
        listing = subprocess.run(
            [kubectl, "get", "ingress", "-A", "--kubeconfig", kubeconfig],
            capture_output=True,
        ).stdout.decode('utf-8')
        endpoint_url = None
        for line in listing.splitlines():
            fields = line.split()
            # len check guards the IndexError the old split()[4] hit when the
            # address was not provisioned yet.
            if endpoint_uid in line and len(fields) > 4:
                endpoint_url = fields[4]
                break
        print(f"endpoint_url: {endpoint_url}")
        if endpoint_url is not None:
            update_data = {
                "endpoint": f"http://{endpoint_url}/{endpoint_uid}"
            }
            requests.put(url=f"{db_api_url}/inferences/{endpoint_uid}", json=update_data)
        if result == 0 and endpoint_url is not None:
            return {
                'statusCode': 200,
                'body': "complete create inference endpoint"
            }
        else:
            return {
                'statusCode': 500,
                'body': "error with create inference endpoint"
            }
    elif action == "delete":
        result = delete_resource(user_uid, endpoint_uid)
        if result == 0:
            # Remove the endpoint record only after the k8s resources are gone.
            requests.delete(url=f"{db_api_url}/inferences/{endpoint_uid}")
            return {
                'statusCode': 200,
                'body': "complete delete inference deployment"
            }
        else:
            return {
                'statusCode': 500,
                'body': "error with delete inference endpoint"
            }
    else:
        return {
            'statusCode': 500,
            'body': "invalid action"
        }
7 changes: 7 additions & 0 deletions automation/diffusion_inference_deploy/push_aws_ecr.sh.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh
# Build the deploy Lambda's container image and push it to AWS ECR.
# Copy to push_aws_ecr.sh (gitignored) and set ECR_URI before running.

ECR_URI=""

# Authenticate docker against the private registry, then build and push.
aws ecr get-login-password --region ap-northeast-2 | docker login --username AWS --password-stdin $ECR_URI
docker build -t $ECR_URI/diffusion-inference-deploy:latest .
docker push $ECR_URI/diffusion-inference-deploy:latest
15 changes: 15 additions & 0 deletions inference/template_code/diffusion/Dockerfile.kubernetes_gpu
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# CUDA 12.1 PyTorch runtime image for GPU inference pods on Kubernetes.
FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime

WORKDIR /app

COPY requirements_kubernetes_gpu.txt /app/requirements.txt

RUN pip3 install --no-cache-dir -r requirements.txt

# wget/unzip presumably fetch and unpack model archives at runtime —
# TODO(review): confirm; apt lists are cleaned to keep the layer small.
RUN apt-get update && apt-get install -y apt-utils wget unzip && apt-get clean && rm -rf /var/lib/apt/lists/*

COPY kubernetes_app_diffusion.py /app/app.py

CMD [ "python3", "/app/app.py" ]

# Port the Service/Ingress route to (documentation only; EXPOSE placement
# after CMD is valid — instruction order does not matter for metadata).
EXPOSE 8080
Loading

0 comments on commit 463af5c

Please sign in to comment.