Trying to prewarm inference image (#102)
Loads the inference server docker image at cluster start - to save time
and confusion.
robotrapta authored Oct 9, 2024
1 parent ceaf596 commit 682d849
Showing 5 changed files with 49 additions and 5 deletions.
10 changes: 5 additions & 5 deletions deploy/README.md
@@ -38,34 +38,34 @@ export GROUNDLIGHT_API_TOKEN="api_xxxxxx"
# Choose an inference flavor, either CPU or (default) GPU.
# Note that appropriate setup for GPU may need to be done separately.
export INFERENCE_FLAVOR="CPU"
-# export INFERENCE_FLAVOR = "GPU"
+# export INFERENCE_FLAVOR="GPU"
```

You'll also need to configure AWS credentials (via `aws configure`) that have permission to pull images from the appropriate ECR repository. If you don't already have the AWS CLI installed, refer to the instructions [here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html).
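As a quick sanity check (not part of the original README), you can confirm that those credentials can actually authenticate against the ECR registry used by this deployment before starting the cluster. The region and registry URL below are taken from the deploy scripts; the `docker login` step assumes Docker is installed locally:

```shell
# Hedged sketch: verify AWS credentials and ECR access before running cluster_setup.sh.
aws sts get-caller-identity
aws ecr get-login-password --region us-west-2 | \
  docker login --username AWS --password-stdin 767397850842.dkr.ecr.us-west-2.amazonaws.com
```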

To start the cluster, run
```shell
-> ./deploy/bin/cluster_setup.sh
+./deploy/bin/cluster_setup.sh
```

Sometimes it might be desirable to reset all database tables (i.e., delete all existing data) for a fresh start. In that case,
start the cluster with an extra argument:

```shell
-> ./deploy/bin/cluster_setup.sh db_reset
+./deploy/bin/cluster_setup.sh db_reset
```

This will create the edge-endpoint deployment with two containers: one for the edge logic and another for creating/updating inference
deployments. After a while, you should see something like this when you run `kubectl get pods`:

-```shell
+```
NAME                             READY   STATUS    RESTARTS   AGE
edge-endpoint-594d645588-5mf28   2/2     Running   0          4s
```

If you added detectors to the [edge config file](../configs/edge-config.yaml), you should also see a pod for each of them, e.g.:

-```shell
+```
NAME                                                              READY   STATUS    RESTARTS   AGE
edge-endpoint-594d645588-5mf28                                    2/2     Running   0          4s
inferencemodel-det-3jemxiunjuekdjzbuxavuevw15k-5d8b454bcb-xqf8m   1/1     Running   0          2s
1 change: 1 addition & 0 deletions deploy/bin/cluster_setup.sh
@@ -184,6 +184,7 @@ envsubst < deploy/k3s/service_account.yaml > deploy/k3s/service_account.yaml.tmp
$K apply -f deploy/k3s/service_account.yaml.tmp
rm deploy/k3s/service_account.yaml.tmp

+$K apply -f deploy/k3s/inference_deployment/warmup_inference_model.yaml
$K apply -f deploy/k3s/edge_deployment/edge_deployment.yaml

$K describe deployment edge-endpoint
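After the script finishes, a hedged way to watch both new resources come up (plain `kubectl` is assumed here in place of the script's `$K` alias):

```shell
# Follow the warmup Job and the edge-endpoint rollout after cluster_setup.sh completes.
kubectl get jobs,deployments
kubectl rollout status deployment/edge-endpoint --timeout=10m
```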
1 change: 1 addition & 0 deletions deploy/bin/make-aws-secret.sh
@@ -21,6 +21,7 @@ fi
$K delete --ignore-not-found secret registry-credentials
$K delete --ignore-not-found secret aws-credentials

+# NOTE: these credentials seem to be expiring, causing problems later.
PASSWORD=$(aws ecr get-login-password --region us-west-2)
$K create secret docker-registry registry-credentials \
--docker-server=767397850842.dkr.ecr.us-west-2.amazonaws.com \
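Regarding the expiration note above: ECR authorization tokens are only valid for a limited time (roughly 12 hours), so a stale `registry-credentials` secret will eventually cause image pulls to fail. A minimal sketch of a manual refresh, reusing the same names as the script (plain `kubectl` assumed in place of `$K`):

```shell
# Recreate the ECR pull secret with a fresh token.
kubectl delete --ignore-not-found secret registry-credentials
kubectl create secret docker-registry registry-credentials \
  --docker-server=767397850842.dkr.ecr.us-west-2.amazonaws.com \
  --docker-username=AWS \
  --docker-password="$(aws ecr get-login-password --region us-west-2)"
```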
2 changes: 2 additions & 0 deletions deploy/k3s/edge_deployment/edge_deployment.yaml
@@ -41,6 +41,8 @@ spec:
      maxUnavailable: 0 # Aim for no downtime during rollout

      initContainers:
      # NOTE: the sync-pinamod container is duplicated in the warmup_inference_model.yaml Job
      # TODO: refactor to share code between the Job and the initContainer in the Deployment
      - name: sync-pinamod
        image: amazon/aws-cli:latest
        # Sync models from S3 to the local hostmapped filesystem.
40 changes: 40 additions & 0 deletions deploy/k3s/inference_deployment/warmup_inference_model.yaml
@@ -0,0 +1,40 @@
apiVersion: batch/v1
kind: Job
metadata:
  name: warmup-inference-model
spec:
  template:
    spec:
      restartPolicy: OnFailure
      imagePullSecrets:
      - name: registry-credentials
      containers:
      - name: image-puller
        image: 767397850842.dkr.ecr.us-west-2.amazonaws.com/gl-edge-inference:latest
        command: ["echo", "Pulling image to warm cache"]
      - name: sync-pinamod
        image: amazon/aws-cli:latest
        # Sync models from S3 to the local hostmapped filesystem.
        command: ['sh', '-c', 'aws s3 sync s3://pinamod-artifacts-public/pinamod $PINAMOD_DIR --delete']
        env:
        - name: AWS_ACCESS_KEY_ID
          valueFrom:
            secretKeyRef:
              name: aws-credentials
              key: aws_access_key_id
        - name: AWS_SECRET_ACCESS_KEY
          valueFrom:
            secretKeyRef:
              name: aws-credentials
              key: aws_secret_access_key
        - name: PINAMOD_DIR
          value: /opt/models/pinamod
        volumeMounts:
        - name: pina-models
          mountPath: /opt/models
      volumes:
      - name: pina-models
        hostPath:
          path: /opt/groundlight/edge/pinamod-public
          type: Directory
  backoffLimit: 2
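To check that the warmup Job actually pulled the inference image and synced the models, something like the following works (a sketch, not part of the commit; the `crictl` check runs on the k3s node itself):

```shell
# Confirm the warmup Job completed and the inference image is now in the local image cache.
kubectl get job warmup-inference-model
kubectl wait --for=condition=complete --timeout=15m job/warmup-inference-model
sudo k3s crictl images | grep gl-edge-inference
```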
