diff --git a/deploy/README.md b/deploy/README.md index 81e7a80c..4b6e09a6 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -38,34 +38,34 @@ export GROUNDLIGHT_API_TOKEN="api_xxxxxx" # Choose an inference flavor, either CPU or (default) GPU. # Note that appropriate setup for GPU may need to be done separately. export INFERENCE_FLAVOR="CPU" -# export INFERENCE_FLAVOR = "GPU" +# export INFERENCE_FLAVOR="GPU" ``` You'll also need to configure your AWS credentials using `aws configure` to include credentials that have permissions to pull from the appropriate ECR location (if you don't already have the AWS CLI installed, refer to the instructions [here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)). To start the cluster, run ```shell -> ./deploy/bin/cluster_setup.sh +./deploy/bin/cluster_setup.sh ``` Sometimes it might be desirable to reset all database tables (i.e., delete all existing data) for a fresh start. In that case, you will need to start the cluster with an extra argument: ```shell -> ./deploy/bin/cluster_setup.sh db_reset +./deploy/bin/cluster_setup.sh db_reset ``` This will create the edge-endpoint deployment with two containers: one for the edge logic and another one for creating/updating inference deployments. 
After a while you should be able to see something like this if you run `kubectl get pods`: -```shell +``` NAME READY STATUS RESTARTS AGE edge-endpoint-594d645588-5mf28 2/2 Running 0 4s ``` If you added detectors to the [edge config file](../configs/edge-config.yaml), you should also see a pod for each of them, e.g.: -```shell +``` NAME READY STATUS RESTARTS AGE edge-endpoint-594d645588-5mf28 2/2 Running 0 4s inferencemodel-det-3jemxiunjuekdjzbuxavuevw15k-5d8b454bcb-xqf8m 1/1 Running 0 2s diff --git a/deploy/bin/cluster_setup.sh b/deploy/bin/cluster_setup.sh index 1d8148c0..7f660464 100755 --- a/deploy/bin/cluster_setup.sh +++ b/deploy/bin/cluster_setup.sh @@ -184,6 +184,7 @@ envsubst < deploy/k3s/service_account.yaml > deploy/k3s/service_account.yaml.tmp $K apply -f deploy/k3s/service_account.yaml.tmp rm deploy/k3s/service_account.yaml.tmp +$K apply -f deploy/k3s/inference_deployment/warmup_inference_model.yaml $K apply -f deploy/k3s/edge_deployment/edge_deployment.yaml $K describe deployment edge-endpoint \ No newline at end of file diff --git a/deploy/bin/make-aws-secret.sh b/deploy/bin/make-aws-secret.sh index 8ed60896..f29bf3bd 100755 --- a/deploy/bin/make-aws-secret.sh +++ b/deploy/bin/make-aws-secret.sh @@ -21,6 +21,7 @@ fi $K delete --ignore-not-found secret registry-credentials $K delete --ignore-not-found secret aws-credentials +# NOTE: these credentials seem to be expiring, causing problems later. 
PASSWORD=$(aws ecr get-login-password --region us-west-2) $K create secret docker-registry registry-credentials \ --docker-server=767397850842.dkr.ecr.us-west-2.amazonaws.com \ diff --git a/deploy/k3s/inference_deployment/inference_deployment_template.yaml b/deploy/k3s/inference_deployment/inference_deployment_template.yaml index 3721f044..96084a5f 100644 --- a/deploy/k3s/inference_deployment/inference_deployment_template.yaml +++ b/deploy/k3s/inference_deployment/inference_deployment_template.yaml @@ -41,6 +41,8 @@ spec: maxUnavailable: 0 # Aim for no downtime during rollout initContainers: + # NOTE: the sync-pinamod container is duplicated in the warmup_inference_model.yaml Job + # TODO: refactor to share code between the Job and the initContainer in the Deployment - name: sync-pinamod image: amazon/aws-cli:latest # Sync models from S3 to the local hostmapped filesystem. diff --git a/deploy/k3s/inference_deployment/warmup_inference_model.yaml b/deploy/k3s/inference_deployment/warmup_inference_model.yaml new file mode 100644 index 00000000..caf81a6b --- /dev/null +++ b/deploy/k3s/inference_deployment/warmup_inference_model.yaml @@ -0,0 +1,40 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: warmup-inference-model +spec: + template: + spec: + restartPolicy: OnFailure + imagePullSecrets: + - name: registry-credentials + containers: + - name: image-puller + image: 767397850842.dkr.ecr.us-west-2.amazonaws.com/gl-edge-inference:latest + command: ["echo", "Pulling image to warm cache"] + - name: sync-pinamod + image: amazon/aws-cli:latest + # Sync models from S3 to the local hostmapped filesystem. 
+ command: ['sh', '-c', 'aws s3 sync s3://pinamod-artifacts-public/pinamod $PINAMOD_DIR --delete'] + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-credentials + key: aws_access_key_id + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-credentials + key: aws_secret_access_key + - name: PINAMOD_DIR + value: /opt/models/pinamod + volumeMounts: + - name: pina-models + mountPath: /opt/models + volumes: + - name: pina-models + hostPath: + path: /opt/groundlight/edge/pinamod-public + type: Directory + backoffLimit: 2 \ No newline at end of file