From f5bd43d73ee4dc420d0f492e9e7edccdb17e3779 Mon Sep 17 00:00:00 2001
From: robotrapta <79607467+robotrapta@users.noreply.github.com>
Date: Sun, 27 Oct 2024 10:54:21 -0700
Subject: [PATCH] Rewording things so we use "cluster" in the canonical sense
 of the word (#118)

Before this, our docs & code would use the word "cluster" sometimes to mean
our application, sometimes to mean the namespace, and sometimes to actually
mean the k8s cluster. I think now whenever we use the word "cluster" we're
doing so in a way that would make sense to anybody versed in k8s.
---
 .github/workflows/pipeline.yaml              |  2 +-
 Dockerfile                                   |  2 +-
 INSTALL-JETSON.md                            |  4 +--
 app/core/kubernetes_management.py            |  4 +--
 balena.yml                                   |  3 +--
 deploy/README.md                             | 27 ++++++++++----------
 deploy/balena-k3s/README.md                  |  4 +--
 deploy/balena-k3s/bastion/Dockerfile         |  4 +--
 deploy/bin/install-k3s-nvidia.sh             |  1 +
 deploy/bin/{cluster_setup.sh => setup-ee.sh} |  2 +-
 load-testing/README.md                       |  2 +-
 11 files changed, 27 insertions(+), 28 deletions(-)
 rename deploy/bin/{cluster_setup.sh => setup-ee.sh} (99%)

diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml
index e9303865..a57ba03c 100644
--- a/.github/workflows/pipeline.yaml
+++ b/.github/workflows/pipeline.yaml
@@ -241,7 +241,7 @@ jobs:
           # source test/setup_inference_test_env.sh
           export INFERENCE_FLAVOR="CPU"
           export DEPLOYMENT_NAMESPACE="default"
-          bash deploy/bin/cluster_setup.sh
+          bash deploy/bin/setup-ee.sh
           kubectl describe deployment
 
       - name: Wait for edge-endpoint pod to be ready
diff --git a/Dockerfile b/Dockerfile
index d0d45dd4..ab4de00b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -56,7 +56,7 @@ RUN mkdir /etc/groundlight/edge-config && \
     mkdir /etc/groundlight/inference-deployment
 
 # Adding this here for testing purposes. In production, this will be mounted as persistent
-# volume in the kubernetes cluster.
+# volume in kubernetes.
 RUN mkdir -p /opt/groundlight/edge/sqlite
 
 # Copy configs
diff --git a/INSTALL-JETSON.md b/INSTALL-JETSON.md
index aadc37fd..d0c6efe4 100644
--- a/INSTALL-JETSON.md
+++ b/INSTALL-JETSON.md
@@ -30,10 +30,10 @@ aws sts get-caller-identity
 ```
 
-5) Setup the cluster.
+5) Set up the edge endpoint.
 
 ```
-./deploy/bin/cluster_setup.sh
+./deploy/bin/setup-ee.sh
 ```
 
 6) Figure out the URL of the edge endpoint.
 
diff --git a/app/core/kubernetes_management.py b/app/core/kubernetes_management.py
index 465863c7..b887fb77 100644
--- a/app/core/kubernetes_management.py
+++ b/app/core/kubernetes_management.py
@@ -49,7 +49,7 @@ def _load_inference_deployment_template(self) -> str:
 
     def _create_from_kube_manifest(self, namespace: str, manifest: str) -> None:
         """
-        Applies manifest to the kubernetes cluster. This is not blocking since the kubernetes API
+        Applies the manifest to the kubernetes namespace. This is not blocking since the kubernetes API
         creates deployments and services asynchronously.
         """
         logger.debug(f"Applying kubernetes manifest to namespace `{namespace}`...")
@@ -84,7 +84,7 @@ def create_inference_deployment(self, detector_id: str) -> None:
 
         This method substitutes placeholders in the inference deployment template
         with the provided detector ID, service name, and deployment name, and then
-        applies the manifest to the Kubernetes cluster.
+        applies the manifest to the Kubernetes namespace.
 
         Args:
             detector_id (str): The unique identifier for the detector for which
diff --git a/balena.yml b/balena.yml
index 6a78f3e0..5b5255d4 100644
--- a/balena.yml
+++ b/balena.yml
@@ -3,5 +3,4 @@ name: balena-k3s-edge-endpoint
 type: sw.application
 version: 0.2.8
 description: >-
-  Run the edge-endpoint on top of k3s on Balena. The inference-server
-  will also be deployed on the same k3s cluster.
\ No newline at end of file
+  Run the edge-endpoint inside k3s on Balena.
\ No newline at end of file
diff --git a/deploy/README.md b/deploy/README.md
index 86e2bcae..c820defc 100644
--- a/deploy/README.md
+++ b/deploy/README.md
@@ -1,24 +1,25 @@
 # Setting up the Edge Endpoint
 
-The edge endpoint is run as a k3s deployment. Follow the steps below to get it set up.
+The edge endpoint runs under kubernetes, typically on a single-node cluster, which could be anything from a Raspberry Pi to a powerful GPU server. If you have a lot of detectors to run locally, it will also scale out to a large multi-node cluster with essentially no changes beyond the k8s cluster setup itself.
 
-## Starting the k3s Cluster
+The instructions below are fairly opinionated, optimized for a single-node cluster setup using k3s on an Ubuntu/Debian-based system. If you want to set it up with a different flavor of kubernetes, that should work, but you'll have to figure out how to do that yourself.
 
-If you don't have [k3s](https://docs.k3s.io/) installed, go ahead and install it by running
+## Setting up Single-Node Kubernetes with k3s
+
+If you don't have [k3s](https://docs.k3s.io/) installed, there are two scripts that can install it, depending on whether or not you have a CUDA GPU. If you don't set up a GPU, the models will run more slowly on CPU.
 
 ```shell
-> ./deploy/bin/install-k3s.sh
+./deploy/bin/install-k3s.sh
+# or to install on a GPU system
+./deploy/bin/install-k3s-nvidia.sh
 ```
-
-If you intend to run edge inference, make sure to add the detector ID's to the
-[edge config file](../configs/edge-config.yaml). Adding detector ID's to the config file will cause
-inference pods to be initialized automatically for each detector. Even if they aren't configured in the config file,
+You might want to customize the [edge config file](../configs/edge-config.yaml) to include the detector IDs you want to run. Adding detector IDs to the config file will cause inference pods to be initialized automatically for each detector. Even if they aren't configured in the config file,
 edge inference will be set up for each detector ID for which the Groundlight service receives requests
 (note that it takes some time for each inference pod to become available for the first time).
 
-Before starting the cluster, you need to create/specify the namespace for the deployment. If you're creating a new one, run:
+Before installing the edge-endpoint, you need to create/specify the namespace for the deployment. If you're creating a new one, run:
 
 ```
 kubectl create namespace "your-namespace-name"
 ```
 
@@ -43,13 +44,12 @@ export INFERENCE_FLAVOR="CPU"
 
 You'll also need to configure your AWS credentials using `aws configure` to include credentials that have permissions to pull from the appropriate ECR location (if you don't already have the AWS CLI installed, refer to the instructions [here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)).
 
-To start the cluster, run
+To install the edge-endpoint, run:
 ```shell
-./deploy/bin/cluster_setup.sh
+./deploy/bin/setup-ee.sh
 ```
 
-This will create the edge-endpoint deployment with two containers: one for the edge logic and another one for creating/updating inference
-deployments. After a while you should be able to see something like this if you run `kubectl get pods`:
+This will create the edge-endpoint deployment, which is both the SDK proxy and the coordination service. After a while you should be able to see something like this if you run `kubectl get pods`:
 
 ```
 NAME                                                              READY   STATUS    RESTARTS   AGE
@@ -64,7 +64,6 @@ edge-endpoint-594d645588-5mf28                                    2/2     Runnin
 inferencemodel-det-3jemxiunjuekdjzbuxavuevw15k-5d8b454bcb-xqf8m   1/1     Running   0          2s
 ```
-
 
 We currently have a hard-coded docker image from ECR in the [edge-endpoint](/edge-endpoint/deploy/k3s/edge_deployment.yaml) deployment.
 If you want to make modifications to the edge endpoint code and push a different image to ECR see
 [Pushing/Pulling Images from ECR](#pushingpulling-images-from-elastic-container-registry-ecr).
diff --git a/deploy/balena-k3s/README.md b/deploy/balena-k3s/README.md
index e779db93..2f7d1ce5 100644
--- a/deploy/balena-k3s/README.md
+++ b/deploy/balena-k3s/README.md
@@ -10,7 +10,7 @@ balena push
 ```
 This will build and push two "services" to the edge devices in your chosen fleet. The first is a [k3s server](https://docs.k3s.io/architecture) named `server`, which effectively acts as our k3s cluster node. The second is the `bastion` service, from which a user can access the k3s cluster (e.g. by running `kubectl get nodes`). The `bastion` service also contains a copy of this repo at `/app/edge-endpoint`.
 
-Now, we have our k3s cluster built and running, but we have not started our edge deployment.
+Now, we have our single-node k3s cluster built and running, but we have not started our edge deployment.
 Configure the following variables via the `/Variables` or `/Device Variables` interfaces on the BalenaCloud dashboard:
 
 ```
@@ -26,5 +26,5 @@ Dockerfile will automatically run the following command as `bastion` launches so
 
 ```bash
 cd /app/edge-endpoint
-INFERENCE_FLAVOR="CPU" DEPLOYMENT_NAMESPACE="default" ./deploy/bin/cluster_setup.sh
+INFERENCE_FLAVOR="CPU" DEPLOYMENT_NAMESPACE="default" ./deploy/bin/setup-ee.sh
 ```
diff --git a/deploy/balena-k3s/bastion/Dockerfile b/deploy/balena-k3s/bastion/Dockerfile
index 3a6e638c..dd254573 100644
--- a/deploy/balena-k3s/bastion/Dockerfile
+++ b/deploy/balena-k3s/bastion/Dockerfile
@@ -55,7 +55,7 @@ RUN mkdir -p /app/edge-endpoint
 COPY . /app/edge-endpoint
 
 RUN echo "source /app/edge-endpoint/deploy/balena-k3s/bastion/src/kube-bash.sh" >> /root/.bashrc
-RUN chmod +x ./edge-endpoint/deploy/bin/cluster_setup.sh
+RUN chmod +x ./edge-endpoint/deploy/bin/setup-ee.sh
 
 ENTRYPOINT []
-CMD ["/bin/sh", "-c", "./edge-endpoint/deploy/bin/cluster_setup.sh && tail -f /dev/null"]
+CMD ["/bin/sh", "-c", "./edge-endpoint/deploy/bin/setup-ee.sh && tail -f /dev/null"]
diff --git a/deploy/bin/install-k3s-nvidia.sh b/deploy/bin/install-k3s-nvidia.sh
index 1a76cc36..f1f7150a 100755
--- a/deploy/bin/install-k3s-nvidia.sh
+++ b/deploy/bin/install-k3s-nvidia.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 # Install k3s and configure GPU support
+# This handles the GPU-specific setup, and calls the install-k3s.sh script for everything else.
 # Tested on an AWS EC2 G4 instance using the following AMI:
 # Deep Learning OSS Nvidia Driver AMI GPU PyTorch 2.3.0 (Ubuntu 20.04) 20240825
 
diff --git a/deploy/bin/cluster_setup.sh b/deploy/bin/setup-ee.sh
similarity index 99%
rename from deploy/bin/cluster_setup.sh
rename to deploy/bin/setup-ee.sh
index 1b1a77f0..f5bda298 100755
--- a/deploy/bin/cluster_setup.sh
+++ b/deploy/bin/setup-ee.sh
@@ -2,7 +2,7 @@
 
 # Usage:
 # Execute the script using the following command:
-# ./deploy/bin/cluster_setup.sh
+# ./deploy/bin/setup-ee.sh
 #
 # Environment Variables:
 # - KUBECTL_CMD: Specifies the path to the kubectl command. Defaults to "kubectl". If using k3s, set to "k3s kubectl".
diff --git a/load-testing/README.md b/load-testing/README.md
index 5f51c7a3..16ca9a4d 100644
--- a/load-testing/README.md
+++ b/load-testing/README.md
@@ -31,7 +31,7 @@ command:
 
 Some trial and error will be necessary to figure out the ideal configuration. For reference: on a machine with 32 CPU cores and 126 G of RAM, and a RTX 3090 GPU with 24 Gi of RAM, setting edge-endpoint proxy workers to 128 and inference server workers to 61 was able to run successfully.
 
-After setting these config options, you should run/re-run the [cluster setup script](/deploy/bin/cluster_setup.sh) to deploy with your new configuration. You can monitor the inference pod's logs to see when all of the workers have finished starting up (if the number of workers is high, this will likely be after the pod reports being ready).
+After setting these config options, you should run/re-run the [setup edge endpoint script](/deploy/bin/setup-ee.sh) to deploy with your new configuration. You can monitor the inference pod's logs to see when all of the workers have finished starting up (if the number of workers is high, this will likely be after the pod reports being ready).
 
 ### Configuring the load testing scripts
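
The renamed script is invoked the same way the old `cluster_setup.sh` was. As a quick reference, here is a minimal (re-)deploy sketch using the environment variables documented in the READMEs and script header changed above; the specific values shown are illustrative and assume a single-node k3s install.

```shell
# Minimal sketch of (re-)deploying the edge endpoint after changing configuration.
# KUBECTL_CMD, DEPLOYMENT_NAMESPACE, and INFERENCE_FLAVOR are the variables the
# docs above describe; the values here are examples only.
export KUBECTL_CMD="k3s kubectl"        # plain "kubectl" if not using k3s
export DEPLOYMENT_NAMESPACE="default"
export INFERENCE_FLAVOR="CPU"           # CPU inference; slower than a CUDA GPU
./deploy/bin/setup-ee.sh

# Watch for the edge-endpoint pod (and any inference pods) to become ready.
kubectl get pods -n "$DEPLOYMENT_NAMESPACE"
```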