Integration tests #149
base: main

test/integration/integration_test.py
@@ -0,0 +1,144 @@
## The integration tests consist of a back and forth between Python (which we use to create and validate
## image queries) and bash (which we use to check that deployments are properly rolled out).
## This file contains all the modes that we use for integration testing.
## Modes:
## - Create the integration test detector
## - Submit the initial dog/cat image query to the edge, expect low confidence
## - Train the edge model by submitting image queries to the cloud
## - Submit the final dog/cat image query to the edge, expect high confidence
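## Example invocations (illustrative; these mirror how run_tests.sh and setup_and_run_tests.sh drive the modes):
##   poetry run python test/integration/integration_test.py --mode create_detector
##   poetry run python test/integration/integration_test.py -m initial -d $DETECTOR_ID
##   poetry run python test/integration/integration_test.py -m improve_model -d $DETECTOR_ID
##   poetry run python test/integration/integration_test.py -m final -d $DETECTOR_ID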

import argparse
import random
import time
from typing import Optional

from groundlight import Groundlight, GroundlightClientError
from model import Detector

# One cat IQ and one dog IQ are submitted per iteration, so twice this many IQs are sent in total.
NUM_IQS_PER_CLASS_TO_IMPROVE_MODEL = 10
ACCEPTABLE_TRAINED_CONFIDENCE = 0.8


def get_groundlight():
    try:
        return Groundlight(endpoint="http://localhost:30107")
    except GroundlightClientError:
        # We use this fallback to create a detector, since we create one before setting up the
        # edge endpoint. Maybe we should be more careful about verifying that's the case we're in.
        return Groundlight()


gl = get_groundlight()


def main():
    parser = argparse.ArgumentParser(
        description="Submit a dog and cat image to k3s Groundlight edge-endpoint for integration tests"
    )
    parser.add_argument(
        "-m",
        "--mode",
        type=str,
        choices=["create_detector", "initial", "improve_model", "final"],
        help="Mode of operation: 'create_detector', 'initial', 'improve_model', or 'final'",
        required=True,
    )
    parser.add_argument("-d", "--detector_id", type=str, help="id of detector to use", required=False)
    args = parser.parse_args()

    detector = None
    if args.detector_id:
        detector = gl.get_detector(args.detector_id)

    if detector is None and args.mode != "create_detector":
        raise ValueError("You must provide a detector id unless the mode is 'create_detector'")

    if args.mode == "create_detector":
        detector_id = create_cat_detector()
        print(detector_id)  # print so that the shell script can save the value
    elif args.mode == "initial":
        submit_initial(detector)
    elif args.mode == "improve_model":
        improve_model(detector)
    elif args.mode == "final":
        submit_final(detector)


def create_cat_detector() -> str:
    """Create the initial cat detector that we use for the integration tests. We create
    a new one each time."""
    random_number = random.randint(0, 9999)

Review comment: Is it worth increasing the range here, just to make it even more unlikely that we get a collision? You could also generate a ksuid, like we do elsewhere in the codebase, to ensure there are no issues.
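
    # A hypothetical sketch per the review note above (uuid4 used as a stdlib stand-in for a
    # ksuid; the random suffix makes name collisions effectively impossible):
    #   import uuid
    #   random_suffix = uuid.uuid4().hex[:12]
    #   detector = gl.create_detector(name=f"cat_{random_suffix}", query="Is this a cat?")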

    detector = gl.create_detector(name=f"cat_{random_number}", query="Is this a cat?")
    detector_id = detector.id
    return detector_id


def submit_initial(detector: Detector):
    """Submit the initial dog and cat images to the edge. Since this method is called at the beginning
    of integration tests, we expect low confidence from the default edge model."""
    start_time = time.time()
    # 0.5 threshold to ensure we get an edge answer
    iq_yes = _submit_cat(detector, confidence_threshold=0.5)
    iq_no = _submit_dog(detector, confidence_threshold=0.5)
    end_time = time.time()
    print(f"Time taken to get low confidence response from edge: {end_time - start_time} seconds")

    # A bit dependent on the current default model,
    # but that one always defaults to 0.5 confidence at first.
    assert iq_yes.result.confidence == 0.5
    assert iq_no.result.confidence == 0.5

Review comment (on the two assertions above): At some point we're planning to make the default edge binary pipeline be our normal default binary pipeline, which does make actual zero-shot predictions (still close to 0.5, but not exactly 0.5). Maybe this should check that the confidence is in a slightly wider range? I'm worried we won't remember to update this when we change the default edge pipeline.
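
    # A hypothetical variant addressing the review note above (the 0.45-0.55 band is
    # illustrative, not a value from this PR):
    #   assert 0.45 <= iq_yes.result.confidence <= 0.55
    #   assert 0.45 <= iq_no.result.confidence <= 0.55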


def improve_model(detector: Detector):
    """Improve the edge model by escalating image queries to the cloud."""
    for _ in range(NUM_IQS_PER_CLASS_TO_IMPROVE_MODEL):
        # There's a subtle tradeoff here: submitting through the edge means the IQs get
        # escalated to the cloud and thus train our model, but this process is slow.
        iq_yes = _submit_cat(detector, confidence_threshold=1, wait=0)
        gl.add_label(image_query=iq_yes, label="YES")
        iq_no = _submit_dog(detector, confidence_threshold=1, wait=0)
        gl.add_label(image_query=iq_no, label="NO")


def submit_final(detector: Detector):
    """This is called at the end of our integration tests to make sure the edge model
    is now confident."""
    # 0.5 threshold to ensure we get an edge answer
    start_time = time.time()
    iq_yes = _submit_cat(detector, confidence_threshold=0.5)
    iq_no = _submit_dog(detector, confidence_threshold=0.5)
    end_time = time.time()
    print(f"Time taken to get high confidence response from edge: {end_time - start_time} seconds")

    assert iq_yes.result.confidence > ACCEPTABLE_TRAINED_CONFIDENCE
    assert iq_yes.result.label.value == "YES"
    print(f"Final confidence for yes result: {iq_yes.result.confidence}")

    assert iq_no.result.confidence > ACCEPTABLE_TRAINED_CONFIDENCE
    assert iq_no.result.label.value == "NO"
    print(f"Final confidence for no result: {iq_no.result.confidence}")


def _submit_cat(detector: Detector, confidence_threshold: float, wait: Optional[int] = None):
    return _submit_dog_or_cat(
        detector=detector, confidence_threshold=confidence_threshold, img_file="./test/integration/cat.jpg", wait=wait
    )


def _submit_dog(detector: Detector, confidence_threshold: float, wait: Optional[int] = None):
    return _submit_dog_or_cat(
        detector=detector, confidence_threshold=confidence_threshold, img_file="./test/integration/dog.jpg", wait=wait
    )


def _submit_dog_or_cat(detector: Detector, confidence_threshold: float, img_file: str, wait: Optional[int] = None):
    image_query = gl.submit_image_query(
        detector=detector, confidence_threshold=confidence_threshold, image=img_file, wait=wait
    )
    return image_query


if __name__ == "__main__":
    main()

test/integration/run_tests.sh
@@ -0,0 +1,48 @@
#!/bin/bash

# This script runs the integration tests, assuming k3s and the detector were set up via setup_and_run_tests.sh.
# Run all tests with: > make test-with-k3s
# It combines Python (for image submission) and Bash (for k3s checks).
# The test includes:
# 1) Running pytest live tests for health, readiness, and image submission to the edge.
# 2) Submitting an image to the edge using a cat/dog detector,
#    checking for low confidence, training the edge detector via cloud escalation, and
#    verifying model improvement in a new edge pod.

# First do basic pytest integration-style tests.
# We skip the async test because we're set up for edge answers.
if ! poetry run pytest -m live -k "not test_post_image_query_via_sdk_want_async"; then
    echo "Error: pytest integration tests failed."
    exit 1
fi

echo "Submitting initial IQs, ensuring we get low confidence at first"
# Submit initial IQs to check that we get low confidence answers at first.
poetry run python test/integration/integration_test.py -m initial -d $DETECTOR_ID

echo "Training detector in the cloud"
# Now we improve the model by submitting many IQs and labels.
poetry run python test/integration/integration_test.py -m improve_model -d $DETECTOR_ID

# Give the new model time to be pulled. We're a bit generous here.
echo "Now we sleep for $((3 * REFRESH_RATE)) seconds to get a newer model"
sleep $((3 * REFRESH_RATE))
echo "Ensuring a new pod for the deployment $DETECTOR_ID_WITH_DASHES has been created in the last $((3 * REFRESH_RATE)) seconds..."

# Ensure our most recent pod is brand new.
most_recent_pod=$(kubectl get pods -n $DEPLOYMENT_NAMESPACE -l app=inference-server -o jsonpath='{.items[-1].metadata.name}')
current_time=$(date +%s)
pod_creation_time=$(kubectl get pod $most_recent_pod -n $DEPLOYMENT_NAMESPACE -o jsonpath='{.metadata.creationTimestamp}')
pod_creation_time_seconds=$(date -d "$pod_creation_time" +%s)  # convert the ISO 8601 timestamp to epoch seconds
time_difference=$((current_time - pod_creation_time_seconds))

# Check if the pod was created within 3 times the refresh rate.
if [ $(echo "$time_difference <= $REFRESH_RATE * 3" | bc) -eq 1 ]; then
    echo "A new pod for the deployment $DETECTOR_ID_WITH_DASHES has been created within 3 times the refresh rate."
else
    echo "Error: No new pod for the deployment $DETECTOR_ID_WITH_DASHES has been created within 3 times the refresh rate."
    exit 1
fi
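
# Note: both values are integers, so plain shell arithmetic would work here too, without the
# dependency on bc: [ "$time_difference" -le $((REFRESH_RATE * 3)) ]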

echo "Now we check if the edge model performs well..."
poetry run python test/integration/integration_test.py -m final -d $DETECTOR_ID
echo "All tests pass :D"

setup_and_run_tests.sh
@@ -0,0 +1,81 @@
#!/bin/bash

# This script sets up the k3s testing environment. Once it has run, you can run the
# live tests, which will hit the API service that got set up.
# Altogether, you can run everything with:
# > make test-with-k3s

if [ -z "$GROUNDLIGHT_API_TOKEN" ]; then
    echo "Error: GROUNDLIGHT_API_TOKEN environment variable is not set."
    exit 1
fi

if ! command -v k3s &> /dev/null; then
    echo "Error: you must have k3s set up"
    exit 1
fi

# First create a detector to use for testing:
export DETECTOR_ID=$(poetry run python test/integration/integration_test.py --mode create_detector)
echo "created detector with id: $DETECTOR_ID"

# Set some other environment variables.
export PERSISTENT_VOLUME_NAME="test-with-k3s-pv"
export EDGE_ENDPOINT_PORT="30107"
export INFERENCE_FLAVOR="CPU"
export LIVE_TEST_ENDPOINT="http://localhost:$EDGE_ENDPOINT_PORT"
export REFRESH_RATE=60  # not actually different from the default, but we may want to tweak this

# Update the config for this detector, such that we always take edge answers.
# But first, save the template to a temporary file.
cp configs/edge-config.yaml configs/edge-config.yaml.tmp
sed -i "s/detector_id: \"\"/detector_id: \"$DETECTOR_ID\"/" configs/edge-config.yaml
sed -i "s/refresh_rate: 60/refresh_rate: $REFRESH_RATE/" configs/edge-config.yaml

# Now delete the persistent volume, in case it's in a bad state.
if kubectl get pv "$PERSISTENT_VOLUME_NAME" &> /dev/null; then
    echo "Persistent volume $PERSISTENT_VOLUME_NAME exists. Deleting it..."
    kubectl delete pv "$PERSISTENT_VOLUME_NAME" &  # backgrounded, so a hung delete doesn't block the script
    echo "Requested deletion of persistent volume $PERSISTENT_VOLUME_NAME."
else
    echo "Persistent volume $PERSISTENT_VOLUME_NAME does not exist. No action needed."
fi

export DEPLOYMENT_NAMESPACE="test-with-k3s"
if ! kubectl get namespace $DEPLOYMENT_NAMESPACE &> /dev/null; then
    kubectl create namespace $DEPLOYMENT_NAMESPACE
fi

# Build the Docker image and import it into k3s.
echo "Building the Docker image..."
export IMAGE_TAG=$(./deploy/bin/git-tag-name.sh)
./deploy/bin/build-push-edge-endpoint-image.sh dev
./deploy/bin/setup-ee.sh
# Restore the config file.
mv configs/edge-config.yaml.tmp configs/edge-config.yaml

echo "Waiting for edge-endpoint pods to roll out..."

if ! kubectl rollout status deployment/edge-endpoint -n $DEPLOYMENT_NAMESPACE --timeout=5m; then
    echo "Error: edge-endpoint pods failed to roll out within the timeout period."
    exit 1
fi

echo "Edge-endpoint pods have successfully rolled out."

echo "Waiting for the inference deployment to roll out (inferencemodel-$DETECTOR_ID)..."

# Kubernetes resource names must be lowercase and cannot contain underscores, hence this transformation.
export DETECTOR_ID_WITH_DASHES=$(echo ${DETECTOR_ID//_/-} | tr '[:upper:]' '[:lower:]')
if ! kubectl rollout status deployment/inferencemodel-$DETECTOR_ID_WITH_DASHES -n $DEPLOYMENT_NAMESPACE --timeout=5m; then
    echo "Error: inference deployment for detector $DETECTOR_ID_WITH_DASHES failed to roll out within the timeout period."
    exit 1
fi
echo "Inference deployment for detector $DETECTOR_ID has successfully rolled out."

./test/integration/run_tests.sh