Skip to content

CICD: Runs a full GPU install on an EC2 instance #1403

CICD: Runs a full GPU install on an EC2 instance

CICD: Runs a full GPU install on an EC2 instance #1403

Workflow file for this run

# Workflow-level settings for the "cicd" pipeline: triggers, run concurrency,
# and environment variables shared by every job.
name: cicd

on:
  pull_request:
    branches:
      - main
    types:
      - opened
      - synchronize
      - reopened
  # Lets the workflow be started by hand from the GitHub console.
  # Inputs could be declared here, but none are needed.
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  # A newer run in the same PR cancels any run that is still in progress.
  cancel-in-progress: true

env:
  PYTHON_VERSION: '3.11'
  POETRY_VERSION: '1.8.3'
  # Token associated with "prod-biggies" (shared credentials are on 1password).
  GROUNDLIGHT_API_TOKEN: ${{ secrets.GROUNDLIGHT_API_TOKEN }}
  # The NGINX proxy endpoint.
  GROUNDLIGHT_ENDPOINT: http://localhost:30101

jobs:
# Runs the edge-endpoint unit tests: first the ones that need no container, then
# the "_live" ones against the edge-endpoint image running in Docker on the runner.
test-general-edge-endpoint:
  runs-on: ubuntu-22.04
  steps:
    - name: Check out code
      uses: actions/checkout@v3
    - name: Set up python
      id: setup_python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}
    - name: Install poetry
      uses: snok/install-poetry@v1
      with:
        version: ${{ env.POETRY_VERSION }}
        virtualenvs-create: true
        virtualenvs-in-project: true
        installer-parallel: true
    - name: Load Cached venv
      id: cached-poetry-dependencies
      uses: actions/cache@v3
      with:
        path: .venv
        key: venv-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('**/poetry.lock') }}
    - name: Install edge-endpoint's python dependencies
      run: |
        poetry install --no-interaction --no-root
    - name: Run Unit Tests (that dont require docker)
      run: |
        # Point these tests to the cloud endpoint, because to test the edge-endpoint, they
        # don't actually issue requests. They internally set up a test client and server.
        # The cloud endpoint is needed so that the /me endpoint succeeds and we can actually
        # use the GL client.
        GROUNDLIGHT_ENDPOINT="https://api.groundlight.ai/"
        source test/setup_plain_test_env.sh
        poetry run pytest -vs -k "not _live"
    - name: Install Docker
      run: |
        # -y keeps every apt call non-interactive regardless of the runner's apt config.
        sudo apt-get update
        sudo apt-get remove -y moby-runc
        sudo apt-get install -y apt-transport-https ca-certificates curl software-properties-common
        curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
        sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
        sudo apt-get update
        sudo apt-get install -y docker-ce
    - name: Build Docker Image
      run: docker build --tag groundlight-edge .
    - name: Start Docker Container
      id: start_container
      run: |
        source test/setup_plain_test_env.sh
        echo "EDGE_CONFIG=$EDGE_CONFIG"
        container_id=$(docker run \
          -e LOG_LEVEL=DEBUG \
          -e EDGE_CONFIG \
          -d -p 30101:30101 \
          groundlight-edge)
        # Publish the container id as a step output through $GITHUB_OUTPUT; the
        # older `::set-output` workflow command is deprecated.
        echo "container_id=$container_id" >> "$GITHUB_OUTPUT"
    - name: Run Unit Tests (that do require docker)
      run: |
        GROUNDLIGHT_ENDPOINT=http://localhost:30101
        source test/setup_plain_test_env.sh
        poetry run pytest -k "_live"
    - name: Dump Logs from Docker Container
      # Runs even after a failure, but only if a container was actually started;
      # otherwise `docker logs` would be called with no argument.
      if: always() && steps.start_container.outputs.container_id != ''
      run: docker logs ${{ steps.start_container.outputs.container_id }}
    - name: Stop Docker Container
      # This ensures that we always stop the container regardless of the outcomes of
      # the previous steps (skipped only when no container was started).
      if: always() && steps.start_container.outputs.container_id != ''
      run: docker stop ${{ steps.start_container.outputs.container_id }}
# Installs k3s on the runner and then runs the `validate-setup-ee` make target.
validate-setup-ee:
  runs-on: ubuntu-22.04
  steps:
    - name: Check out code
      uses: actions/checkout@v3

    - name: Install k3s
      run: ./deploy/bin/install-k3s.sh

    # Credentials come from repository secrets.
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: us-west-2
        aws-access-key-id: ${{ secrets.AWS_GL_PUBLIC_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_GL_PUBLIC_SECRET_ACCESS_KEY }}

    - name: Install poetry
      uses: snok/install-poetry@v1
      with:
        version: ${{ env.POETRY_VERSION }}
        installer-parallel: true
        virtualenvs-create: true
        virtualenvs-in-project: true

    - name: Install edge-endpoint's python dependencies
      run: poetry install --no-interaction --no-root

    - name: Validate setup edge endpoint
      run: make validate-setup-ee
# This job is kept separate from validate-setup-ee: running both on the same runner
# exhausts its disk space, and both can be slow, so it is best to run them in parallel.
test-with-k3s:
  runs-on: ubuntu-22.04
  steps:
    - name: Check out code
      uses: actions/checkout@v3

    - name: Install k3s
      run: ./deploy/bin/install-k3s.sh

    # Credentials come from repository secrets.
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: us-west-2
        aws-access-key-id: ${{ secrets.AWS_GL_PUBLIC_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_GL_PUBLIC_SECRET_ACCESS_KEY }}

    - name: Install poetry
      uses: snok/install-poetry@v1
      with:
        version: ${{ env.POETRY_VERSION }}
        installer-parallel: true
        virtualenvs-create: true
        virtualenvs-in-project: true

    - name: Install edge-endpoint's python dependencies
      run: poetry install --no-interaction --no-root

    - name: Run tests with k3s
      run: make test-with-k3s
# Run Groundlight SDK tests against the edge proxy endpoint: the edge-endpoint image
# is built and started in Docker on the runner, then the SDK's `test-4edge` make
# target is run from a fresh checkout of the SDK repo.
test-sdk:
  runs-on: ubuntu-22.04
  steps:
    - name: Check out code
      uses: actions/checkout@v3
    - name: Set up python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}
    - name: Install Docker
      run: |
        # -y keeps every apt call non-interactive regardless of the runner's apt config.
        sudo apt-get update
        sudo apt-get remove -y moby-runc
        sudo apt-get install -y apt-transport-https ca-certificates curl software-properties-common
        curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
        sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
        sudo apt-get update
        sudo apt-get install -y docker-ce
    - name: Build Docker Image
      run: docker build --tag groundlight-edge .
    - name: Start Docker Container
      id: start_container
      run: |
        source test/setup_plain_test_env.sh
        echo "EDGE_CONFIG=$EDGE_CONFIG"
        container_id=$(docker run \
          -e LOG_LEVEL=DEBUG \
          -e EDGE_CONFIG \
          -d -p 30101:30101 \
          groundlight-edge)
        # Publish the container id as a step output through $GITHUB_OUTPUT; the
        # older `::set-output` workflow command is deprecated.
        echo "container_id=$container_id" >> "$GITHUB_OUTPUT"
    - name: Install poetry
      uses: snok/install-poetry@v1
      with:
        version: ${{ env.POETRY_VERSION }}
        virtualenvs-create: true
        virtualenvs-in-project: true
        installer-parallel: true
    - name: Load Cached venv
      id: cached-poetry-dependencies
      uses: actions/cache@v3
      with:
        path: .venv
        key: venv-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('**/poetry.lock') }}
    # Note that we're pulling the latest main from the SDK repo.
    # This might be ahead of what's published to pypi, but it's useful to test
    # things before they're released.
    - name: Checkout Groundlight SDK
      uses: actions/checkout@v3
      with:
        repository: groundlight/python-sdk
        path: groundlight-sdk
    - name: Install Groundlight SDK dependencies
      run: |
        cd groundlight-sdk
        make install
    - name: Run Groundlight SDK tests against Prod API via Edge Proxy Endpoint
      run: |
        cd groundlight-sdk
        make test-4edge
        cd ..
    - name: Dump Logs from Docker Container
      # Runs even after a failure, but only if a container was actually started;
      # otherwise `docker logs` would be called with no argument.
      if: always() && steps.start_container.outputs.container_id != ''
      run: docker logs ${{ steps.start_container.outputs.container_id }}
    - name: Stop Docker Container
      # This ensures that we always stop the container regardless of the outcomes of
      # the previous steps (skipped only when no container was started).
      if: always() && steps.start_container.outputs.container_id != ''
      run: docker stop ${{ steps.start_container.outputs.container_id }}
# Creates an EEUT instance with Pulumi, waits for the edge-endpoint install on it to
# finish, and then uses the Groundlight SDK through that instance.
G4-end-to-end:
  # Note this job can run multiple times in parallel because the stack name is unique
  # for the run. How much we want to do this is TBD.
  runs-on: self-hosted
  # Run this on any PR.
  # Question: Should we wait until the other tests pass before running this?
  # needs:
  #   - validate-setup-ee
  #   - test-with-k3s
  #   - test-sdk
  env:
    PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_CICD_PAT }}
    PYTHONUNBUFFERED: '1'
  defaults:
    run:
      working-directory: cicd/pulumi
  steps:
    - name: Check out code
      uses: actions/checkout@v3
    - name: Name the stack
      run: |
        # Set to expire in 60 minutes
        EXPIRATION_TIME=$(($(date +%s) + 60 * 60))
        STACK_NAME=ee-cicd-${{ github.run_id }}-expires-${EXPIRATION_TIME}
        echo "STACK_NAME=${STACK_NAME}" | tee -a "$GITHUB_ENV"
        # We give the stack a name including its expiration time so that the sweeper
        # (in sweeper-eeut.yaml) knows when to get rid of it.
        # This saves us having to clean up here, which can be quite slow (~7 minutes for a g4)
    - name: Check that aws credentials are set
      # Credentials come from an IAM profile on the runner instance
      run: |
        aws sts get-caller-identity
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}
    - name: Install Pulumi
      run: |
        curl -fsSL https://get.pulumi.com | sh
        export HOME=$(eval echo ~$(whoami))
        # Make the pulumi binary available to the following steps.
        echo "$HOME/.pulumi/bin" >> "$GITHUB_PATH"
    - name: Install uv
      uses: astral-sh/setup-uv@v5
    - name: Make sure uv is working
      run: |
        uv --version
        uv sync
        uv run python --version
    - name: Check that pulumi is installed and authenticated
      run: |
        uv run pulumi whoami
    - name: Prepare pulumi stack
      run: |
        uv run pulumi stack init "${STACK_NAME}"
        uv run pulumi config
    - name: Pick which commit we will test
      env:
        # The ref is handed to the script through the environment instead of being
        # expanded into the script text, so a crafted branch name cannot inject shell
        # commands on the self-hosted runner. When the run was not triggered by a
        # pull request (e.g. workflow_dispatch) there is no PR head ref, so fall back
        # to the name of the ref the workflow is running on.
        COMMIT_TO_TEST: ${{ github.event.pull_request.head.ref || github.ref_name }}
      run: |
        echo "This is a bit subtle."
        echo "We can't just test on 'main' for fairly obvious reasons - we"
        echo "want to test the code in this PR's branch. The current commit"
        echo "right here is ${GITHUB_SHA}, which is likely a merge commit."
        echo "Merge commits are challenging. They are what would happen if"
        echo "this PR were to be merged into its base branch. But they are"
        echo "ephemeral things and not available in the public repo. So the"
        echo "EEUT can't just check them out. Making them available to the"
        echo "EEUT would require pushing them and polluting the repo. So,"
        echo "for now, we are going to use the PR's head ref"
        echo "${COMMIT_TO_TEST}, which is the commit"
        echo "that was used to create the PR. Recognizing that this doesn't"
        echo "reflect what will happen after merge. But it's simpler."
        # TODO: test on the merge commit by pushing it to the repo with a temporary
        # branch, and then clean up the branch later.
        uv run pulumi config set ee-cicd:targetCommit "${COMMIT_TO_TEST}"
    - name: Create the EEUT instance
      run: |
        uv run pulumi up --yes
    - name: Check that EE install succeeded
      run: |
        uv run fab connect --patience=150
        uv run fab wait-for-ee-setup
    - name: Wait for K8 to load everything
      run: |
        uv run fab check-k8-deployments
        uv run fab check-server-port
    - name: Use groundlight sdk through EE
      run: |
        # Override the workflow-level endpoint with the EEUT's private address.
        EEUT_IP=$(uv run pulumi stack output eeut_private_ip)
        export GROUNDLIGHT_ENDPOINT=http://${EEUT_IP}:30101
        uv run groundlight whoami
        uv run groundlight list-detectors
    - name: Thank the worker and shut down
      # Runs regardless of the outcome of the earlier steps.
      if: always()
      run: |
        echo "Strong work, G4! Now go to sleep. The grim sweeper will visit soon."
        # This saves money and frees up resources
        uv run fab shutdown-instance
# Builds the multiplatform edge-endpoint image and pushes it to ECR.
# Only runs on main or when the workflow is dispatched manually.
build-push-edge-endpoint-multiplatform:
  if: ${{ github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch' }}
  # We only run this action if all the prior test actions succeed.
  # test-with-k3s is listed too so that the gate really covers every test job.
  needs:
    - test-general-edge-endpoint
    - test-sdk
    - validate-setup-ee
    - test-with-k3s
    - G4-end-to-end
  runs-on: ubuntu-22.04
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ secrets.AWS_GL_PUBLIC_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_GL_PUBLIC_SECRET_ACCESS_KEY }}
        aws-region: us-west-2
    - name: Login to Amazon ECR
      id: login-ecr
      uses: aws-actions/amazon-ecr-login@v1
      with:
        mask-password: "true"
    - name: Check out code
      uses: actions/checkout@v4
    - name: Build and Push Multiplatform edge-endpoint Image to ECR
      # Abort the build/push if it has not finished within 45 minutes.
      timeout-minutes: 45
      run: ./deploy/bin/build-push-edge-endpoint-image.sh
# Bumps the edge-endpoint submodule in groundlight/glhub and pushes the result to
# glhub's main branch.
# NOTE(review): with the triggers declared in this workflow, github.ref equals
# refs/heads/main only for a manual dispatch from main - confirm that is intended.
update-glhub:
  if: github.ref == 'refs/heads/main'
  needs: validate-setup-ee
  runs-on: ubuntu-latest
  environment: live
  steps:
    - name: Checkout glhub
      uses: actions/checkout@v4
      with:
        repository: groundlight/glhub
        token: ${{ secrets.BOT_GITHUB_TOKEN }}
        path: glhub
    - name: Update GLHub
      env:
        GIT_AUTHOR_NAME: "edge-glhub-bot"
        # NOTE(review): this address (also used for user.email below) looks like
        # an obfuscation placeholder - confirm the real value.
        GIT_AUTHOR_EMAIL: "[email protected]"
        # Handed to the script through the environment instead of being expanded
        # into the script text.
        BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
      run: |
        cd glhub
        git config --global user.email "[email protected]"
        git config --global user.name "edge-glhub-bot"
        git submodule update --init --recursive
        git submodule update --remote edge-endpoint
        git add .
        # `git commit` exits non-zero when nothing is staged. An already up-to-date
        # submodule is a success, not a failure, so stop here in that case.
        if git diff --cached --quiet; then
          echo "edge-endpoint submodule is already up to date; nothing to push."
          exit 0
        fi
        git commit -m "Update edge endpoint submodule"
        git push "https://edge-glhub-bot:${BOT_GITHUB_TOKEN}@github.com/groundlight/glhub.git" main