# CICD: Runs a full GPU install on an EC2 instance #1403
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow-level settings: name, triggers, concurrency policy and the environment
# variables shared by every job.
name: cicd
on:
  # Runs for pull requests that target main.
  pull_request:
    branches:
      - main
    types: [opened, synchronize, reopened]
  workflow_dispatch:
    # This allows it to be triggered manually in the github console
    # You could put inputs here, but we don't need them.
concurrency:
  # One group per PR; falls back to the ref when there is no PR number.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  # This causes it to cancel previous in-progress actions in the same PR
  cancel-in-progress: true
env:
  # Quoted so the versions stay strings instead of being read as numbers.
  PYTHON_VERSION: "3.11"
  POETRY_VERSION: "1.8.3"
  # This is the token associated with "prod-biggies" (with shared credentials on 1password)
  GROUNDLIGHT_API_TOKEN: ${{ secrets.GROUNDLIGHT_API_TOKEN }}
  # This is the NGINX proxy endpoint
  GROUNDLIGHT_ENDPOINT: http://localhost:30101
jobs:
  # Runs the pytest suite in two passes: tests that do not need Docker first, then
  # the "_live" tests against a locally built and started edge-endpoint container.
  test-general-edge-endpoint:
    runs-on: ubuntu-22.04
    steps:
      - name: Check out code
        # checkout@v4 to match the release already used by the later jobs in this file.
        uses: actions/checkout@v4
      - name: Set up python
        id: setup_python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install poetry
        uses: snok/install-poetry@v1
        with:
          version: ${{ env.POETRY_VERSION }}
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Load Cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          # Keyed on OS, python version and the lock file contents.
          key: venv-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('**/poetry.lock') }}
      - name: Install edge-endpoint's python dependencies
        run: |
          poetry install --no-interaction --no-root
      - name: Run Unit Tests (that dont require docker)
        run: |
          # Point these tests to the cloud endpoint, because to test the edge-endpoint, they
          # don't actually issue requests. They internally set up a test client and server.
          # The cloud endpoint is needed so that the /me endpoint succeeds and we can actually
          # use the GL client.
          GROUNDLIGHT_ENDPOINT="https://api.groundlight.ai/"
          source test/setup_plain_test_env.sh
          poetry run pytest -vs -k "not _live"
      - name: Install Docker
        run: |
          sudo apt-get update
          sudo apt-get remove moby-runc
          sudo apt-get install apt-transport-https ca-certificates curl software-properties-common
          curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
          sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
          sudo apt-get update
          sudo apt-get install docker-ce
      - name: Build Docker Image
        run: docker build --tag groundlight-edge .
      - name: Start Docker Container
        id: start_container
        run: |
          source test/setup_plain_test_env.sh
          echo "EDGE_CONFIG=$EDGE_CONFIG"
          container_id=$(docker run \
            -e LOG_LEVEL=DEBUG \
            -e EDGE_CONFIG \
            -d -p 30101:30101 \
            groundlight-edge)
          # Publish the container id as a step output through the GITHUB_OUTPUT file;
          # the older "::set-output" workflow command is deprecated.
          echo "container_id=$container_id" >> "$GITHUB_OUTPUT"
      - name: Run Unit Tests (that do require docker)
        run: |
          GROUNDLIGHT_ENDPOINT=http://localhost:30101
          source test/setup_plain_test_env.sh
          poetry run pytest -k "_live"
      - name: Dump Logs from Docker Container
        if: always()
        run: docker logs ${{ steps.start_container.outputs.container_id }}
      - name: Stop Docker Container
        # This ensures that we always stop the container regardless of the outcomes of
        # the previous steps
        if: always()
        run: docker stop ${{ steps.start_container.outputs.container_id }}

  # Installs k3s on the runner and then runs `make validate-setup-ee`.
  validate-setup-ee:
    runs-on: ubuntu-22.04
    steps:
      - name: Check out code
        uses: actions/checkout@v4
      - name: Install k3s
        run: |
          ./deploy/bin/install-k3s.sh
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_GL_PUBLIC_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_GL_PUBLIC_SECRET_ACCESS_KEY }}
          aws-region: us-west-2
      - name: Install poetry
        uses: snok/install-poetry@v1
        with:
          version: ${{ env.POETRY_VERSION }}
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Install edge-endpoint's python dependencies
        run: |
          poetry install --no-interaction --no-root
      - name: Validate setup edge endpoint
        run: |
          make validate-setup-ee

  # We run this separately from validate-setup-ee since we run out of disk space doing
  # both of them on the same runner, and they can be slow, so it's best to do them in parallel.
  test-with-k3s:
    runs-on: ubuntu-22.04
    steps:
      - name: Check out code
        uses: actions/checkout@v4
      - name: Install k3s
        run: |
          ./deploy/bin/install-k3s.sh
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_GL_PUBLIC_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_GL_PUBLIC_SECRET_ACCESS_KEY }}
          aws-region: us-west-2
      - name: Install poetry
        uses: snok/install-poetry@v1
        with:
          version: ${{ env.POETRY_VERSION }}
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Install edge-endpoint's python dependencies
        run: |
          poetry install --no-interaction --no-root
      - name: Run tests with k3s
        run: |
          make test-with-k3s

  # Run Groundlight SDK tests against the edge proxy endpoint
  test-sdk:
    runs-on: ubuntu-22.04
    steps:
      - name: Check out code
        uses: actions/checkout@v4
      - name: Set up python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install Docker
        run: |
          sudo apt-get update
          sudo apt-get remove moby-runc
          sudo apt-get install apt-transport-https ca-certificates curl software-properties-common
          curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
          sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
          sudo apt-get update
          sudo apt-get install docker-ce
      - name: Build Docker Image
        run: docker build --tag groundlight-edge .
      - name: Start Docker Container
        id: start_container
        run: |
          source test/setup_plain_test_env.sh
          echo "EDGE_CONFIG=$EDGE_CONFIG"
          container_id=$(docker run \
            -e LOG_LEVEL=DEBUG \
            -e EDGE_CONFIG \
            -d -p 30101:30101 \
            groundlight-edge)
          # Publish the container id as a step output through the GITHUB_OUTPUT file;
          # the older "::set-output" workflow command is deprecated.
          echo "container_id=$container_id" >> "$GITHUB_OUTPUT"
      - name: Install poetry
        uses: snok/install-poetry@v1
        with:
          version: ${{ env.POETRY_VERSION }}
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true
      - name: Load Cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v3
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('**/poetry.lock') }}
      # Note that we're pulling the latest main from the SDK repo
      # This might be ahead of what's published to pypi, but it's useful to test things before they're released.
      - name: Checkout Groundlight SDK
        uses: actions/checkout@v4
        with:
          repository: groundlight/python-sdk
          path: groundlight-sdk
      - name: Install Groundlight SDK dependencies
        run: |
          cd groundlight-sdk
          make install
      - name: Run Groundlight SDK tests against Prod API via Edge Proxy Endpoint
        run: |
          cd groundlight-sdk
          make test-4edge
          cd ..
      - name: Dump Logs from Docker Container
        if: always()
        run: docker logs ${{ steps.start_container.outputs.container_id }}
      - name: Stop Docker Container
        # This ensures that we always stop the container regardless of the outcomes of
        # the previous steps
        if: always()
        run: docker stop ${{ steps.start_container.outputs.container_id }}

  # Creates an instance with Pulumi (project in cicd/pulumi), waits for the edge
  # endpoint install on it to finish, and exercises it through the groundlight CLI.
  G4-end-to-end:
    # Note this job can run multiple times in parallel because the stack name is unique
    # for the run. How much we want to do this is TBD.
    runs-on: self-hosted
    # Run this on any PR.
    # Question: Should we wait until the other tests pass before running this?
    # needs:
    #   - validate-setup-ee
    #   - test-with-k3s
    #   - test-sdk
    env:
      PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_CICD_PAT }}
      # Quoted: environment variable values are strings.
      PYTHONUNBUFFERED: "1"
    defaults:
      run:
        working-directory: cicd/pulumi
    steps:
      - name: Check out code
        uses: actions/checkout@v4
      - name: Name the stack
        run: |
          # Set to expire in 60 minutes
          EXPIRATION_TIME=$(($(date +%s) + 60 * 60))
          STACK_NAME=ee-cicd-${{ github.run_id }}-expires-${EXPIRATION_TIME}
          echo "STACK_NAME=${STACK_NAME}" | tee -a $GITHUB_ENV
          # We give the stack a name including its expiration time so that the sweeper
          # (in sweeper-eeut.yaml) knows when to get rid of it.
          # This saves us having to clean up here, which can be quite slow (~7 minutes for a g4)
      - name: Check that aws credentials are set
        # Credentials come from an IAM profile on the runner instance
        run: |
          aws sts get-caller-identity
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install Pulumi
        run: |
          curl -fsSL https://get.pulumi.com | sh
          export HOME=$(eval echo ~$(whoami))
          echo "$HOME/.pulumi/bin" >> $GITHUB_PATH
      - name: Install uv
        uses: astral-sh/setup-uv@v5
      - name: Make sure uv is working
        run: |
          uv --version
          uv sync
          uv run python --version
      - name: Check that pulumi is installed and authenticated
        run: |
          uv run pulumi whoami
      - name: Prepare pulumi stack
        run: |
          uv run pulumi stack init ${STACK_NAME}
          uv run pulumi config
      - name: Pick which commit we will test
        env:
          # The branch name is chosen by the PR author, so it is handed to the script
          # as an environment variable instead of being expanded into the script text,
          # where shell metacharacters in the name would be executed on the runner.
          # For manual (workflow_dispatch) runs there is no pull_request payload, so
          # fall back to the name of the ref the run was started from.
          PR_HEAD_REF: ${{ github.event.pull_request.head.ref || github.ref_name }}
        run: |
          echo "This is a bit subtle."
          echo "We can't just test on 'main' for fairly obvious reasons - we"
          echo "want to test the code in this PR's branch. The current commit"
          echo "right here is ${GITHUB_SHA}, which is likely a merge commit."
          echo "Merge commits are challenging. They are what would happen if"
          echo "this PR were to be merged into its base branch. But they are"
          echo "ephemeral things and not available in the public repo. So the"
          echo "EEUT can't just check them out. Making them available to the"
          echo "EEUT would require pushing them and polluting the repo. So,"
          echo "for now, we are going to use the PR's head ref"
          echo "${PR_HEAD_REF}, which is the commit"
          echo "that was used to create the PR. Recognizing that this doesn't"
          echo "reflect what will happen after merge. But it's simpler."
          # TODO: test on the merge commit by pushing it to the repo with a temporary
          # branch, and then clean up the branch later.
          COMMIT_TO_TEST="${PR_HEAD_REF}"
          uv run pulumi config set ee-cicd:targetCommit "${COMMIT_TO_TEST}"
      - name: Create the EEUT instance
        run: |
          uv run pulumi up --yes
      - name: Check that EE install succeeded
        run: |
          uv run fab connect --patience=150
          uv run fab wait-for-ee-setup
      - name: Wait for K8 to load everything
        run: |
          uv run fab check-k8-deployments
          uv run fab check-server-port
      - name: Use groundlight sdk through EE
        run: |
          # Point the CLI at port 30101 on the instance's private IP from the stack outputs.
          EEUT_IP=$(uv run pulumi stack output eeut_private_ip)
          export GROUNDLIGHT_ENDPOINT=http://${EEUT_IP}:30101
          uv run groundlight whoami
          uv run groundlight list-detectors
      - name: Thank the worker and shut down
        if: always()
        run: |
          echo "Strong work, G4! Now go to sleep. The grim sweeper will visit soon."
          # This saves money and frees up resources
          uv run fab shutdown-instance

  # Builds the multiplatform edge-endpoint image and pushes it to ECR.
  build-push-edge-endpoint-multiplatform:
    # NOTE(review): for pull_request events github.ref is the PR merge ref
    # (refs/pull/<n>/merge), not refs/heads/main, so with the triggers declared in
    # this workflow this job appears to run only for workflow_dispatch - confirm.
    if: ${{ github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch' }}
    # We only run this action if all the prior test actions succeed
    # NOTE(review): test-with-k3s is not in this list - confirm that is intentional.
    needs:
      - test-general-edge-endpoint
      - test-sdk
      - validate-setup-ee
      - G4-end-to-end
    runs-on: ubuntu-22.04
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_GL_PUBLIC_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_GL_PUBLIC_SECRET_ACCESS_KEY }}
          aws-region: us-west-2
      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v1
        with:
          mask-password: "true"
      - name: Check out code
        uses: actions/checkout@v4
      - name: Build and Push Multiplatform edge-endpoint Image to ECR
        timeout-minutes: 45
        run: ./deploy/bin/build-push-edge-endpoint-image.sh

  # Moves the edge-endpoint submodule in groundlight/glhub to its latest remote
  # commit and pushes the result to glhub's main branch.
  update-glhub:
    if: github.ref == 'refs/heads/main'
    needs: validate-setup-ee
    runs-on: ubuntu-latest
    environment: live
    steps:
      - name: Checkout glhub
        uses: actions/checkout@v4
        with:
          repository: groundlight/glhub
          token: ${{ secrets.BOT_GITHUB_TOKEN }}
          path: glhub
      - name: Update GLHub
        run: |
          cd glhub
          git config --global user.email "[email protected]"
          git config --global user.name "edge-glhub-bot"
          git submodule update --init --recursive
          git submodule update --remote edge-endpoint
          git add .
          # `git commit` exits non-zero when nothing is staged, which would fail the
          # job whenever the submodule is already current. Skip commit and push then.
          if git diff --cached --quiet; then
            echo "edge-endpoint submodule is already up to date; nothing to commit."
            exit 0
          fi
          git commit -m "Update edge endpoint submodule"
          git push "https://edge-glhub-bot:${BOT_GITHUB_TOKEN}@github.com/groundlight/glhub.git" main
        env:
          # The token reaches the script through the environment rather than being
          # expanded into the script text.
          BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
          GIT_AUTHOR_NAME: "edge-glhub-bot"
          # NOTE(review): this value (and the user.email above) reads like a redaction
          # placeholder rather than a real address - confirm against the repository.
          GIT_AUTHOR_EMAIL: "[email protected]"