Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor + add unit and smoke testing workflows. #412

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions .github/workflows/reusable-start-ec2-runner.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# SPDX-License-Identifier: Apache-2.0
name: "[Reusable] Start EC2 self-hosted runner."

# Reusable workflow: configures AWS credentials, starts an EC2 instance via
# machulav/ec2-github-runner (mode: start), and exposes the resulting runner
# label and EC2 instance ID as workflow outputs so that the caller can target
# the runner with `runs-on` and stop it afterwards.
on:
  workflow_call:
    inputs:
      aws_region:
        description: "AWS datacenter identification; e.g. `us-west-2`"
        type: string
        required: true
      aws_ami:
        description: "AWS EC2 instance AMI ID"
        type: string
        required: true
      aws_ec2_runner_variant:
        description: "AWS EC2 instance type; e.g. `m7i.xlarge`"
        type: string
        required: true
    secrets:
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      # Secret names may not start with the reserved `GITHUB_` prefix, so a
      # secret with such a name can never exist in the repository and would
      # never be supplied through `secrets: inherit`. Use the `GH_` prefix.
      GH_PERSONAL_ACCESS_TOKEN:
        required: true
    outputs:
      runner_label:
        description: "Label of the started runner; pass to `runs-on`"
        value: ${{ jobs.start-ec2-runner.outputs.label }}
      runner_instance_id:
        description: "ID of the started EC2 instance; needed to stop it"
        value: ${{ jobs.start-ec2-runner.outputs.ec2-instance-id }}

# (jkunstle) NOTES:
#   1. This workflow's permissions don't seem relevant since it's reusable.
#   2. Concurrency protections seem irrelevant because it could be reused by
#      other workflows that need to do so.
#   3. Not sure if I need to edit the aws-resource-tags if this is a reusable
#      workflow; that might be non-generic information that this workflow
#      should take as input.

jobs:
  start-ec2-runner:
    runs-on: ubuntu-latest
    # Re-export the step outputs at job level so that the `workflow_call`
    # outputs above can reference them.
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}

    steps:
      - name: "Harden runner"
        uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.1
        with:
          egress-policy: audit

      - name: "Configure AWS credentials"
        uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ inputs.aws_region }}

      - name: "Start EC2 runner"
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@28fbe1c4d7d9ba74134ca5ebc559d5b0a989a856 # v2.3.8
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ${{ inputs.aws_ami }}
          ec2-instance-type: ${{ inputs.aws_ec2_runner_variant }}
          subnet-id: subnet-024298cefa3bedd61
          security-group-id: sg-06300447c4a5fbef3
          iam-role-name: instructlab-ci-runner
          # JSON list of tags applied to the EC2 instance. The `Name` tag is
          # currently fixed regardless of which workflow calls this one.
          aws-resource-tags: >
            [
              {"Key": "Name", "Value": "instructlab-ci-github-unittest-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
              {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
            ]
49 changes: 49 additions & 0 deletions .github/workflows/reusable-stop-ec2-runner.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# SPDX-License-Identifier: Apache-2.0
name: "[Reusable] Stop EC2 self-hosted runner."

# Reusable workflow: configures AWS credentials and calls
# machulav/ec2-github-runner (mode: stop) with the runner label and EC2
# instance ID supplied by the caller.
on:
  workflow_call:
    inputs:
      aws_region:
        description: "AWS datacenter identification; e.g. `us-west-2`"
        type: string
        required: true
      aws_ec2_runner_instance_id:
        description: "AWS EC2 ID for instance that's running"
        type: string
        required: true
      aws_ec2_runner_label:
        description: "AWS EC2 instance label for instance that's running"
        type: string
        required: true
    secrets:
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      # Secret names may not start with the reserved `GITHUB_` prefix, so a
      # secret with such a name can never exist in the repository and would
      # never be supplied through `secrets: inherit`. Use the `GH_` prefix.
      GH_PERSONAL_ACCESS_TOKEN:
        required: true

jobs:
  stop-ec2-runner:
    runs-on: ubuntu-latest
    steps:
      - name: "Harden runner"
        uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.1
        with:
          egress-policy: audit

      - name: "Configure AWS credentials"
        uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ inputs.aws_region }}

      - name: "Stop EC2 runner"
        uses: machulav/ec2-github-runner@28fbe1c4d7d9ba74134ca5ebc559d5b0a989a856 # v2.3.8
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ inputs.aws_ec2_runner_label }}
          ec2-instance-id: ${{ inputs.aws_ec2_runner_instance_id }}
96 changes: 96 additions & 0 deletions .github/workflows/smoke-tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# SPDX-License-Identifier: Apache-2.0

name: "Run smoke tests via Tox::pytest"
# These tests will be long running and require accelerated hardware.
# They will help to verify that the library is *functionally* correct but
# will not try to verify that the library is *correct*.

on:
  # TEMP - only runs when manually invoked
  # and only runs against branches in the repo.
  workflow_dispatch:
    inputs:
      branch:
        description: "Branch to check out and run the smoke tests against"
        type: string
        default: main

permissions:
  contents: read

defaults:
  run:
    shell: bash

jobs:
  # Launch the EC2 instance that the smoke tests will run on.
  start-ec2-runner:
    uses: ./.github/workflows/reusable-start-ec2-runner.yaml
    with:
      aws_region: ${{ vars.AWS_REGION }}
      aws_ami: ${{ vars.AWS_EC2_AMI }}
      aws_ec2_runner_variant: ${{ vars.SMOKETEST_EC2_INSTANCE_TYPE }} # requires accelerators
    secrets: inherit

  run-smoke-tests:
    needs:
      - start-ec2-runner
    runs-on: ${{ needs.start-ec2-runner.outputs.runner_label }}
    # It is important that this job has no write permissions and has
    # no access to any secrets. This part is where we are running
    # untrusted code from PRs.
    permissions: {}
    steps:
      - name: "Harden runner"
        uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.1
        with:
          egress-policy: audit

      - name: "Install packages"
        run: |
          cat /etc/os-release
          sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel

      - name: "Verify cuda environment is setup"
        run: |
          export CUDA_HOME="/usr/local/cuda"
          # Only prepend the existing value when it is non-empty; a leading
          # ':' would add the current directory to the library search path.
          export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64"
          export PATH="$PATH:$CUDA_HOME/bin"
          nvidia-smi
          # `export` only affects this step. Persist the CUDA settings so the
          # later steps (notably the tox run) see them as well.
          {
            echo "CUDA_HOME=$CUDA_HOME"
            echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
          } >> "$GITHUB_ENV"
          echo "$CUDA_HOME/bin" >> "$GITHUB_PATH"

      - name: "Checkout code"
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
          ref: ${{ inputs.branch }}

      # installs in $GITHUB_WORKSPACE/venv.
      # only has to install Tox because Tox will do the other virtual environment management.
      - name: "Setup Python virtual environment"
        run: |
          python3.11 -m venv --upgrade-deps venv
          . venv/bin/activate
          pip install tox

      - name: "Show disk utilization BEFORE tests"
        run: |
          df -h

      - name: "Run smoke tests with Tox and Pytest"
        run: |
          source venv/bin/activate
          tox -e py3-smoke

      - name: "Show disk utilization AFTER tests"
        run: |
          df -h

  stop-ec2-runner:
    needs:
      - start-ec2-runner
      - run-smoke-tests
    # Run even when the tests fail or are cancelled so that the EC2 instance
    # is not left running.
    if: ${{ always() }}
    uses: ./.github/workflows/reusable-stop-ec2-runner.yaml
    with:
      aws_region: ${{ vars.AWS_REGION }}
      aws_ec2_runner_label: ${{ needs.start-ec2-runner.outputs.runner_label }}
      aws_ec2_runner_instance_id: ${{ needs.start-ec2-runner.outputs.runner_instance_id }}
    secrets: inherit
77 changes: 14 additions & 63 deletions .github/workflows/unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,53 +38,19 @@ defaults:
run:
shell: bash

env:
pytest_mark: "fast"
ec2_runner_variant: "m7i.xlarge" # 4 Xeon CPU, 16GB RAM

jobs:
start-ec2-runner:
runs-on: ubuntu-latest
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id}}

steps:
- name: "Harden runner"
uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.1
with:
egress-policy: audit

- name: "Configure AWS credentials"
uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_REGION }}

- name: "Start EC2 runner"
id: start-ec2-runner
uses: machulav/ec2-github-runner@28fbe1c4d7d9ba74134ca5ebc559d5b0a989a856 # v2.3.8
with:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
ec2-image-id: ${{ vars.AWS_EC2_AMI }}
ec2-instance-type: ${{ env.ec2_runner_variant }}
subnet-id: subnet-024298cefa3bedd61
security-group-id: sg-06300447c4a5fbef3
iam-role-name: instructlab-ci-runner
aws-resource-tags: >
[
{"Key": "Name", "Value": "instructlab-ci-github-unittest-runner"},
{"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
{"Key": "GitHubRef", "Value": "${{ github.ref }}"},
{"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
]
uses: ./.github/workflows/reusable-start-ec2-runner.yaml
with:
aws_region: ${{vars.AWS_REGION}}
aws_ami: ${{vars.AWS_EC2_AMI}}
aws_ec2_runner_variant: ${{vars.UNITTEST_EC2_INSTANCE_TYPE}}
secrets: inherit

run-unit-tests:
needs:
- start-ec2-runner
runs-on: ${{needs.start-ec2-runner.outputs.label}}
runs-on: ${{needs.start-ec2-runner.outputs.runner_label}}
# It is important that this job has no write permissions and has
# no access to any secrets. This part is where we are running
# untrusted code from PRs.
Expand Down Expand Up @@ -120,7 +86,7 @@ jobs:
- name: "Run unit tests with Tox and Pytest"
run: |
source venv/bin/activate
tox -e py3-unit -- -m ${{env.pytest_mark}}
tox -e py3-unit

- name: "Show disk utilization AFTER tests"
run: |
Expand All @@ -130,25 +96,10 @@ jobs:
needs:
- start-ec2-runner
- run-unit-tests
runs-on: ubuntu-latest
if: ${{ always() }}
steps:
- name: "Harden runner"
uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.1
with:
egress-policy: audit

- name: "Configure AWS credentials"
uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_REGION }}

- name: "Stop EC2 runner"
uses: machulav/ec2-github-runner@28fbe1c4d7d9ba74134ca5ebc559d5b0a989a856 # v2.3.8
with:
mode: stop
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
label: ${{ needs.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-ec2-runner.outputs.ec2-instance-id }}
uses: ./.github/workflows/reusable-stop-ec2-runner.yaml
with:
aws_region: ${{vars.AWS_REGION}}
aws_ec2_runner_label: ${{needs.start-ec2-runner.outputs.runner_label}}
aws_ec2_runner_instance_id: ${{needs.start-ec2-runner.outputs.runner_instance_id}}
secrets: inherit
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,8 @@ exclude = [
]
# honor excludes by not following there through imports
follow_imports = "silent"

[tool.pytest.ini_options]
# Custom pytest markers registered for this project. Each entry has the form
# "<marker name>: <description>".
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
]
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ ipython
ipykernel
jupyter

huggingface_hub
Loading
Loading