Split build workflow for multiple platforms #1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header and triggers for the ROCm image build.
name: Build and push ROCm docker image to internal registry

on:
  # Manual runs from the Actions tab.
  workflow_dispatch:
  # Builds for the main branch and for version tags.
  push:
    branches:
      - 'main'
    tags:
      - 'v*'
  # Pull requests against main, restricted to changes in the listed paths.
  pull_request:
    paths:
      # This workflow file itself, so that edits to it are exercised in PRs.
      - '.github/workflows/build_rocm.yml'
      - 'integration-tests/**'
      - 'server/**'
      - 'proto/**'
      - 'router/**'
      - 'launcher/**'
      - 'Cargo.lock'
      - 'rust-toolchain.toml'
      # The Dockerfile the build job actually uses (`file: Dockerfile_amd`).
      - 'Dockerfile_amd'
    branches:
      - 'main'
jobs:
  # Starts an EC2 instance registered as a self-hosted runner and exposes its
  # runner label and instance id to the jobs that depend on it.
  start-runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    env:
      AWS_REGION: us-east-1
      EC2_AMI_ID: ami-0789b6925c11b1fb2
      EC2_INSTANCE_TYPE: g5.12xlarge
      EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc
      EC2_SECURITY_GROUP: sg-030175c435ac141d6
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Start EC2 runner
        id: start-ec2-runner
        uses: philschmid/philschmid-ec2-github-runner@main
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ${{ env.EC2_AMI_ID }}
          ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
          subnet-id: ${{ env.EC2_SUBNET_ID }}
          security-group-id: ${{ env.EC2_SECURITY_GROUP }}
          aws-resource-tags: > # optional, requires additional permissions
            [
              {"Key": "Name", "Value": "ec2-tgi-github-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
            ]

  # Builds the image from Dockerfile_amd on the runner started above and pushes
  # it with the tags computed by the metadata steps.
  #
  # NOTE(review): the `uses:` refs of the Buildx, slug, internal/Azure login and
  # metadata steps were garbled in the reviewed copy of this file. The action
  # names are reconstructed from the step names; confirm the pinned versions.
  build-and-push-image-rocm:
    concurrency:
      group: ${{ github.workflow }}-build-and-push-image-rocm-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    # Required: `runs-on` below reads an output of start-runner, and the `needs`
    # context only contains jobs listed here.
    needs: start-runner
    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
    permissions:
      contents: write
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write
      security-events: write
    outputs:
      # Consumed by integration-tests-rocm to select the image tag to test.
      short_sha: ${{ steps.short-sha.outputs.short_sha }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v2.0.0
        with:
          install: true
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1
      # Republish the short SHA (an environment variable after the slug step)
      # as a step output so it can be exposed as a job output.
      - name: Export short SHA
        id: short-sha
        run: echo "short_sha=${GITHUB_SHA_SHORT}" >> "$GITHUB_OUTPUT"
      - name: Tailscale
        uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
        with:
          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Login to internal Container Registry
        uses: docker/login-action@v2.1.0
        with:
          username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
          password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
          registry: registry.internal.huggingface.tech
      - name: Login to Azure Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2.1.0
        with:
          username: ${{ secrets.AZURE_DOCKER_USERNAME }}
          password: ${{ secrets.AZURE_DOCKER_PASSWORD }}
          registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io
      # If pull request
      - name: Extract metadata (tags, labels) for Docker
        if: ${{ github.event_name == 'pull_request' }}
        id: meta-pr
        uses: docker/metadata-action@v4.3.0
        with:
          images: |
            registry.internal.huggingface.tech/api-inference/community/text-generation-inference
          tags: |
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
      # If main, release or tag
      - name: Extract metadata (tags, labels) for Docker
        if: ${{ github.event_name != 'pull_request' }}
        id: meta
        uses: docker/metadata-action@v4.3.0
        with:
          flavor: |
            latest=false
          images: |
            registry.internal.huggingface.tech/api-inference/community/text-generation-inference
            ghcr.io/huggingface/text-generation-inference
            db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
          tags: |
            type=semver,pattern={{version}}-rocm
            type=semver,pattern={{major}}.{{minor}}-rocm
            type=raw,value=latest-rocm,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v4
        with:
          context: .
          file: Dockerfile_amd
          push: true
          platforms: 'linux/amd64'
          build-args: |
            GIT_SHA=${{ env.GITHUB_SHA }}
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
          # Exactly one of the two metadata steps runs, so take whichever
          # produced output.
          tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min

  # Runs the integration test suite inside the freshly built image on a
  # self-hosted runner carrying the amd-gpu / multi-gpu / mi300 labels.
  integration-tests-rocm:
    concurrency:
      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    needs:
      - start-runner
      - build-and-push-image-rocm
    runs-on: [self-hosted, amd-gpu, multi-gpu, mi300]
    container:
      image: registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ needs.build-and-push-image-rocm.outputs.short_sha }}-rocm
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/cache
    env:
      DOCKER_VOLUME: /cache
    steps:
      # NOTE(review): there is no checkout step in this job; confirm that the
      # Makefile and integration-tests directory are present in the workspace.
      - name: ROCM-SMI
        run: |
          rocm-smi
      - name: ROCM-INFO
        run: |
          rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"
      - name: Install
        run: |
          make install-integration-tests
      - name: Run tests
        # Pass the secret through the step environment instead of expanding it
        # into the script text, where the shell would re-parse its value.
        env:
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
        run: |
          pytest -s -vv integration-tests

  # Terminates the EC2 runner once every job that depends on it has finished.
  stop-runner:
    name: Stop self-hosted EC2 runner
    # Every entry must name a job defined in this workflow; a reference to an
    # unknown job makes the whole workflow file invalid.
    needs:
      - start-runner
      - build-and-push-image-rocm
      - integration-tests-rocm
    runs-on: ubuntu-latest
    env:
      AWS_REGION: us-east-1
    if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Stop EC2 runner
        uses: philschmid/philschmid-ec2-github-runner@main
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}