From 1ad9e9c70220f74933756a8c3111ef55a7771082 Mon Sep 17 00:00:00 2001 From: Jonathan Giannuzzi Date: Thu, 10 Mar 2022 19:02:51 +0100 Subject: [PATCH] Many improvements * Rename variables file so it gets used automatically * Refactor variables * `ghrunner_version` becomes `runner_version` * `source_image_project_id` is a string * `source_image_family`, `disk_size`, `drivers_url`, and `drivers_script` are removed * `nvidia_version` is added * Service account is not set on the instance anymore * NVIDIA driver version is added to the image name * `unattended-upgrades` service is disabled to avoid package upgrades during build or at runtime * `ghrunner` user has been renamed to `runner` like on GitHub infrastructure * NVIDIA drivers installer is removed from the image * Ubuntu base image, NVIDIA drivers, and GitHub Actions Runner have been updated to their latest versions * README has been updated * CI workflows have been refactored * Use the latest version of Ubuntu * Use the latest version of actions/checkout * Use built-in packer * Feed the `project` variable as a secret * Also run `lint` when a pull request is updated * Run `build` on push to main * Secrets are protected by environment `gcp` * Add VSCode dev container settings --- .devcontainer/Dockerfile | 20 ++++++++++ .devcontainer/devcontainer.json | 31 +++++++++++++++ .github/workflows/build.yaml | 36 ++++++++--------- .github/workflows/lint.yaml | 19 ++++----- .gitignore | 2 + README.md | 69 ++++++++++++++++++++------------- runner-machine-image.pkr.hcl | 33 ++++++++-------- runtime-variables.pkrvars.hcl | 9 ----- scripts/setup.sh | 36 +++++++++-------- variables.auto.pkrvars.hcl | 7 ++++ variables.pkr.hcl | 18 ++++----- 11 files changed, 170 insertions(+), 110 deletions(-) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json create mode 100644 .gitignore delete mode 100644 runtime-variables.pkrvars.hcl create mode 100644 variables.auto.pkrvars.hcl diff --git 
a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000..e766f31 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,20 @@ +# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.224.2/containers/ubuntu/.devcontainer/base.Dockerfile + +# [Choice] Ubuntu version (use hirsute or bionic on local arm64/Apple Silicon): hirsute, focal, bionic +ARG VARIANT="hirsute" +FROM mcr.microsoft.com/vscode/devcontainers/base:0-${VARIANT} + +# [Optional] Uncomment this section to install additional OS packages. +# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ +# && apt-get -y install --no-install-recommends + +# Install Packer +ARG PACKER_VERSION="1.8.0" +ARG PACKER_ARCH="amd64" +RUN set -x \ + && TMPDIR=$(mktemp -d) \ + && cd $TMPDIR \ + && curl -fsSLo packer.zip https://releases.hashicorp.com/packer/${PACKER_VERSION}/packer_${PACKER_VERSION}_linux_${PACKER_ARCH}.zip \ + && unzip packer.zip \ + && install packer /usr/local/bin \ + && rm -rf $TMPDIR \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..6db6d13 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,31 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: +// https://github.com/microsoft/vscode-dev-containers/tree/v0.224.2/containers/ubuntu +{ + "name": "Ubuntu", + "build": { + "dockerfile": "Dockerfile", + // Update 'VARIANT' to pick an Ubuntu version: hirsute, focal, bionic + // Use hirsute or bionic on local arm64/Apple Silicon. + // Set 'PACKER_ARCH' to arm64 on local arm64/Apple Silicon. + "args": { + "VARIANT": "hirsute", + "PACKER_ARCH": "amd64" + } + }, + // Set *default* container specific settings.json values on container create. + "settings": {}, + // Add the IDs of extensions you want installed when the container is created. 
+ "extensions": [ + "4ops.packer" + ], + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + // Use 'postCreateCommand' to run commands after the container is created. + // "postCreateCommand": "uname -a", + // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. + "remoteUser": "vscode", + "features": { + "git": "os-provided", + "github-cli": "latest" + } +} \ No newline at end of file diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index a05c374..eef4eab 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -2,29 +2,27 @@ name: Packer Build on: push: - tags: - - v* + branches: + - main + jobs: packer-build: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 + environment: gcp steps: - name: Checkout Packer project - uses: actions/checkout@v2 - - - name: Export Application Credentials - run: echo ${{ secrets.PACKER_SA_KEY }} > $GOOGLE_APPLICATION_CREDENTIALS - env: - GOOGLE_APPLICATION_CREDENTIALS: /home/runner/packer_sa_key.json + uses: actions/checkout@v3 - name: Initialize Packer Plugin Binaries - uses: hashicorp/packer-github-actions@master - with: - command: init - target: . + run: packer init . - - name: Build Artifact - uses: hashicorp/packer-github-actions@master - with: - command: build - arguments: "-var=ghrunner_version=$GITHUB_REF_NAME -var-file=runtime-variables.pkrvars.hcl" - target: . \ No newline at end of file + - name: Build Image + env: + PKR_VAR_project: ${{ secrets.GOOGLE_PROJECT }} + GOOGLE_APPLICATION_CREDENTIALS: gcp.json + run: | + cat > $GOOGLE_APPLICATION_CREDENTIALS << EOF + ${{ secrets.GOOGLE_CREDENTIALS }} + EOF + trap "rm -f $GOOGLE_APPLICATION_CREDENTIALS" EXIT + packer build -force . 
\ No newline at end of file diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 8603e33..2d14581 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -2,26 +2,21 @@ name: lint on: pull_request: - types: [opened] push: branches: - main + jobs: validate: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - name: Checkout Repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Initialize Packer Plugin Binaries - uses: hashicorp/packer-github-actions@master - with: - command: init - target: . + run: packer init . - name: Validate Template - uses: hashicorp/packer-github-actions@master - with: - command: validate - arguments: -var=ghrunner_version=v1.0.0 -var-file=runtime-variables.pkrvars.hcl - target: . + env: + PKR_VAR_project: dummy_value + run: packer validate . \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ff5aa05 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.env +gcp.json \ No newline at end of file diff --git a/README.md b/README.md index aa45486..4c8d862 100644 --- a/README.md +++ b/README.md @@ -7,40 +7,57 @@ # Introduction -This repository is used to build GCP Machine Image for Github Ephemeral Runner using Packer. Image is based on Debian 10, with CUDA v1.13 installed. This repository bakes in the Github Runner `v2.283.3`, with its dependencies installed. Check [Usage](#usage) section for how to use this image within your IaaC. +This repository is used to build a GCP Machine Image for ephemeral GitHub Actions self-hosted runners using Packer. The image is based on Ubuntu 20.04 v20220308, with NVIDIA drivers v470.103.01 and [GitHub Actions Runner](https://github.com/actions/runner) v2.288.1. Check the [Usage](#usage) section for how to use this image within your IaaC. # Guidelines -Github Workflow is run only on tag push. Tags are based on main branch. 
After accepting a pull request, tag should be created and named in the format `v`, eg: `v2.283.3`. Workflow will read the tag name, and use it to build the artifact image. Github Runner versions are available in the [download section](https://github.com/actions/runner/releases). +## Local development + +For local development, you can use a Visual Studio Code dev container to [open your local checkout in a container](https://code.visualstudio.com/docs/remote/containers#_quick-start-open-an-existing-folder-in-a-container). + +You will then need to initialize Packer with: +```sh +packer init . +``` + +The next step is to [create a GCP service account](https://www.packer.io/plugins/builders/googlecompute#running-outside-of-google-cloud) and save its credentials in JSON format as `gcp.json`. + +You can then create a local `.env` file with the following content: +```sh +export GOOGLE_APPLICATION_CREDENTIALS=gcp.json +export PKR_VAR_project= +``` + +Finally, you can build the image with: +```sh +packer build . +``` + +## CI + +The GitHub Actions workflow runs only on pushes to `main`, and automatically builds and publishes the new image. 
# Usage This VM Machine Image will: -- create `ghrunner` user and it's home directory -- create working directory inside the home directory: `/home/ghrunner/workdir/actions-runner` -- inside the working directory unpack the Github Runner package with available bash scripts: +- create the `runner` user and its home directory +- create a working directory inside the home directory: `/home/runner/runner` +- inside the working directory unpack the GitHub Actions Runner package with available bash scripts: - `config.sh` - `run.sh` -Here is the example systemd service that registers and runs the Github Runner in Ephemeral mode: -``` -[Unit] -Description=Register GitHub Runner - -[Service] -User=ghrunner -Type=oneshot -WorkingDirectory=/home/ghrunner/workdir/actions-runner -ExecStartPre=-/bin/bash -c "/home/ghrunner/workdir/actions-runner/config.sh \ - --url https://github.com/{{repoOwner}}/{{repo}} \ - --token {{token}} \ - --name {{ghRunnerName}} \ - --work _work \ - --runnergroup default \ - --labels self-hosted \ - --ephemeral" -ExecStart=-/bin/bash -c "/home/ghrunner/workdir/actions-runner/run.sh" - -[Install] -WantedBy=multi-user.target +Here is an example script that registers and runs the GitHub Actions Runner in ephemeral mode: +```sh +su - runner -c "cd runner && \ + ./config.sh \ + --url https://github.com/{{owner}}/{{repo}} \ + --token {{token}} \ + --labels {{labels}} \ + --disableupdate \ + --unattended \ + --ephemeral" + +cd ~runner/runner +./svc.sh install runner +./svc.sh start ``` \ No newline at end of file diff --git a/runner-machine-image.pkr.hcl b/runner-machine-image.pkr.hcl index f437ead..1fbc847 100644 --- a/runner-machine-image.pkr.hcl +++ b/runner-machine-image.pkr.hcl @@ -8,29 +8,28 @@ packer { } source "googlecompute" "runner_machine_image" { - project_id = var.project - image_name = format("%s-ghr%s", var.source_image, replace(var.ghrunner_version, ".", "")) - ssh_username = "packer" - source_image = var.source_image - source_image_family = 
var.source_image_family - source_image_project_id = var.source_image_project_id - zone = var.zone - disk_size = var.disk_size - machine_type = var.machine_type - preemptible = "true" + project_id = var.project + image_name = format("%s-ghr%s-nv%s", var.source_image, replace(var.runner_version, ".", ""), replace(var.nvidia_version, ".", "")) + image_family = var.image_family + ssh_username = "packer" + source_image = var.source_image + source_image_project_id = [var.source_image_project_id] + zone = var.zone + machine_type = var.machine_type + preemptible = "true" + disable_default_service_account = "true" } build { - sources = [ "source.googlecompute.runner_machine_image" ] + sources = ["source.googlecompute.runner_machine_image"] provisioner "shell" { - environment_vars = [ - "RUNNER_VERSION=${trimprefix(var.ghrunner_version, "v")}", - "DRIVERS_URL=${var.drivers_url}", - "DRIVERS_SCRIPT=${var.drivers_script}" + environment_vars = [ + "RUNNER_VERSION=${var.runner_version}", + "NVIDIA_VERSION=${var.nvidia_version}" ] - script = "scripts/setup.sh" - execute_command = "chmod +x {{ .Path }}; sudo sh -c '{{ .Vars }} {{ .Path }}'" + script = "scripts/setup.sh" + execute_command = "chmod +x {{ .Path }}; sudo sh -c '{{ .Vars }} {{ .Path }}'" } } \ No newline at end of file diff --git a/runtime-variables.pkrvars.hcl b/runtime-variables.pkrvars.hcl deleted file mode 100644 index 3d7372b..0000000 --- a/runtime-variables.pkrvars.hcl +++ /dev/null @@ -1,9 +0,0 @@ -project = "gr-oss-251320" -zone = "europe-west4-a" -source_image_project_id = [ "ubuntu-os-cloud" ] -source_image_family = "ubuntu-2004-lts" -source_image = "ubuntu-2004-focal-v20211202" -disk_size = 100 -machine_type = "a2-highgpu-1g" -drivers_url = "https://uk.download.nvidia.com/tesla/470.82.01" -drivers_script = "NVIDIA-Linux-x86_64-470.82.01.run" \ No newline at end of file diff --git a/scripts/setup.sh b/scripts/setup.sh index cf5d282..706e39b 100644 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -1,24 +1,26 
@@ #!/bin/bash +export DEBIAN_FRONTEND=noninteractive + +echo "[INFO] > Disable unattended upgrades services" +systemctl disable --now unattended-upgrades.service + echo "[INFO] > Prepare the system before installing drivers" -apt-get install -y gcc make pkg-config +apt-get update +apt-get install -y build-essential -echo "[INFO] > Installing deeplearning drivers" -curl -O -L $DRIVERS_URL/$DRIVERS_SCRIPT -chmod +x $DRIVERS_SCRIPT -./$DRIVERS_SCRIPT -s +echo "[INFO] > Installing NVIDIA drivers" +curl -o nvidia.run -fsSL https://us.download.nvidia.com/tesla/${NVIDIA_VERSION}/NVIDIA-Linux-x86_64-${NVIDIA_VERSION}.run +sh nvidia.run --ui=none -q +rm nvidia.run -echo "[INFO] > Creating user ghrunner with home directory" -/usr/sbin/useradd -m ghrunner -cd /home/ghrunner -mkdir -p workdir/actions-runner && cd workdir/actions-runner +echo "[INFO] > Creating user runner with home directory" +/usr/sbin/useradd -m runner +mkdir ~runner/runner -echo "[INFO] > Downloading runner tar archive from Github" -ARCHIVE=actions-runner-linux-x64-$RUNNER_VERSION.tar.gz -curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/$ARCHIVE -tar xzf ./$ARCHIVE -rm $ARCHIVE -chown -R ghrunner ~ghrunner +echo "[INFO] > Installing GitHub Actions runner" +curl -fsSL https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz | tar -C ~runner/runner -xzf - +chown -R runner:runner ~runner/runner -echo "[INFO] > Installing runner dependencies" -/home/ghrunner/workdir/actions-runner/bin/installdependencies.sh \ No newline at end of file +echo "[INFO] > Installing GitHub Actions runner dependencies" +~runner/runner/bin/installdependencies.sh \ No newline at end of file diff --git a/variables.auto.pkrvars.hcl b/variables.auto.pkrvars.hcl new file mode 100644 index 0000000..99d1b80 --- /dev/null +++ b/variables.auto.pkrvars.hcl @@ -0,0 +1,7 @@ +zone = "europe-west4-a" +image_family = "ubuntu-2004-ghr-nvidia" +source_image 
= "ubuntu-2004-focal-v20220308" +source_image_project_id = "ubuntu-os-cloud" +machine_type = "a2-highgpu-1g" +runner_version = "2.288.1" +nvidia_version = "470.103.01" \ No newline at end of file diff --git a/variables.pkr.hcl b/variables.pkr.hcl index d80b929..e660677 100644 --- a/variables.pkr.hcl +++ b/variables.pkr.hcl @@ -1,10 +1,8 @@ -variable "project" { type = string } -variable "zone" { type = string } -variable "source_image_project_id" { type = list(string) } -variable "disk_size" { type = number } -variable "source_image_family" { type = string } -variable "source_image" { type = string } -variable "ghrunner_version" { type = string } -variable "machine_type" { type = string } -variable "drivers_url" { type = string } -variable "drivers_script" { type = string } \ No newline at end of file +variable "project" { type = string } +variable "zone" { type = string } +variable "image_family" { type = string } +variable "source_image" { type = string } +variable "source_image_project_id" { type = string } +variable "machine_type" { type = string } +variable "runner_version" { type = string } +variable "nvidia_version" { type = string } \ No newline at end of file