Skip to content

Commit

Permalink
Add automation for building/uploading/updating lucene snapshots daily (
Browse files Browse the repository at this point in the history
  • Loading branch information
brianseeders authored Aug 11, 2023
1 parent b155444 commit a7cb51c
Show file tree
Hide file tree
Showing 10 changed files with 353 additions and 0 deletions.
15 changes: 15 additions & 0 deletions .buildkite/hooks/pre-command
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,18 @@ export GRADLE_BUILD_CACHE_USERNAME

# Password for the Gradle build cache, read from Vault and exported for the job.
GRADLE_BUILD_CACHE_PASSWORD=$(vault read -field=password secret/ci/elastic-elasticsearch/migrated/gradle-build-cache)
export GRADLE_BUILD_CACHE_PASSWORD

# Buildkite API token, read from Vault and exported for the job.
BUILDKITE_API_TOKEN=$(vault read -field=token secret/ci/elastic-elasticsearch/buildkite-api-token)
export BUILDKITE_API_TOKEN

# Opt-in: AWS credentials are only fetched for steps that set
# USE_LUCENE_SNAPSHOT_CREDS to exactly "true".
if [[ "${USE_LUCENE_SNAPSHOT_CREDS:-}" == "true" ]]; then
  # get-credentials.sh prints a JSON document on stdout; capture it in a
  # variable so the credentials are not echoed by this hook.
  data=$(.buildkite/scripts/lucene-snapshot/get-credentials.sh)

  AWS_ACCESS_KEY_ID=$(jq -r .data.access_key <<< "$data")
  export AWS_ACCESS_KEY_ID

  AWS_SECRET_ACCESS_KEY=$(jq -r .data.secret_key <<< "$data")
  export AWS_SECRET_ACCESS_KEY

  # Drop the raw JSON once the two fields have been extracted.
  unset data
fi
20 changes: 20 additions & 0 deletions .buildkite/pipelines/lucene-snapshot/build-snapshot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Builds a Lucene snapshot, uploads it, updates the ES branch, then starts the tests.
steps:
  # Skipped when LUCENE_BUILD_ID is supplied, so an existing Lucene build can be reused.
  # upload-snapshot.sh finds the triggered build through the `lucene-build` key.
  - trigger: apache-lucene-build-snapshot
    label: Trigger pipeline to build lucene snapshot
    key: lucene-build
    if: 'build.env("LUCENE_BUILD_ID") == null || build.env("LUCENE_BUILD_ID") == ""'
  - wait
  - label: Upload and update lucene snapshot
    command: .buildkite/scripts/lucene-snapshot/upload-snapshot.sh
    timeout_in_minutes: 15
    env:
      # Read by the pre-command hook to export AWS credentials for this step.
      USE_LUCENE_SNAPSHOT_CREDS: "true"
      # Read by upload-snapshot.sh to also run update-es-snapshot.sh.
      UPDATE_ES_LUCENE_SNAPSHOT: "true"
    agents:
      provider: gcp
      image: family/elasticsearch-ubuntu-2004
      machineType: custom-32-98304
      buildDirectory: /dev/shm/bk
  - wait
  - trigger: "elasticsearch-lucene-snapshot-tests"
    build:
      # Without an explicit branch, a triggered build runs on the triggered
      # pipeline's default branch; pin it to the branch that was just updated.
      branch: "${BUILDKITE_BRANCH}"
    async: true
60 changes: 60 additions & 0 deletions .buildkite/pipelines/lucene-snapshot/run-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Test pipeline: a precommit sanity check gates the parallel check/bwc/rest-compat steps.
# Every command is a folded scalar, so it is still passed as one single-line string.
steps:
  - label: sanity-check
    agents:
      provider: gcp
      image: family/elasticsearch-ubuntu-2004
      machineType: custom-32-98304
      buildDirectory: /dev/shm/bk
    timeout_in_minutes: 300
    command: >-
      .ci/scripts/run-gradle.sh
      -Dbwc.checkout.align=true
      -Dorg.elasticsearch.build.cache.push=true
      -Dignore.tests.seed
      -Dscan.capture-task-input-files
      precommit
  # Everything below waits for the sanity check to finish.
  - wait: null
  - label: part1
    agents:
      provider: gcp
      image: family/elasticsearch-ubuntu-2004
      machineType: custom-32-98304
      buildDirectory: /dev/shm/bk
    timeout_in_minutes: 300
    command: >-
      .ci/scripts/run-gradle.sh
      -Dbwc.checkout.align=true
      -Dorg.elasticsearch.build.cache.push=true
      -Dignore.tests.seed
      -Dscan.capture-task-input-files
      checkPart1
  - label: part2
    agents:
      provider: gcp
      image: family/elasticsearch-ubuntu-2004
      machineType: custom-32-98304
      buildDirectory: /dev/shm/bk
    timeout_in_minutes: 300
    command: >-
      .ci/scripts/run-gradle.sh
      -Dbwc.checkout.align=true
      -Dorg.elasticsearch.build.cache.push=true
      -Dignore.tests.seed
      -Dscan.capture-task-input-files
      checkPart2
  - label: part3
    agents:
      provider: gcp
      image: family/elasticsearch-ubuntu-2004
      machineType: custom-32-98304
      buildDirectory: /dev/shm/bk
    timeout_in_minutes: 300
    command: >-
      .ci/scripts/run-gradle.sh
      -Dbwc.checkout.align=true
      -Dorg.elasticsearch.build.cache.push=true
      -Dignore.tests.seed
      -Dscan.capture-task-input-files
      checkPart3
  - group: bwc-snapshots
    steps:
      # One job per BWC_VERSION matrix entry.
      - label: "{{matrix.BWC_VERSION}} / bwc-snapshots"
        agents:
          provider: gcp
          image: family/elasticsearch-ubuntu-2004
          machineType: custom-32-98304
          buildDirectory: /dev/shm/bk
        timeout_in_minutes: 300
        matrix:
          setup:
            BWC_VERSION:
              - "7.17.13"
              - "8.9.1"
              - "8.10.0"
        env:
          BWC_VERSION: "{{matrix.BWC_VERSION}}"
        # `$$` keeps BWC_VERSION from being interpolated at upload time, so it is
        # expanded from the step's env when the command runs.
        command: >-
          .ci/scripts/run-gradle.sh
          -Dbwc.checkout.align=true
          -Dorg.elasticsearch.build.cache.push=true
          -Dignore.tests.seed
          -Dscan.capture-task-input-files
          v$$BWC_VERSION#bwcTest
  - label: rest-compat
    agents:
      provider: gcp
      image: family/elasticsearch-ubuntu-2004
      machineType: custom-32-98304
      buildDirectory: /dev/shm/bk
    timeout_in_minutes: 300
    command: >-
      .ci/scripts/run-gradle.sh
      -Dbwc.checkout.align=true
      -Dorg.elasticsearch.build.cache.push=true
      -Dignore.tests.seed
      -Dscan.capture-task-input-files
      checkRestCompat
7 changes: 7 additions & 0 deletions .buildkite/pipelines/lucene-snapshot/update-branch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Merges main into lucene_snapshot, then starts the snapshot test pipeline.
steps:
  - label: Update lucene-snapshot branch
    command: .buildkite/scripts/lucene-snapshot/update-branch.sh
    timeout_in_minutes: 15
  - wait
  - trigger: "elasticsearch-lucene-snapshot-tests"
    build:
      # Without an explicit branch, a triggered build runs on the triggered
      # pipeline's default branch; pin it to the branch that was just merged.
      branch: "${BUILDKITE_BRANCH}"
    async: true
16 changes: 16 additions & 0 deletions .buildkite/scripts/lucene-snapshot/get-credentials.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash

# Prints the JSON returned by `vault read` for aws-elastic/creds/lucene-snapshots on stdout.
#
# WARNING: this script will echo the credentials to the console. It is meant to be called from another script and captured in a variable.
# It should really only be used inside .buildkite/hooks/pre-command

set -euo pipefail

# AppRole login details. These two reads use whatever Vault configuration the
# caller provided, because VAULT_ADDR is only changed afterwards.
VAULT_ROLE_ID=$(vault read -field=role-id secret/ci/elastic-elasticsearch/legacy-vault-credentials)
VAULT_SECRET_ID=$(vault read -field=secret-id secret/ci/elastic-elasticsearch/legacy-vault-credentials)

# Exported explicitly: a plain assignment only reaches the vault CLI below if
# the caller happened to have VAULT_ADDR exported already.
export VAULT_ADDR=https://secrets.elastic.co:8200

# Presumably cleared so the AppRole login is not attempted with the caller's token.
unset VAULT_TOKEN
# Quoted so the IDs are passed as single arguments regardless of their content.
VAULT_TOKEN=$(vault write -field=token auth/approle/login role_id="$VAULT_ROLE_ID" secret_id="$VAULT_SECRET_ID")
export VAULT_TOKEN

vault read -format=json aws-elastic/creds/lucene-snapshots
13 changes: 13 additions & 0 deletions .buildkite/scripts/lucene-snapshot/remove-verification-metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Remove every Lucene <component> entry from gradle/verification-metadata.xml.

Run from the repository root. The file is rewritten in place with all
`org.apache.lucene` / `lucene-*` component elements deleted.
"""
import re

METADATA_PATH = "gradle/verification-metadata.xml"

# Dots in the group name are escaped so only the literal "org.apache.lucene"
# matches. DOTALL lets the lazy `.*?` run across lines up to the first closing
# tag; the trailing `\s*` also removes the whitespace that followed the element.
regex = re.compile(
    r'<component group="org\.apache\.lucene" name="lucene-.*?</component>\s*',
    re.MULTILINE | re.DOTALL,
)


def strip_lucene_components(text):
    """Return `text` with every Lucene <component>...</component> element removed."""
    return regex.sub("", text)


def main():
    """Rewrite METADATA_PATH in place without its Lucene component entries."""
    # Explicit encoding so the result does not depend on the agent's locale.
    # NOTE(review): assumes the metadata file is UTF-8 - confirm against its XML declaration.
    with open(METADATA_PATH, "r+", encoding="utf-8") as f:
        text = f.read()
        # Rewind and empty the file before writing the filtered content back.
        f.seek(0)
        f.truncate()
        f.write(strip_lucene_components(text))


if __name__ == "__main__":
    main()
18 changes: 18 additions & 0 deletions .buildkite/scripts/lucene-snapshot/update-branch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash

# Merges origin/main into the lucene_snapshot branch and pushes the result.
# Only runs when the build's branch is lucene_snapshot.

set -euo pipefail

# Guard: bail out for any other branch.
[[ "$BUILDKITE_BRANCH" == "lucene_snapshot" ]] || {
  echo "Error: This script should only be run on the lucene_snapshot branch"
  exit 1
}

echo "--- Updating lucene_snapshot branch with main"

# Identity used for the merge commit.
# NOTE(review): '[email protected]' looks like an address masked during page extraction - confirm the real value.
git config --global user.name elasticsearchmachine
git config --global user.email '[email protected]'

git checkout lucene_snapshot
git fetch origin main
# --no-edit accepts the default merge message without opening an editor.
git merge --no-edit origin/main
git push origin lucene_snapshot
46 changes: 46 additions & 0 deletions .buildkite/scripts/lucene-snapshot/update-es-snapshot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash

# Points the current branch at a new Lucene snapshot version: rewrites
# build-tools-internal/version.properties, regenerates the Lucene entries in
# gradle/verification-metadata.xml, updates docs/Versions.asciidoc, then commits
# and pushes if anything changed.
#
# Inputs:
#   LUCENE_SNAPSHOT_VERSION  optional; falls back to the Buildkite meta-data key
#                            `lucene-snapshot-version`
#   BUILDKITE_BRANCH         branch that is checked out, updated and pushed

set -euo pipefail

echo --- Update Lucene snapshot in Elasticsearch

LUCENE_SNAPSHOT_VERSION=${LUCENE_SNAPSHOT_VERSION:-}

if [[ -z "$LUCENE_SNAPSHOT_VERSION" ]]; then
  LUCENE_SNAPSHOT_VERSION=$(buildkite-agent meta-data get lucene-snapshot-version)
fi

echo "Lucene Snapshot Version: $LUCENE_SNAPSHOT_VERSION"

git checkout "$BUILDKITE_BRANCH"
git pull --ff-only origin "$BUILDKITE_BRANCH"

# Replace `lucene = <version>` string in version.properties and maintain the same indentation.
# The capture group ends right after `=` and its padding, so the old version
# falls into `.*` and is replaced. Capturing `[^ ]*` as well would keep the old
# version and append the new one to it.
sed -E "s/^(lucene *= *).*\$/\1$LUCENE_SNAPSHOT_VERSION/" build-tools-internal/version.properties > new-version.properties
mv new-version.properties build-tools-internal/version.properties

# Remove stale verification metadata, because generating them just appends new ones
python3 .buildkite/scripts/lucene-snapshot/remove-verification-metadata.py
./gradlew --write-verification-metadata sha256

# 9.8.0-snapshot-1f25c68 -> 9.8.0
VERSION=$(echo "$LUCENE_SNAPSHOT_VERSION" | cut -f 1 -d '-')
# 9.8.0 -> 9_8_0
VERSION_SNAKE=$(echo "$VERSION" | sed -E 's/\./_/g')

# Two passes through a temporary file: first :lucene_version:, then :lucene_version_path:.
sed -E "s/^(:lucene_version: *).*\$/\1$VERSION/" docs/Versions.asciidoc > docs/Versions.asciidoc.new
sed -E "s/^(:lucene_version_path: *).*\$/\1$VERSION_SNAKE/" docs/Versions.asciidoc.new > docs/Versions.asciidoc
rm -f docs/Versions.asciidoc.new

# Exit status 0 from diff-index means tracked files are identical to HEAD.
if git diff-index --quiet HEAD --; then
  echo 'No changes to commit.'
else
  # NOTE(review): '[email protected]' looks like an address masked during page extraction - confirm the real value.
  git config --global user.name elasticsearchmachine
  git config --global user.email '[email protected]'

  git add build-tools-internal/version.properties
  git add gradle/verification-metadata.xml
  git add docs/Versions.asciidoc

  git commit -m "[Automated] Update Lucene snapshot to $LUCENE_SNAPSHOT_VERSION"
  git push origin "$BUILDKITE_BRANCH"
fi
41 changes: 41 additions & 0 deletions .buildkite/scripts/lucene-snapshot/upload-snapshot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/bash

# Downloads the Lucene snapshot artifact from a Buildkite build, syncs it to S3
# and, when requested, runs update-es-snapshot.sh.
#
# Inputs:
#   LUCENE_BUILD_ID            optional; when empty, the build triggered by this
#                              build's `lucene-build` step is looked up via the API
#   BUILDKITE_API_TOKEN, BUILDKITE_PIPELINE_SLUG, BUILDKITE_BUILD_NUMBER
#                              used for that lookup
#   UPDATE_ES_LUCENE_SNAPSHOT  set to "true" to run update-es-snapshot.sh afterwards

set -euo pipefail

LUCENE_BUILD_ID=${LUCENE_BUILD_ID:-}

if [[ -z "$LUCENE_BUILD_ID" ]]; then
  # --fail turns an HTTP error into a non-zero exit instead of handing an error body to jq.
  build_json=$(curl -sS --fail -H "Authorization: Bearer $BUILDKITE_API_TOKEN" "https://api.buildkite.com/v2/organizations/elastic/pipelines/$BUILDKITE_PIPELINE_SLUG/builds/$BUILDKITE_BUILD_NUMBER")
  LUCENE_BUILD_ID=$(jq -r '.jobs[] | select(.step_key == "lucene-build").triggered_build.id' <<< "$build_json")
fi

# jq prints nothing when no job matches and the literal "null" when the job has
# no triggered build; stop here rather than query meta-data for a bogus ID.
if [[ -z "$LUCENE_BUILD_ID" || "$LUCENE_BUILD_ID" == "null" ]]; then
  echo "Error: could not determine the Lucene snapshot build ID" >&2
  exit 1
fi

export LUCENE_BUILD_ID

LUCENE_SHA=$(buildkite-agent meta-data get --build "$LUCENE_BUILD_ID" lucene-snapshot-sha)
export LUCENE_SHA

# Exported so update-es-snapshot.sh can reuse it without another meta-data lookup.
LUCENE_SNAPSHOT_VERSION=$(buildkite-agent meta-data get --build "$LUCENE_BUILD_ID" lucene-snapshot-version)
export LUCENE_SNAPSHOT_VERSION

echo --- Downloading lucene snapshot

mkdir lucene-snapshot
cd lucene-snapshot
buildkite-agent artifact download --build "$LUCENE_BUILD_ID" lucene-snapshot.tar.gz .
tar -xvf lucene-snapshot.tar.gz
cd -

echo --- Upload lucene snapshot to S3

# Install the AWS CLI on the fly when the agent image does not provide it.
if ! command -v aws; then
  curl --fail "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
  unzip awscliv2.zip
  sudo ./aws/install
  rm -rf awscliv2.zip aws
fi

aws s3 sync lucene-snapshot/ "s3://download.elasticsearch.org/lucenesnapshots/$LUCENE_SHA/" --acl public-read

# Compare against "true" (as the pre-command hook does for its flag) so that a
# value such as "false" does not enable the update.
if [[ "${UPDATE_ES_LUCENE_SNAPSHOT:-}" == "true" ]]; then
  .buildkite/scripts/lucene-snapshot/update-es-snapshot.sh
fi
117 changes: 117 additions & 0 deletions catalog-info.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,120 @@ spec:
branch: main
cronline: "0 0,8,16 * * * America/New_York"
message: "Triggers pipelines 3x daily"
---
# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/e57ee3bed7a6f73077a3f55a38e76e40ec87a7cf/rre.schema.json
apiVersion: backstage.io/v1alpha1
kind: Resource
metadata:
  name: buildkite-pipeline-elasticsearch-lucene-snapshot-build
  description: Builds a new lucene snapshot, uploads, updates the lucene_snapshot branch in ES, runs tests
  links:
    - title: Pipeline
      url: https://buildkite.com/elastic/elasticsearch-lucene-snapshot-build
spec:
  type: buildkite-pipeline
  system: buildkite
  owner: group:elasticsearch-team
  implementation:
    apiVersion: buildkite.elastic.dev/v1
    kind: Pipeline
    metadata:
      description: ":elasticsearch: Builds a new lucene snapshot and tests it"
      name: elasticsearch / lucene-snapshot / build-and-update
    spec:
      repository: elastic/elasticsearch
      pipeline_file: .buildkite/pipelines/lucene-snapshot/build-snapshot.yml
      branch_configuration: lucene_snapshot
      teams:
        elasticsearch-team: {}
        ml-core: {}
        everyone:
          access_level: BUILD_AND_READ
      # Builds start only from the schedule below or from an explicit trigger.
      provider_settings:
        build_branches: false
        build_pull_requests: false
        publish_commit_status: false
        trigger_mode: none
      schedules:
        Periodically on lucene_snapshot:
          branch: lucene_snapshot
          # Five cron fields (minute hour day-of-month month day-of-week) plus
          # the timezone: daily at 02:00 New York time.
          cronline: "0 2 * * * America/New_York"
          message: "Builds a new lucene snapshot 1x per day"
---
# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/e57ee3bed7a6f73077a3f55a38e76e40ec87a7cf/rre.schema.json
apiVersion: backstage.io/v1alpha1
kind: Resource
metadata:
  name: buildkite-pipeline-elasticsearch-lucene-snapshot-update-branch
  description: Merge main into the lucene_snapshot branch, and run tests
  links:
    - title: Pipeline
      url: https://buildkite.com/elastic/elasticsearch-lucene-snapshot-update-branch
spec:
  type: buildkite-pipeline
  system: buildkite
  owner: group:elasticsearch-team
  implementation:
    apiVersion: buildkite.elastic.dev/v1
    kind: Pipeline
    metadata:
      description: ":elasticsearch: Merges main into lucene_snapshot branch and runs tests"
      name: elasticsearch / lucene-snapshot / update-branch
    spec:
      repository: elastic/elasticsearch
      pipeline_file: .buildkite/pipelines/lucene-snapshot/update-branch.yml
      branch_configuration: lucene_snapshot
      teams:
        elasticsearch-team: {}
        ml-core: {}
        everyone:
          access_level: BUILD_AND_READ
      # Builds start only from the schedule below or from an explicit trigger.
      provider_settings:
        build_branches: false
        build_pull_requests: false
        publish_commit_status: false
        trigger_mode: none
      schedules:
        Periodically on lucene_snapshot:
          branch: lucene_snapshot
          # Five cron fields (minute hour day-of-month month day-of-week) plus
          # the timezone: daily at 06:00 New York time.
          cronline: "0 6 * * * America/New_York"
          message: "Merges main into lucene_snapshot branch 1x per day"
---
# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/e57ee3bed7a6f73077a3f55a38e76e40ec87a7cf/rre.schema.json
apiVersion: backstage.io/v1alpha1
kind: Resource
metadata:
  name: buildkite-pipeline-elasticsearch-lucene-snapshot-tests
  description: Runs tests against lucene_snapshot branch
  links:
    - title: Pipeline
      url: https://buildkite.com/elastic/elasticsearch-lucene-snapshot-tests
spec:
  type: buildkite-pipeline
  system: buildkite
  owner: group:elasticsearch-team
  implementation:
    apiVersion: buildkite.elastic.dev/v1
    kind: Pipeline
    metadata:
      description: ":elasticsearch: Runs tests against lucene_snapshot branch"
      name: elasticsearch / lucene-snapshot / tests
    spec:
      repository: elastic/elasticsearch
      pipeline_file: .buildkite/pipelines/lucene-snapshot/run-tests.yml
      branch_configuration: lucene_snapshot
      teams:
        elasticsearch-team: {}
        ml-core: {}
        everyone:
          access_level: BUILD_AND_READ
      # Builds start only from the schedule below or from an explicit trigger.
      provider_settings:
        build_branches: false
        build_pull_requests: false
        publish_commit_status: false
        trigger_mode: none
      schedules:
        Periodically on lucene_snapshot:
          branch: lucene_snapshot
          # Five cron fields (minute hour day-of-month month day-of-week) plus
          # the timezone: daily at 09:00, 12:00, 15:00 and 18:00 New York time.
          cronline: "0 9,12,15,18 * * * America/New_York"
          message: "Runs tests against lucene_snapshot branch several times per day"

0 comments on commit a7cb51c

Please sign in to comment.