Merge branch 'cloud-bulldozer:master' into cmmo-dag
krishvoor authored Jul 6, 2023
2 parents 4312d4f + 12b5201 · commit c3eee05
Showing 17 changed files with 36 additions and 86 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/release.yml
@@ -1,10 +1,12 @@
 name: Release Airflow Images
 on:
+  schedule: # triggers every midnight
+    - cron: '0 0 * * *'
   push:
     branches:
       - master
     tags:
-      - "*" # triggers only if push new tag version
+      - "*" # triggers on a push event
 
 jobs:
   containers:
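For reference, GitHub Actions evaluates `schedule` crons in UTC, so the added `0 0 * * *` entry (minute, hour, day-of-month, month, day-of-week) fires once per day at 00:00 UTC.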
2 changes: 1 addition & 1 deletion Makefile
@@ -1,6 +1,6 @@
 QUAY_ACCOUNT ?= quay.io/cloud-bulldozer
 IMAGE_BUILDER ?= podman
-AIRFLOW_VERSION ?= 2.3.2
+AIRFLOW_VERSION ?= 2.6.2
 AIRFLOW_PYTHON_VERSION ?= python3.8
 AIRFLOW_IMAGE_TAG ?= $(AIRFLOW_VERSION)-$(AIRFLOW_PYTHON_VERSION)
 IMAGE_TAG ?= $(AIRFLOW_VERSION)
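Since every variable here uses Make's conditional `?=` assignment, callers can override any of them per invocation without editing the Makefile. A minimal sketch — the `build` target name is an assumption, not shown in this diff:

    # Build with docker instead of podman, against the new Airflow version
    # ("build" is an assumed target; substitute whatever target the Makefile defines):
    make build IMAGE_BUILDER=docker AIRFLOW_VERSION=2.6.2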
2 changes: 1 addition & 1 deletion dags/common/models/dag_config.py
@@ -20,6 +20,6 @@ class DagConfig:
     })
     executor_image: Optional[dict] = field(default_factory=lambda: {
         "repository": "quay.io/cloud-bulldozer",
-        "tag": "2.3.2"
+        "tag": "2.6.2"
     })
     dependencies: Optional[dict] = field(default_factory=lambda: {})
2 changes: 1 addition & 1 deletion dags/nocp/manifest.yaml
@@ -7,4 +7,4 @@ dagConfig:
   cleanupOnSuccess: true
   executorImages:
     repository: quay.io/cloud-bulldozer
-    tag: 2.3.2
+    tag: 2.6.2
5 changes: 1 addition & 4 deletions dags/nocp/scripts/run_ocm_api_load.sh
@@ -89,15 +89,12 @@ run_ocm_api_load(){
 
         # Timeout runs ocm-load-test for the specified duration even if airflow killed this script (when user wants to stop benchmark execution). This helps in ocm-load-test to cleanup resources it created. 10 minutes extra timeout is set so that test can prepare results after running for the given duration.
         # kill-after option needs sudo permissions
-        timeout --kill-after=60s --preserve-status $(((tduration + 10) * 60)) $TESTDIR/build/ocm-load-test --aws-region $AWS_DEFAULT_REGION --aws-account-id $AWS_ACCOUNT_ID --aws-access-key $AWS_OSDCCADMIN_KEY --aws-access-secret $AWS_OSDCCADMIN_SECRET --cooldown $COOLDOWN --duration $tduration --elastic-index ocm-request-test --elastic-insecure-skip-verify=true --elastic-server $ES_SERVER --gateway-url $GATEWAY_URL --ocm-token $OCM_TOKEN --ocm-token-url $OCM_TOKEN_URL --output-path $TESTDIR/results --rate $trate --test-id $UUID --test-names $tname $rampoptions
+        timeout --kill-after=60s --preserve-status $(((tduration + 10) * 60)) $TESTDIR/build/ocm-load-test --aws-region $AWS_DEFAULT_REGION --aws-account-id $AWS_ACCOUNT_ID --aws-access-key $AWS_OSDCCADMIN_KEY --aws-access-secret $AWS_OSDCCADMIN_SECRET --cooldown $COOLDOWN --duration $tduration --elastic-index ocm-load-metrics --elastic-insecure-skip-verify=true --elastic-server $ES_SERVER --gateway-url $GATEWAY_URL --ocm-token $OCM_TOKEN --ocm-token-url $OCM_TOKEN_URL --output-path $TESTDIR/results --rate $trate --test-id $UUID --test-names $tname $rampoptions
         sleep $COOLDOWN
     done
     benchmark_rv=$?
     end_time=$(date +%s)
 
-    echo "Uploading Result files..."
-    python3 $TESTDIR/automation.py upload --dir $TESTDIR/results --server ${SNAPPY_DATA_SERVER_URL} --user ${SNAPPY_DATA_SERVER_USERNAME} --password ${SNAPPY_DATA_SERVER_PASSWORD}
-
     # scraping metrics
     export KUBE_ES_INDEX=ocm-uhc-acct-mngr
     envsubst < $TESTDIR/ci/templates/kube-burner-config.yaml > $TESTDIR/kube-burner-am-config.yaml
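The `timeout` pattern above is easy to get wrong, so here is a minimal standalone sketch of the same semantics (`sleep 1d` stands in for the real test binary):

    # Allow the command its duration plus a 10-minute grace period; if it
    # ignores the SIGTERM sent at expiry, SIGKILL follows 60 seconds later.
    # --preserve-status reports the command's own exit status rather than
    # timeout's generic 124.
    tduration=30   # minutes (illustrative)
    timeout --kill-after=60s --preserve-status $(((tduration + 10) * 60)) sleep 1d
    echo "exit status: $?"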
2 changes: 1 addition & 1 deletion dags/openshift_nightlies/manifest.yaml
@@ -25,7 +25,7 @@ dagConfig:
   cleanupOnSuccess: true
   executorImages:
     repository: quay.io/cloud-bulldozer
-    tag: 2.3.2
+    tag: 2.6.2
   dependencies:
     e2e_benchmarking:
       repo: https://github.com/cloud-bulldozer/e2e-benchmarking.git
51 changes: 7 additions & 44 deletions dags/openshift_nightlies/scripts/install/rosa.sh
@@ -105,38 +105,6 @@ _login_check(){
     echo "Failed to login after 100 attempts with 5 sec interval"
 }
 
-_wait_for_workload_nodes_ready(){
-    _download_kubeconfig "$(_get_cluster_id $1)" ./kubeconfig
-    export KUBECONFIG=./kubeconfig
-    ALL_READY_ITERATIONS=0
-    ITERATIONS=0
-    # Node count is number of workload nodes, which is 3
-    NODES_COUNT=3
-    # 180 seconds per node, waiting 5 times 60 seconds (5*60 = 5 minutes) with all nodes ready to finalize
-    while [ ${ITERATIONS} -le ${NODES_COUNT} ] ; do
-        NODES_READY_COUNT=$(oc get nodes | grep -i workload | grep " Ready " | wc -l)
-        if [ ${NODES_READY_COUNT} -ne ${NODES_COUNT} ] ; then
-            echo "WARNING: ${ITERATIONS}/${NODES_COUNT} iterations. ${NODES_READY_COUNT}/${NODES_COUNT} nodes ready. Waiting 180 seconds for next check"
-            ALL_READY_ITERATIONS=0
-            ITERATIONS=$((${ITERATIONS}+1))
-            sleep 180
-        else
-            if [ ${ALL_READY_ITERATIONS} -eq 5 ] ; then
-                echo "INFO: ${ALL_READY_ITERATIONS}/5. All nodes ready, continuing process"
-                return 0
-            else
-                echo "INFO: ${ALL_READY_ITERATIONS}/5. All nodes ready. Waiting 60 seconds for next check"
-                ALL_READY_ITERATIONS=$((${ALL_READY_ITERATIONS}+1))
-                sleep 60
-            fi
-        fi
-    done
-    END_CLUSTER_STATUS="Ready. No Workers"
-    echo "ERROR: Workload nodes (${NODES_READY_COUNT}/${NODES_COUNT}) are ready after about $((${NODES_COUNT}*3)) minutes, dumping oc get nodes..."
-    oc get nodes
-    exit 1
-}
-
 _wait_for_cluster_ready(){
     START_TIMER=$(date +%s)
     echo "INFO: Installation starts at $(date -d @${START_TIMER})"
@@ -440,7 +408,6 @@ install(){
 postinstall(){
     # sleeping to address issue #324
     sleep 120
-    export WORKLOAD_TYPE=$(cat ${json_file} | jq -r .openshift_workload_node_instance_type)
     export EXPIRATION_TIME=$(cat ${json_file} | jq -r .rosa_expiration_time)
     _download_kubeconfig "$(_get_cluster_id ${CLUSTER_NAME})" ./kubeconfig
     unset KUBECONFIG
@@ -450,10 +417,6 @@ postinstall(){
         export PASSWORD=$(echo ${CLUSTER_NAME} | md5sum | awk '{print $1}')
         ocm create idp -n localauth -t htpasswd --username kubeadmin --password ${PASSWORD} -c ${CLUSTER_NAME}
         ocm create user kubeadmin -c "$(_get_cluster_id ${CLUSTER_NAME})" --group=cluster-admins
-        if [[ $WORKLOAD_TYPE != "null" ]]; then
-            # create machinepool for workload nodes
-            ocm create machinepool -c ${CLUSTER_NAME} --instance-type ${WORKLOAD_TYPE} --labels 'node-role.kubernetes.io/workload=' --taints 'role=workload:NoSchedule' --replicas 3 workload
-        fi
         # set expiration time
         EXPIRATION_STRING=$(date -d "${EXPIRATION_TIME} minutes" '+{"expiration_timestamp": "%FT%TZ"}')
         ocm patch /api/clusters_mgmt/v1/clusters/"$(_get_cluster_id ${CLUSTER_NAME})" <<< ${EXPIRATION_STRING}
@@ -463,10 +426,6 @@ postinstall(){
         URL=$(rosa describe cluster -c $CLUSTER_NAME --output json | jq -r ".api.url")
         PASSWORD=$(rosa create admin -c "$(_get_cluster_id ${CLUSTER_NAME})" -y 2>/dev/null | grep "oc login" | awk '{print $7}')
         if [ $HCP == "true" ]; then _login_check $URL $PASSWORD; fi
-        if [[ $WORKLOAD_TYPE != "null" ]]; then
-            # create machinepool for workload nodes
-            rosa create machinepool -c ${CLUSTER_NAME} --instance-type ${WORKLOAD_TYPE} --name workload --labels node-role.kubernetes.io/workload= --taints role=workload:NoSchedule --replicas 3
-        fi
         # set expiration to 24h
         rosa edit cluster -c "$(_get_cluster_id ${CLUSTER_NAME})" --expiration=${EXPIRATION_TIME}m
     fi
@@ -711,13 +670,17 @@ if [[ "$operation" == "install" ]]; then
         install
         index_metadata
     fi
-    if [[ $WORKLOAD_TYPE != "null" ]]; then _wait_for_workload_nodes_ready ${CLUSTER_NAME}; fi
 elif [ "${CLUSTER_STATUS}" == "ready" ] ; then
     printf "INFO: Cluster ${CLUSTER_NAME} already installed and ready, reusing..."
-    postinstall
+    postinstall
+elif [ "${CLUSTER_STATUS}" == "error" ] ; then
+    printf "INFO: Cluster ${CLUSTER_NAME} errored, cleaning them now..."
+    cleanup
+    printf "INFO: Fail this install to re-try a fresh install"
+    exit 1
 else
     printf "INFO: Cluster ${CLUSTER_NAME} already installed but not ready, exiting..."
-    exit 1
+    exit 1
 fi
 
 elif [[ "$operation" == "cleanup" ]]; then
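The new `error` branch makes a failed install self-recovering: the errored cluster is cleaned up and the script exits non-zero, so a retried Airflow task starts from a fresh install instead of reusing a half-built cluster. The removal of `_wait_for_workload_nodes_ready`, its call site, and both machinepool blocks matches the deleted `WORKLOAD_TYPE` plumbing above.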
4 changes: 0 additions & 4 deletions
@@ -27,10 +27,6 @@ setup(){
     curl -sS https://mirror.openshift.com/pub/openshift-v4/clients/ocp/latest/openshift-client-linux.tar.gz | tar xz oc
 
     export PATH=$PATH:$(pwd)
-
-    if [[ ! -z "$KUBEADMIN_PASSWORD" ]]; then
-        oc login -u kubeadmin -p $KUBEADMIN_PASSWORD --insecure-skip-tls-verify
-    fi
 }
 
 setup
6 changes: 6 additions & 0 deletions dags/requirements.txt
@@ -0,0 +1,6 @@
+kubernetes>=25.0.0
+apache-airflow==2.6.2
+prometheus-api-client==0.5.2
+elasticsearch==7.13.4
+apache-airflow-providers-slack==7.3.0
+pydantic>=1.10.0,<2.0.0 # https://github.com/apache/airflow/issues/32311
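The `pydantic<2.0.0` cap works around the Airflow 2.6 incompatibility tracked in the linked issue. A sketch of installing the pinned set — the constraints URL follows Airflow's documented pattern and is an assumption here, not part of this repo:

    # Install DAG dependencies; the constraints file keeps transitive
    # Airflow deps at versions tested upstream (assumed URL pattern):
    pip install -r dags/requirements.txt \
      --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.6.2/constraints-3.8.txt"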
9 changes: 0 additions & 9 deletions dags/setup.cfg
@@ -20,15 +20,6 @@ home-page = https://github.com/cloud-bulldozer/benchmark-operator/cli
 zip_safe = False
 packages = find:
 include_package_data = True
-# Add here dependencies of your project (semicolon/line-separated), e.g.
-install_requires =
-    kubernetes>=25.0.0
-    apache-airflow==2.3.2
-    prometheus-api-client==0.5.2
-    elasticsearch==7.13.4
-    apache-airflow-providers-slack==7.1.0
-    markupsafe==2.0.1
-
 python_requires = >=3.8
 
 [options.extras_require]
8 changes: 1 addition & 7 deletions dags/tox.ini
@@ -8,13 +8,7 @@ extras =
     tests
 setenv =
     py{38,39}-unit: COVERAGE_FILE = .coverage.{envname}
-deps =
-    kubernetes>=25.0.0
-    apache-airflow==2.3.2
-    prometheus-api-client==0.5.2
-    elasticsearch==7.13.4
-    apache-airflow-providers-slack==7.1.0
-    markupsafe==2.0.1
+deps = -r{toxinidir}/requirements.txt
 
 python_requires = >=3.8
 
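Pointing `deps` at the new `dags/requirements.txt` leaves a single source of truth for version pins (the duplicated lists in setup.cfg and tox.ini are deleted above). Invocation is unchanged; the environment name below comes from the `py{38,39}-unit` factor:

    tox -e py38-unit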
2 changes: 1 addition & 1 deletion images/airflow-ansible/Dockerfile
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=quay.io/cloud-bulldozer/airflow:2.3.2
+ARG BASE_IMAGE
 FROM ${BASE_IMAGE}
 USER root
 RUN apt install bc awscli -y
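With the default gone, `BASE_IMAGE` must now be passed explicitly, so this image can no longer silently build on a stale Airflow base. A sketch using the Makefile's defaults (`podman`, tag `$(AIRFLOW_VERSION)-$(AIRFLOW_PYTHON_VERSION)`); the output tag is an assumption:

    podman build \
      --build-arg BASE_IMAGE=quay.io/cloud-bulldozer/airflow:2.6.2-python3.8 \
      -t quay.io/cloud-bulldozer/airflow-ansible:2.6.2 \
      images/airflow-ansible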
6 changes: 3 additions & 3 deletions images/airflow-managed-services/Dockerfile
@@ -1,7 +1,7 @@
-ARG BASE_IMAGE=quay.io/cloud-bulldozer/airflow:2.3.2
+ARG BASE_IMAGE=quay.io/cloud-bulldozer/airflow:latest
 # Hypershift Compilation
-FROM golang:1.18 AS hypershift
-RUN git clone --branch main https://github.com/openshift/hypershift
+FROM golang:1.19 AS hypershift
+RUN git clone --single-branch --branch main https://github.com/openshift/hypershift --depth=1
 WORKDIR hypershift
 RUN make build
 # Runtime image
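The added clone flags are a build-speed optimization for the hypershift stage; standalone, the equivalent is:

    # --depth=1 fetches only the tip commit and --single-branch restricts the
    # fetch to main, so the build stage downloads a fraction of the history.
    git clone --single-branch --branch main --depth=1 https://github.com/openshift/hypershift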
5 changes: 2 additions & 3 deletions images/airflow/Dockerfile
@@ -14,7 +14,6 @@ RUN curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
 
 ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
-
-RUN curl -L https://github.com/cloud-bulldozer/kube-burner/releases/download/v1.6/kube-burner-1.6-Linux-x86_64.tar.gz | tar xz -C /usr/bin kube-burner
-RUN curl -L https://github.com/jtaleric/k8s-netperf/releases/download/v0.0.7/k8s-netperf_0.0.7_linux_amd64.tar.gz | tar xz -C /usr/bin k8s-netperf
+RUN curl -L https://github.com/cloud-bulldozer/kube-burner/releases/download/v1.7.2/kube-burner-V1.7.2-linux-x86_64.tar.gz | tar xz -C /usr/bin kube-burner
+RUN curl -L https://github.com/cloud-bulldozer/k8s-netperf/releases/download/v0.1.11/k8s-netperf_Linux_v0.1.11_x86_64.tar.gz | tar xz -C /usr/bin k8s-netperf
 USER airflow
 RUN pip install prometheus-api-client elasticsearch apache-airflow-providers-elasticsearch apache-airflow-providers-cncf-kubernetes --upgrade
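A quick smoke test that the bumped binaries landed where expected — the image tag is an assumption, and `version`/`--help` are assumed entry points for these tools:

    podman run --rm quay.io/cloud-bulldozer/airflow:2.6.2-python3.8 kube-burner version
    podman run --rm quay.io/cloud-bulldozer/airflow:2.6.2-python3.8 k8s-netperf --help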
4 changes: 2 additions & 2 deletions scripts/common.sh
@@ -33,14 +33,14 @@ output_info() {
   _argo_password=$(kubectl get secret/argocd-initial-admin-secret -n argocd -o jsonpath='{.data.password}' | base64 --decode)
 
   printf "\n\n ArgoCD Configs"
-  printf "\n Host: $_argo_url \n User: $_argo_user \n Password: $_argo_password"
+  printf "\n Host: https://$_argo_url \n User: $_argo_user \n Password: $_argo_password"
 
   _airflow_url=$(oc get route/airflow -o jsonpath='{.spec.host}' -n $_airflow_ns)
   _airflow_user="admin"
   _airflow_password="REDACTED"
 
   printf "\n\n Airflow Configs (Password was user defined so this script doesn't know it!)"
-  printf "\n Host: $_airflow_url \n User: $_airflow_user \n Password: $_airflow_password\n\n"
+  printf "\n Host: https://$_airflow_url \n User: $_airflow_user \n Password: $_airflow_password\n\n"
 
   _results_dashboard_url=$(oc get route/perf-dashboard -o jsonpath='{.spec.host}' -n dashboard)
   if [ -z "$_results_dashboard_url" ]; then
6 changes: 4 additions & 2 deletions scripts/playground/templates/airflow.yaml
@@ -22,8 +22,8 @@ spec:
   releaseName: airflow
   values: |-
     defaultAirflowRepository: quay.io/cloud-bulldozer/airflow
-    defaultAirflowTag: 2.3.2
-    airflowVersion: 2.3.2
+    defaultAirflowTag: 2.6.2
+    airflowVersion: 2.6.2
     executor: KubernetesExecutor
     createUserJob:
       useHelmHooks: false
@@ -107,6 +107,8 @@ spec:
     name: airflow-webserver
     weight: 100
   wildcardPolicy: None
+  tls:
+    termination: edge
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: Role
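`termination: edge` has the OpenShift router terminate TLS and forward plain HTTP to the Airflow webserver pod — which is also why `common.sh` above now prefixes the printed route hosts with `https://`. One way to confirm the route picked it up (namespace assumed):

    oc get route/airflow -n airflow -o jsonpath='{.spec.tls.termination}'   # expect: edge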
4 changes: 2 additions & 2 deletions scripts/tenant/templates/airflow.yaml
@@ -23,8 +23,8 @@ spec:
   releaseName: airflow
   values: |-
     defaultAirflowRepository: quay.io/cloud-bulldozer/airflow
-    defaultAirflowTag: 2.3.2
-    airflowVersion: 2.3.2
+    defaultAirflowTag: 2.6.2
+    airflowVersion: 2.6.2
     executor: KubernetesExecutor
     images:
       airflow:
