From 72c5635b79eac041a975ceeb83c2effa28993d4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Mac=C3=ADk?= Date: Thu, 20 Jun 2024 12:04:24 +0200 Subject: [PATCH] fix(max-concurrency): fix result collection of max-concurrency test for new iteration directories (#1226) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pavel Macík --- .../max-concurrency/collect-results.sh | 103 ++++++++++-------- tests/load-tests/run-max-concurrency.sh | 44 ++++---- 2 files changed, 80 insertions(+), 67 deletions(-) diff --git a/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh b/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh index 7876392ab..93763905a 100755 --- a/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh +++ b/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh @@ -29,13 +29,13 @@ collect_artifacts() { collect_monitoring_data() { echo "[$(date --utc -Ins)] Setting up tool to collect monitoring data" { - python3 -m venv venv - set +u - # shellcheck disable=SC1091 - source venv/bin/activate - set -u - python3 -m pip install -U pip - python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core" + python3 -m venv venv + set +u + # shellcheck disable=SC1091 + source venv/bin/activate + set -u + python3 -m pip install -U pip + python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core" } &>"${ARTIFACT_DIR}/monitoring-setup.log" ## Monitoring data for entire test @@ -121,18 +121,26 @@ ${csv_delim}WorkloadKPI\ ${csv_delim}Errors\ ${csv_delim}UserAvgTime\ ${csv_delim}UserMaxTime\ -${csv_delim}ApplicationAvgTime\ -${csv_delim}ApplicationMaxTime\ -${csv_delim}CDQAvgTime\ -${csv_delim}CDQMaxTime\ -${csv_delim}ComponentsAvgTime\ -${csv_delim}ComponentsMaxTime\ -${csv_delim}PipelineRunAvgTime\ -${csv_delim}PipelineRunMaxTime\ 
-${csv_delim}IntegrationTestsRunPipelineSucceededTimeAvg\ -${csv_delim}IntegrationTestsRunPipelineSucceededTimeMax\ -${csv_delim}DeploymentSucceededTimeAvg\ -${csv_delim}DeploymentSucceededTimeMax\ +${csv_delim}CreateApplicationAvgTime\ +${csv_delim}CreateApplicationMaxTime\ +${csv_delim}ValidateApplicationAvgTime\ +${csv_delim}ValidateApplicationMaxTime\ +${csv_delim}CreateComponentAvgTime\ +${csv_delim}CreateComponentMaxTime\ +${csv_delim}ValidatePipelineRunConditionAvgTime\ +${csv_delim}ValidatePipelineRunConditionMaxTime\ +${csv_delim}ValidatePipelineRunCreationAvgTime\ +${csv_delim}ValidatePipelineRunCreationMaxTime\ +${csv_delim}ValidatePipelineRunSignatureAvgTime\ +${csv_delim}ValidatePipelineRunSignatureMaxTime\ +${csv_delim}CreateIntegrationTestScenarioAvgTime\ +${csv_delim}CreateIntegrationTestScenarioMaxTime\ +${csv_delim}ValidateIntegrationTestScenarioAvgTime\ +${csv_delim}ValidateIntegrationTestScenarioMaxTime\ +${csv_delim}ValidateTestPipelineRunConditionAvgTime\ +${csv_delim}ValidateTestPipelineRunConditionMaxTime\ +${csv_delim}ValidateTestPipelineRunCreationAvgTime\ +${csv_delim}ValidateTestPipelineRunCreationMaxTime\ ${csv_delim}ClusterCPUUsageAvg\ ${csv_delim}ClusterDiskUsageAvg\ ${csv_delim}ClusterMemoryUsageAvg\ @@ -158,12 +166,10 @@ ${csv_delim}ClusterNetworkReceiveBytesTotalAvg\ ${csv_delim}ClusterNetworkTransmitBytesTotalAvg\ ${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \ >"$max_concurrency_csv" - mc_files=$(find "$output_dir" -type f -name 'load-test.max-concurrency.*.json') - if [ -n "$mc_files" ]; then - for i in $mc_files; do - iteration_index=$(echo "$i" | sed -e 's,'"$output_dir"'/load-test.max-concurrency.\([0-9-]\+\).*,\1,g') - - parked_go_routines=$(get_parked_go_routines "$iteration_index") + iteration_dirs=$(find "$ARTIFACT_DIR/iterations" -type d -name 'iteration-*') + if [ -n "$iteration_dirs" ]; then + for iteration_dir in $iteration_dirs; do + parked_go_routines=$(get_parked_go_routines "$iteration_dir") 
parked_go_routines_columns="" if [ -n "$parked_go_routines" ]; then for g in $parked_go_routines; do @@ -174,24 +180,33 @@ ${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \ parked_go_routines_columns="$parked_go_routines_columns + $csv_delim_quoted" done fi + echo "[$(date --utc -Ins)] Processing $iteration_dir/load-test.json" jq -rc "(.metadata.\"max-concurrency\".iteration | tostring) \ - + $csv_delim_quoted + (.threads | tostring) \ - + $csv_delim_quoted + (.workloadKPI | tostring) \ - + $csv_delim_quoted + (.errorsTotal | tostring) \ - + $csv_delim_quoted + (.createUserTimeAvg | tostring) \ - + $csv_delim_quoted + (.createUserTimeMax | tostring) \ - + $csv_delim_quoted + (.createApplicationsTimeAvg | tostring) \ - + $csv_delim_quoted + (.createApplicationsTimeMax | tostring) \ - + $csv_delim_quoted + (.createCDQsTimeAvg | tostring) \ - + $csv_delim_quoted + (.createCDQsTimeMax | tostring) \ - + $csv_delim_quoted + (.createComponentsTimeAvg | tostring) \ - + $csv_delim_quoted + (.createComponentsTimeMax | tostring) \ - + $csv_delim_quoted + (.runPipelineSucceededTimeAvg | tostring) \ - + $csv_delim_quoted + (.runPipelineSucceededTimeMax | tostring) \ - + $csv_delim_quoted + (.integrationTestsRunPipelineSucceededTimeAvg | tostring) \ - + $csv_delim_quoted + (.integrationTestsRunPipelineSucceededTimeMax | tostring) \ - + $csv_delim_quoted + (.deploymentSucceededTimeAvg | tostring) \ - + $csv_delim_quoted + (.deploymentSucceededTimeMax | tostring) \ + + $csv_delim_quoted + (.parameters.options.Concurrency | tostring) \ + + $csv_delim_quoted + (.results.measurements.KPI.mean | tostring) \ + + $csv_delim_quoted + (.results.measurements.KPI.errors | tostring) \ + + $csv_delim_quoted + (.results.measurements.HandleUser.pass.duration.mean | tostring) \ + + $csv_delim_quoted + (.results.measurements.HandleUser.pass.duration.max | tostring) \ + + $csv_delim_quoted + (.results.measurements.createApplication.pass.duration.mean | tostring) \ + + $csv_delim_quoted + 
(.results.measurements.createApplication.pass.duration.max | tostring) \ + + $csv_delim_quoted + (.results.measurements.validateApplication.pass.duration.mean | tostring) \ + + $csv_delim_quoted + (.results.measurements.validateApplication.pass.duration.max | tostring) \ + + $csv_delim_quoted + (.results.measurements.createComponent.pass.duration.mean | tostring) \ + + $csv_delim_quoted + (.results.measurements.createComponent.pass.duration.max | tostring) \ + + $csv_delim_quoted + (.results.measurements.validatePipelineRunCondition.pass.duration.mean | tostring) \ + + $csv_delim_quoted + (.results.measurements.validatePipelineRunCondition.pass.duration.max | tostring) \ + + $csv_delim_quoted + (.results.measurements.validatePipelineRunCreation.pass.duration.mean | tostring) \ + + $csv_delim_quoted + (.results.measurements.validatePipelineRunCreation.pass.duration.max | tostring) \ + + $csv_delim_quoted + (.results.measurements.validatePipelineRunSignature.pass.duration.mean | tostring) \ + + $csv_delim_quoted + (.results.measurements.validatePipelineRunSignature.pass.duration.max | tostring) \ + + $csv_delim_quoted + (.results.measurements.createIntegrationTestScenario.pass.duration.mean | tostring) \ + + $csv_delim_quoted + (.results.measurements.createIntegrationTestScenario.pass.duration.max | tostring) \ + + $csv_delim_quoted + (.results.measurements.validateIntegrationTestScenario.pass.duration.mean | tostring) \ + + $csv_delim_quoted + (.results.measurements.validateIntegrationTestScenario.pass.duration.max | tostring) \ + + $csv_delim_quoted + (.results.measurements.validateTestPipelineRunCondition.pass.duration.mean | tostring) \ + + $csv_delim_quoted + (.results.measurements.validateTestPipelineRunCondition.pass.duration.max | tostring) \ + + $csv_delim_quoted + (.results.measurements.validateTestPipelineRunCreation.pass.duration.mean | tostring) \ + + $csv_delim_quoted + (.results.measurements.validateTestPipelineRunCreation.pass.duration.max | tostring) 
\ + $csv_delim_quoted + (.measurements.cluster_cpu_usage_seconds_total_rate.mean | tostring) \ + $csv_delim_quoted + (.measurements.cluster_disk_throughput_total.mean | tostring) \ + $csv_delim_quoted + (.measurements.cluster_memory_usage_rss_total.mean | tostring) \ @@ -216,7 +231,7 @@ ${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \ + $csv_delim_quoted + (.measurements.cluster_network_receive_bytes_total.mean | tostring) \ + $csv_delim_quoted + (.measurements.cluster_network_transmit_bytes_total.mean | tostring) \ + $csv_delim_quoted + (.measurements.node_disk_io_time_seconds_total.mean | tostring)" \ - "$i" >>"$max_concurrency_csv" + "$iteration_dir/load-test.json" >>"$max_concurrency_csv" done else echo "[$(date --utc -Ins)] WARNING: No file matching '$output_dir/load-test.max-concurrency.*.json' found!" @@ -224,7 +239,7 @@ ${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \ } get_parked_go_routines() { - goroutines_pprof=$(find "$output_dir" -name "tekton-results-watcher.tekton-results-watcher-*.goroutine-dump-0.$1.pprof") + goroutines_pprof=$(find "$1" -name "tekton-results-watcher.tekton-results-watcher-*.goroutine-dump-0.pprof") count=0 for i in $goroutines_pprof; do if [ $count -gt 0 ]; then diff --git a/tests/load-tests/run-max-concurrency.sh b/tests/load-tests/run-max-concurrency.sh index 61c26e545..53e4f9754 100755 --- a/tests/load-tests/run-max-concurrency.sh +++ b/tests/load-tests/run-max-concurrency.sh @@ -11,24 +11,22 @@ OPENSHIFT_USERNAME="${OPENSHIFT_USERNAME:-kubeadmin}" OPENSHIFT_PASSWORD="${OPENSHIFT_PASSWORD:-$(cat "$KUBEADMIN_PASSWORD_FILE")}" load_test() { - local workdir threads iteration index iteration_index + local workdir threads index workdir=${1:-/tmp} threads=${2:-1} - iteration=$(printf "%04d" "${3:-1}") index=$(printf "%04d" "$threads") - iteration_index="${iteration}-${index}" ## Enable CPU profiling in Tekton if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ]; then echo "Starting CPU profiling with pprof" for p in $(oc get pods -n 
openshift-pipelines -l app=tekton-pipelines-controller -o name); do pod="${p##*/}" - file="tekton-pipelines-controller.$pod.cpu-profile.$iteration_index" + file="tekton-pipelines-controller.$pod.cpu-profile" oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & echo $! >"$workdir/$file.pid" done for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" - file=tekton-results-watcher.$pod.cpu-profile.$iteration_index + file="tekton-results-watcher.$pod.cpu-profile" oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & echo $! >"$workdir/$file.pid" done @@ -38,14 +36,14 @@ load_test() { echo "Starting memory profiling of Tekton controller with pprof" for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do pod="${p##*/}" - file="tekton-pipelines-controller.$pod.memory-profile.$iteration_index" + file="tekton-pipelines-controller.$pod.memory-profile" oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & echo $! 
>"$workdir/$file.pid" done echo "Starting memory profiling of Tekton results watcher with pprof" for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" - file=tekton-results-watcher.$pod.memory-profile.$iteration_index + file="tekton-results-watcher.$pod.memory-profile" oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" & echo $! >"$workdir/$file.pid" done @@ -92,7 +90,7 @@ load_test() { STATUS_DATA_FILE="$workdir/load-test.json" status_data.py \ --status-data-file "${STATUS_DATA_FILE}" \ - --set "name=Konflux loadtest" "started=$( cat started )" "ended=$( cat ended )" \ + --set "name=Konflux loadtest" "started=$(cat started)" "ended=$(cat ended)" \ --set-subtree-json "parameters.options=$workdir/load-test-options.json" "results.measurements=$workdir/load-test-timings.json" deactivate @@ -107,7 +105,7 @@ load_test() { for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do pod="${p##*/}" for i in 0 1 2; do - file="tekton-pipelines-controller.$pod.goroutine-dump-$i.$iteration_index" + file="tekton-pipelines-controller.$pod.goroutine-dump-$i" oc exec -n tekton-results "$p" -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$workdir/$file.pprof" done done @@ -115,7 +113,7 @@ load_test() { for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do pod="${p##*/}" for i in 0 1 2; do - file="tekton-results-watcher.$pod.goroutine-dump-$i.$iteration_index" + file="tekton-results-watcher.$pod.goroutine-dump-$i" oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$workdir/$file.pprof" done done @@ -172,22 +170,22 @@ 
max_concurrency() { else output="$OUTPUT_DIR/load-test.max-concurrency.json" IFS="," read -r -a maxConcurrencySteps <<<"$(echo "${MAX_CONCURRENCY_STEPS:-1\ 5\ 10\ 25\ 50\ 100\ 150\ 200}" | sed 's/ /,/g')" - maxThreads=${MAX_THREADS:-10} # Do not go above this concurrency. - threshold_sec=${THRESHOLD:-300} # In seconds. If KPI crosses this duration, stop. - threshold_err=${THRESHOLD_ERR:-10} # Failure ratio. When crossed, stop. + maxThreads=${MAX_THREADS:-10} # Do not go above this concurrency. + threshold_sec=${THRESHOLD:-300} # In seconds. If KPI crosses this duration, stop. + threshold_err=${THRESHOLD_ERR:-10} # Failure ratio. When crossed, stop. echo '{"started":"'"$(date +%FT%T%:z)"'", "maxThreads": '"$maxThreads"', "maxConcurrencySteps": "'"${maxConcurrencySteps[*]}"'", "threshold": '"$threshold_sec"', "thresholdErrors": '"$threshold_err"', "maxConcurrencyReached": 0, "computedConcurrency": 0, "workloadKPI": 0, "ended": "", "errorsTotal": -1}' | jq >"$output" iteration=0 { - python3 -m venv venv - set +u - source venv/bin/activate - set -u - python3 -m pip install -U pip - python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core" - python3 -m pip install tabulate - python3 -m pip install matplotlib - deactivate + python3 -m venv venv + set +u + source venv/bin/activate + set -u + python3 -m pip install -U pip + python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core" + python3 -m pip install tabulate + python3 -m pip install matplotlib + deactivate } &>"$OUTPUT_DIR/monitoring-setup.log" for t in "${maxConcurrencySteps[@]}"; do @@ -201,7 +199,7 @@ max_concurrency() { iteration_index="$(printf "%04d" "$iteration")-$(printf "%04d" "$t")" workdir="${OUTPUT_DIR}/iteration-${iteration_index}" mkdir "${workdir}" - load_test "$workdir" "$t" "$iteration" + load_test "$workdir" "$t" jq ".metadata.\"max-concurrency\".iteration = 
\"$(printf "%04d" "$iteration")\"" "$workdir/load-test.json" >"$OUTPUT_DIR/$$.json" && mv -f "$OUTPUT_DIR/$$.json" "$workdir/load-test.json" workloadKPI=$(jq '.results.measurements.KPI.mean' "$workdir/load-test.json") workloadKPIerrors=$(jq '.results.measurements.KPI.errors' "$workdir/load-test.json")