Commit a53c19a

use AWS s3 for tekton results in dev mode

Signed-off-by: Pavel Macík <[email protected]>
pmacik committed Jun 6, 2024
1 parent bb98d98
Showing 11 changed files with 221 additions and 76 deletions.
31 changes: 6 additions & 25 deletions cmd/loadTests.go
@@ -219,7 +219,7 @@ func (u *UserAppsCompsMap) GetIntegrationTestScenarios(userName, appName string)

var (
componentRepoUrl string = "https://github.com/devfile-samples/devfile-sample-code-with-quarkus"
-componentDockerFilePath string = "src/main/docker/Dockerfile.jvm.staged"
+componentDockerfilePath string = "src/main/docker/Dockerfile.jvm.staged"
componentsCount int = 1
usernamePrefix string = "testuser"
numberOfUsers int
@@ -248,7 +248,6 @@ var (
UserCreationTimeMaxPerThread []time.Duration
ApplicationCreationTimeMaxPerThread []time.Duration
ItsCreationTimeMaxPerThread []time.Duration
-CDQCreationTimeMaxPerThread []time.Duration
ComponentCreationTimeMaxPerThread []time.Duration
PipelineRunSucceededTimeMaxPerThread []time.Duration

@@ -322,6 +321,7 @@ type LogData struct {
MachineName string `json:"machineName"`
BinaryDetails string `json:"binaryDetails"`
ComponentRepoUrl string `json:"componentRepoUrl"`
+ComponentDockerfilePath string `json:"componentDockerfilePath"`
ComponentsCount int `json:"componentsCount"`
NumberOfThreads int `json:"threads"`
NumberOfUsersPerThread int `json:"usersPerThread"`
@@ -451,6 +451,7 @@ func ExecuteLoadTest() {

func init() {
rootCmd.Flags().StringVar(&componentRepoUrl, "component-repo", componentRepoUrl, "the component repo URL to be used")
+rootCmd.Flags().StringVar(&componentDockerfilePath, "component-dockerfile-path", componentDockerfilePath, "the path to Dockerfile within component repo")
rootCmd.Flags().IntVar(&componentsCount, "components-count", componentsCount, "number of components to create per application")
rootCmd.Flags().StringVar(&usernamePrefix, "username", usernamePrefix, "the prefix used for usersignup names")
rootCmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "if 'debug' traces should be displayed in the console")
@@ -590,6 +591,7 @@ func setup(cmd *cobra.Command, args []string) {
MachineName: machineName,
BinaryDetails: binaryDetails,
ComponentRepoUrl: componentRepoUrl,
+ComponentDockerfilePath: componentDockerfilePath,
ComponentsCount: componentsCount,
NumberOfThreads: threadCount,
NumberOfUsersPerThread: numberOfUsers,
@@ -660,7 +662,6 @@ func setup(cmd *cobra.Command, args []string) {
UserCreationTimeMaxPerThread = make([]time.Duration, threadCount)
ApplicationCreationTimeMaxPerThread = make([]time.Duration, threadCount)
ItsCreationTimeMaxPerThread = make([]time.Duration, threadCount)
-CDQCreationTimeMaxPerThread = make([]time.Duration, threadCount)
ComponentCreationTimeMaxPerThread = make([]time.Duration, threadCount)
PipelineRunSucceededTimeMaxPerThread = make([]time.Duration, threadCount)

@@ -804,24 +805,6 @@ func setup(cmd *cobra.Command, args []string) {
itsCreationFailureRate := float64(itsCreationFailureCount) / float64(overallCount)
logData.ItsCreationFailureRate = itsCreationFailureRate

-// Compiling data about CDQs
-cdqCreationSuccessCount := sumFromArray(SuccessfulCDQCreationsPerThread)
-logData.CDQCreationSuccessCount = cdqCreationSuccessCount
-
-cdqCreationFailureCount := sumFromArray(FailedCDQCreationsPerThread)
-logData.CDQCreationFailureCount = cdqCreationFailureCount
-
-averageTimeToCreateCDQs := float64(0)
-if cdqCreationSuccessCount > 0 {
-averageTimeToCreateCDQs = sumDurationFromArray(CDQCreationTimeSumPerThread).Seconds() / float64(cdqCreationSuccessCount)
-}
-logData.AverageTimeToCreateCDQs = averageTimeToCreateCDQs
-
-logData.MaxTimeToCreateCDQs = maxDurationFromArray(CDQCreationTimeMaxPerThread).Seconds()
-
-cdqCreationFailureRate := float64(cdqCreationFailureCount) / float64(overallCount)
-logData.CDQCreationFailureRate = cdqCreationFailureRate
-
// Compiling data about Components
componentCreationSuccessCount := sumFromArray(SuccessfulComponentCreationsPerThread)
logData.ComponentCreationSuccessCount = componentCreationSuccessCount
@@ -831,7 +814,7 @@ func setup(cmd *cobra.Command, args []string) {

averageTimeToCreateComponents := float64(0)
if componentCreationSuccessCount > 0 {
-averageTimeToCreateComponents = sumDurationFromArray(ComponentCreationTimeSumPerThread).Seconds() / float64(cdqCreationSuccessCount)
+averageTimeToCreateComponents = sumDurationFromArray(ComponentCreationTimeSumPerThread).Seconds() / float64(componentCreationSuccessCount)
}
logData.AverageTimeToCreateComponents = averageTimeToCreateComponents

@@ -926,7 +909,6 @@ func setup(cmd *cobra.Command, args []string) {
klog.Infof("Avg/max time to spin up users: %.2f s/%.2f s", averageTimeToSpinUpUsers, logData.MaxTimeToSpinUpUsers)
klog.Infof("Avg/max time to create application: %.2f s/%.2f s", averageTimeToCreateApplications, logData.MaxTimeToCreateApplications)
klog.Infof("Avg/max time to create integration test: %.2f s/%.2f s", averageTimeToCreateIts, logData.MaxTimeToCreateIts)
klog.Infof("Avg/max time to create cdq: %.2f s/%.2f s", averageTimeToCreateCDQs, logData.MaxTimeToCreateCDQs)
klog.Infof("Avg/max time to create component: %.2f s/%.2f s", averageTimeToCreateComponents, logData.MaxTimeToCreateComponents)
klog.Infof("Avg/max time to complete pipelinesrun: %.2f s/%.2f s", averageTimeToRunPipelineSucceeded, logData.MaxTimeToRunPipelineSucceeded)
klog.Infof("Avg/max time to complete integration test: %.2f s/%.2f s", IntegrationTestsAverageTimeToRunPipelineSucceeded, logData.IntegrationTestsMaxTimeToRunPipelineSucceeded)
@@ -943,7 +925,6 @@ func setup(cmd *cobra.Command, args []string) {
klog.Infof("Number of times application creation worked/failed: %d/%d (%.2f %%)", applicationCreationSuccessCount, applicationCreationFailureCount, applicationCreationFailureRate*100)
klog.Infof("Number of times integration tests creation worked/failed: %d/%d (%.2f %%)", itsCreationSuccessCount, itsCreationFailureCount, itsCreationFailureRate*100)

klog.Infof("Number of times cdq creation worked/failed: %d/%d (%.2f %%)", cdqCreationSuccessCount, cdqCreationFailureCount, cdqCreationFailureRate*100)
klog.Infof("Number of times component creation worked/failed: %d/%d (%.2f %%)", componentCreationSuccessCount, componentCreationFailureCount, componentCreationFailureRate*100)
klog.Infof("Number of times pipeline run worked/failed: %d/%d (%.2f %%)", pipelineRunSuccessCount, pipelineRunFailureCount, pipelineRunFailureRate*100)
klog.Infof("Number of times integration tests' pipeline run worked/failed: %d/%d (%.2f %%)", integrationTestsPipelineRunSuccessCount, integrationTestsPipelineRunFailureCount, IntegrationTestsPipelineRunFailureRate*100)
@@ -1478,7 +1459,7 @@ func (h *ConcreteHandlerResources) handleComponentCreation(ctx *JourneyContext,
GitSource: &appstudioApi.GitSource{
URL: componentRepoUrl,
Revision: "",
-DockerfileURL: componentDockerFilePath,
+DockerfileURL: componentDockerfilePath,
},
},
},
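
For context, the new `--component-dockerfile-path` flag mirrors the existing `--component-repo` option. A minimal invocation sketch (the `go run` entry point and the flag values are illustrative assumptions, not taken from this commit):

```bash
# Hypothetical run of the load test with the new Dockerfile-path override.
go run ./cmd/loadTests.go \
  --component-repo "https://github.com/devfile-samples/devfile-sample-code-with-quarkus" \
  --component-dockerfile-path "src/main/docker/Dockerfile.jvm.staged" \
  --components-count 1 \
  --username testuser \
  --verbose
```
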
2 changes: 1 addition & 1 deletion tests/load-tests/ci-scripts/collect-results.sh
@@ -15,7 +15,7 @@ source "./tests/load-tests/ci-scripts/user-prefix.sh"

echo "Collecting load test results"
load_test_log=$ARTIFACT_DIR/load-tests.log
find "$output_dir" -type f -name '*.log' -exec cp -vf {} "${ARTIFACT_DIR}" \;
find "$output_dir" -type f -name '*.logs?' -exec cp -vf {} "${ARTIFACT_DIR}" \;
find "$output_dir" -type f -name 'load-tests.json' -exec cp -vf {} "${ARTIFACT_DIR}" \;
find "$output_dir" -type f -name 'gh-rate-limits-remaining.csv' -exec cp -vf {} "${ARTIFACT_DIR}" \;
find "$output_dir" -type f -name '*.pprof' -exec cp -vf {} "${ARTIFACT_DIR}" \;
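
A quick way to see what the new `'*.logs?'` pattern matches (illustrative only; `find -name` uses shell globbing, where `?` matches exactly one character):

```bash
# Illustration of find's glob semantics for the pattern above.
mkdir -p /tmp/globtest && cd /tmp/globtest
touch a.log b.logs c.logs1
find . -name '*.logs?'   # prints only ./c.logs1; '?' requires exactly one trailing character
find . -name '*.log*'    # would print all three
```
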
4 changes: 3 additions & 1 deletion tests/load-tests/ci-scripts/load-test.sh
@@ -11,7 +11,7 @@ pushd "${2:-./tests/load-tests}"

source "./ci-scripts/user-prefix.sh"

-export QUAY_E2E_ORGANIZATION MY_GITHUB_ORG GITHUB_TOKEN TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD KUBE_SCHEDULER_LOG_LEVEL
+export QUAY_E2E_ORGANIZATION MY_GITHUB_ORG GITHUB_USER GITHUB_TOKEN TEKTON_PERF_ENABLE_PROFILING TEKTON_PERF_ENABLE_CPU_PROFILING TEKTON_PERF_ENABLE_MEMORY_PROFILING TEKTON_PERF_PROFILE_CPU_PERIOD KUBE_SCHEDULER_LOG_LEVEL
QUAY_E2E_ORGANIZATION=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/quay-org)
MY_GITHUB_ORG=$(cat /usr/local/ci-secrets/redhat-appstudio-load-test/github-org)

@@ -26,6 +26,8 @@ for kv in "${kvs[@]}"; do
done
echo >>"$rate_limits_csv"

echo -e "[INFO] Start tests with user: ${GITHUB_USER}"

while true; do
timestamp=$(printf "%s" "$(date -u +'%FT%T')")
echo -n "$timestamp" >>"$rate_limits_csv"
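
The loop that follows the new line samples GitHub API rate limits into a CSV. A simplified sketch of that pattern (the endpoint and jq field are the standard GitHub rate-limit API; the exact columns the script collects are an assumption):

```bash
# Sketch: append one timestamped rate-limit sample per minute to a CSV.
rate_limits_csv="/tmp/gh-rate-limits-remaining.csv"
while true; do
    timestamp=$(date -u +'%FT%T')
    remaining=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
        https://api.github.com/rate_limit | jq -r '.resources.core.remaining')
    echo "$timestamp,$remaining" >>"$rate_limits_csv"
    sleep 60
done
```
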
tests/load-tests/cluster_read_config.yaml
@@ -45,6 +45,8 @@

{{ monitor_pod('tekton-results', 'tekton-results-watcher', 20, '-.*') }}
{{ monitor_pod_container('tekton-results', 'tekton-results-watcher', 'watcher', 20, '-.*') }}
+{{ monitor_pod('tekton-results', 'tekton-results-api', 20, '-.*') }}
+{{ monitor_pod_container('tekton-results', 'tekton-results-api', 'api', 20, '-.*') }}
{{ pv_stats('tekton-results', 'data-postgres-postgresql-0', 20) }}

- name: measurements.tekton-results-watcher.watcher_workqueue_depth
@@ -54,3 +56,15 @@
- name: measurements.tekton-results-watcher.watcher_reconcile_latency_bucket
monitoring_query: histogram_quantile(0.99, sum(rate(watcher_reconcile_latency_bucket{job="tekton-results-watcher"}[30m])) by (le) ) / 1000
monitoring_step: 20

+- name: measurements.cluster_cpu_usage_seconds_total_rate
+monitoring_query: sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=""})
+monitoring_step: 20
+
+- name: measurements.cluster_memory_usage_rss_total
+monitoring_query: sum(container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", cluster="", container!=""})
+monitoring_step: 20
+
+- name: measurements.cluster_disk_throughput_total
+monitoring_query: sum (rate(container_fs_reads_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m]) + rate(container_fs_writes_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m]))
+monitoring_step: 20
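
Each new measurement can be sanity-checked against the cluster's Prometheus before the config is used; a sketch using the standard query API (the route name and token retrieval are assumptions based on common OpenShift setups):

```bash
# Sketch: run the new cluster memory query ad hoc (instant query; the
# config above samples it as a range with a 20 s monitoring_step).
mhost=$(oc -n openshift-monitoring get route prometheus-k8s -o jsonpath='{.spec.host}')
curl -sk -H "Authorization: Bearer $(oc whoami -t)" \
    "https://$mhost/api/v1/query" \
    --data-urlencode 'query=sum(container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", container!=""})' \
    | jq '.data.result[0].value'
```
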
83 changes: 83 additions & 0 deletions tests/load-tests/ci-scripts/max-concurrency/collect-results.sh
@@ -23,6 +23,7 @@ collect_artifacts() {
mkdir -p "$artifact_logs"
find "$output_dir" -type f -name 'load-tests.max-concurrency.*.log' -exec cp -vf {} "$artifact_logs" \;
find "$output_dir" -type f -name 'load-tests.max-concurrency.json' -exec cp -vf {} "$artifact_logs" \;
find "$output_dir" -type f -name 'tekton-results-*.logs' -exec cp -vf {} "$artifact_logs" \;
mkdir -p "$artifact_pprof"
find "$output_dir" -type f -name '*.pprof' -exec cp -vf {} "$artifact_pprof" \;
}
@@ -59,10 +60,25 @@ collect_monitoring_data() {
-d &>"$monitoring_collection_log"
cp -f "$monitoring_collection_data" "$ARTIFACT_DIR"

mkdir -p "$artifact_logs/ggm"
for file in $(find "$monitoring_collection_dir/" -maxdepth 1 -name "*.csv"); do
echo "Converting $file"
out="$artifact_logs/ggm/$(basename "$file")"
rm -rf "$out"
while read line; do
timestamp=$(echo "$line" | cut -d "," -f1)
value=$(echo "$line" | cut -d "," -f2)
echo "$(date -d "@$timestamp" "+%Y-%m-%dT%H:%M:%S.%N" --utc);$value" >>"$out"
done <<<"$(tail -n +2 "$file")" &
done
wait

## Monitoring data per iteration
for monitoring_collection_data in $(find "$output_dir" -type f -name 'load-tests.max-concurrency.*.json'); do
iteration_index=$(echo "$monitoring_collection_data" | sed -e 's,.*/load-tests.max-concurrency.\([0-9]\+-[0-9]\+\).json,\1,')
monitoring_collection_log="$ARTIFACT_DIR/monitoring-collection.$iteration_index.log"
+monitoring_collection_dir="$ARTIFACT_DIR/monitoring-collection-raw-data-dir/$iteration_index"
+mkdir -p "$monitoring_collection_dir"
+echo "Collecting monitoring data for step $iteration_index..."
mstart=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get timestamp)" --iso-8601=seconds)
mend=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get endTimestamp)" --iso-8601=seconds)
@@ -72,6 +88,7 @@
--additional ./tests/load-tests/cluster_read_config.yaml \
--monitoring-start "$mstart" \
--monitoring-end "$mend" \
+--monitoring-raw-data-dir "$monitoring_collection_dir" \
--prometheus-host "https://$mhost" \
--prometheus-port 443 \
--prometheus-token "$(oc whoami -t)" \
@@ -272,12 +289,78 @@ collect_timestamp_csvs() {
oc get pipelineruns.tekton.dev -A -o json | jq "$jq_cmd" | sed -e "s/\n//g" -e "s/^\"//g" -e "s/\"$//g" -e "s/Z;/;/g" | sort -t ";" -k 13 -r -n >>"$pipelinerun_timestamps"
}

+jq_iso_8601_to_seconds="( \
+(if \$d | contains(\"m\") and (endswith(\"ms\") | not) then (\$d | capture(\"(?<minutes>\\\\d+)m(?<seconds>\\\\d+\\\\.?(\\\\d+)?)s\") | (.minutes | tonumber * 60) + (.seconds | tonumber)) else 0 end) + \
+(if \$d | (contains(\"m\") | not) and contains(\"s\") and (endswith(\"ms\") | not) and (endswith(\"µs\") | not) then (\$d | capture(\"(?<seconds>\\\\d+\\\\.\\\\d+)s\") | (.seconds | tonumber)) else 0 end) + \
+(if \$d | endswith(\"ms\") then (\$d | split(\"ms\") | .[0] | tonumber / 1000) else 0 end) + \
+(if \$d | endswith(\"µs\") then (\$d | split(\"µs\") | .[0] | tonumber / 1000000) else 0 end) \
+) | tostring"
+
+collect_tekton_results_logs() {
+echo "Collecting Tekton results logs..."
+mkdir -p "$artifact_logs"
+ggm=$artifact_logs/ggm
+mkdir -p "$ggm"
+oc logs -c api -n tekton-results -l "app.kubernetes.io/name=tekton-results-api" --prefix --tail=-1 >"$artifact_logs/tekton-results-api.log"
+oc logs -c watcher -n tekton-results -l "app.kubernetes.io/name=tekton-results-watcher" --prefix --tail=-1 >"$artifact_logs/tekton-results-watcher.log"
+oc logs -c minio -n tekton-results "pod/storage-pool-0-0" --prefix --tail=-1 >"$artifact_logs/tekton-result-storage.log"
+ts_format='"%Y-%m-%dT%H:%M:%S"'
+
+jq_cmd="(.ts | strftime($ts_format)) + (.ts | tostring | capture(\".*(?<milliseconds>\\\\.\\\\d+)\") | .milliseconds) \
++ $csv_delim_quoted + ( \
+.msg | capture(\"(?<id>GGM(\\\\d+)?) (?<type>.+) kind (?<kind>\\\\S*) ns (?<ns>\\\\S*) name (?<name>\\\\S*).* times? spent (?<duration>.*)\") \
+| .id \
++ $csv_delim_quoted + (.type) \
++ $csv_delim_quoted + (.kind) \
++ $csv_delim_quoted + (.ns) \
++ $csv_delim_quoted + (.name) \
++ $csv_delim_quoted + (.duration) \
++ $csv_delim_quoted + (.duration as \$d | $jq_iso_8601_to_seconds ) \
+)"
+component=tekton-results-api
+metrics=("UpdateLog after handleReturn" "UpateLog after flush" "GRPC receive" "RBAC check" "get record" "create stream" "read stream")
+for f in $(find $artifact_logs -type f -name "$component*.logs"); do
+echo "Processing $f..."
+grep "\"GGM" "$f" | sed -e 's,.*\({.*}\).*,\1,g' >$f.ggm.json
+jq -rc "$jq_cmd" $f.ggm.json >"$f.csv" || true
+for metric in "${metrics[@]}"; do
+m="$(echo "$metric" | sed -e 's,[ /],_,g')"
+grep "$metric"';' "$f.csv" >"$f.$m.csv"
+done &
+done
+wait
+for metric in "${metrics[@]}"; do
+m="$(echo "$metric" | sed -e 's,[ /],_,g')"
+find "$artifact_logs" -name "$component.*.logs.$m.csv" | xargs cat | sort -u >"$ggm/$component.$m.csv"
+done
+
+component=tekton-results-watcher
+metrics=("streamLogs" "dynamic Reconcile" "tkn read" "tkn write" "log copy and write" "flush" "close/rcv")
+jq_cmd="if .ts | tostring | contains(\"-\") then .ts | capture(\"(?<t>.*)Z\") | .t else (.ts | strftime($ts_format)) + (.ts | tostring | capture(\".*(?<milliseconds>\\\\.\\\\d+)\") | .milliseconds) end \
++ ( \
+.msg | capture(\"(?<id>GGM(\\\\d+)?) (?<type>.+)(?<! obj)( obj)? kind (?<kind>\\\\S*) obj ns (?<ns>\\\\S*) obj name (?<name>\\\\S*) times? spent (?<duration>.*)\") \
+| $csv_delim_quoted + (.id) \
++ $csv_delim_quoted + (.type) \
++ $csv_delim_quoted + (.kind) \
++ $csv_delim_quoted + (.ns) \
++ $csv_delim_quoted + (.name) \
++ $csv_delim_quoted + (.duration) \
++ $csv_delim_quoted + (.duration as \$d | $jq_iso_8601_to_seconds ) \
+)"
+for f in $(find $artifact_logs -type f -name "$component*.logs"); do
+echo "Processing $f..."
+grep "\"GGM" "$f" | sed -e 's,.*\({.*}\).*,\1,g' >$f.ggm.json
+jq -rc "$jq_cmd" $f.ggm.json >"$f.csv" || true
+for metric in "${metrics[@]}"; do
+m="$(echo "$metric" | sed -e 's,[ /],_,g')"
+grep "$metric"';' "$f.csv" >"$f.$m.csv"
+done &
+done
+wait
+for metric in "${metrics[@]}"; do
+m="$(echo "$metric" | sed -e 's,[ /],_,g')"
+find "$artifact_logs" -name "$component.*.logs.$m.csv" | xargs cat | sort -u >"$ggm/$component.$m.csv"
+done
+}

echo "Collecting max concurrency results..."
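
The `jq_iso_8601_to_seconds` fragment added above normalizes Go-style duration strings (for example `2m3.5s`, `750ms`, `250µs`) to seconds for the CSV output. A standalone way to exercise the same logic (the sample durations are made up):

```bash
# Feed made-up durations through the same conversion the script embeds.
for d in "2m3.5s" "4.25s" "750ms" "250µs"; do
    jq -rn --arg d "$d" '
        (if $d | contains("m") and (endswith("ms") | not) then ($d | capture("(?<minutes>\\d+)m(?<seconds>\\d+\\.?(\\d+)?)s") | (.minutes | tonumber * 60) + (.seconds | tonumber)) else 0 end) +
        (if $d | (contains("m") | not) and contains("s") and (endswith("ms") | not) and (endswith("µs") | not) then ($d | capture("(?<seconds>\\d+\\.\\d+)s") | (.seconds | tonumber)) else 0 end) +
        (if $d | endswith("ms") then ($d | split("ms") | .[0] | tonumber / 1000) else 0 end) +
        (if $d | endswith("µs") then ($d | split("µs") | .[0] | tonumber / 1000000) else 0 end)'
done
# Expected output: 123.5, 4.25, 0.75, 0.00025 (one value per line)
```
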
1 change: 1 addition & 0 deletions tests/load-tests/ci-scripts/max-concurrency/test.jq
@@ -0,0 +1 @@
+(.ts | strftime("%Y-%m-%dT%H:%M:%SZ")) + (.ts | tostring | capture(".*(?<milliseconds>\\.\\d+)") | .milliseconds) + (.ts | strftime("%z")) + ( .msg | capture("GGM (?<type>.+) name (?<name>\\S+) .+ time spent (?<duration>.*)") | ";" + (.type) + ";" + (.name) + ";" + (.duration) + ";" + (.duration as $d | ( (if $d | contains("m") and (endswith("ms") | not) then ($d | capture("(?<minutes>\\d+)m(?<seconds>\\d+\\.?(\\d+)?)s") | (.minutes | tonumber * 60) + (.seconds | tonumber)) else 0 end) + (if $d | (contains("m") | not) and contains("s") and (endswith("ms") | not) then ($d | capture("(?<seconds>\\d+\\.\\d+)s") | (.seconds | tonumber)) else 0 end) + (if $d | endswith("ms") then ($d | split("ms") | .[0] | tonumber / 1000) else 0 end) + (if $d | endswith("µs") then ($d | split("µs") | .[0] | tonumber / 1000000) else 0 end) ) | tostring ) )
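
A sketch of how this one-line filter is presumably applied (the file names are assumptions): it expects one JSON log object per line, with a numeric `ts` field and a `msg` field carrying the GGM timing marker, and emits `;`-separated rows of timestamp, type, name, duration, and duration in seconds.

```bash
# Assumed usage, mirroring the grep|sed|jq pipeline in collect-results.sh.
grep '"GGM' tekton-results-watcher.log | sed -e 's,.*\({.*}\).*,\1,g' \
    | jq -rc -f tests/load-tests/ci-scripts/max-concurrency/test.jq
```
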
(Diffs for the remaining 5 changed files are not shown.)
