From 70c0e9a87d3cb2ab8ad48900071de25f902e412b Mon Sep 17 00:00:00 2001
From: venkataanil
Date: Mon, 23 Oct 2023 15:10:31 +0530
Subject: [PATCH 1/3] node-density-cni on 500 nodes

The changes below are required to run node-density-cni on 500+ nodes:
1) requestTimeout is set to 60s to avoid Client.Timeout errors while checking for created objects.
2) It is better to use metrics-aggregated.yml instead of metrics.yml, since it contains reduced/aggregated metrics. However, kube-burner looks for a metrics.yml file in the current directory (and generates one if it is not found) and uses that for metrics. This patch therefore adds a metrics.yml with only the required metrics to the e2e workload directory.
3) At large scale (500+ nodes), Prometheus fails to scrape containerCPU-AggregatedWorkers, containerMemory-AggregatedWorkers, nodeCPU-AggregatedWorkers, nodeMemoryUtilization-AggregatedWorkers, podStatusCount and podDistribution, so they were removed from metrics.yml.
4) We need to manually reduce the node count after the test finishes and run 'kube-burner index' to scrape the containerCPU-AggregatedWorkers and containerMemory-AggregatedWorkers metrics if needed.
---
 workloads/kube-burner-ocp-wrapper/alerts.yml | 61 +++++
 .../curl-deployment.yml | 53 ++++
 .../metrics-report.yml | 230 ++++++++++++++++++
 workloads/kube-burner-ocp-wrapper/metrics.yml | 100 ++++++++
 .../node-density-cni.yml | 39 +++
 .../webserver-deployment.yml | 41 ++++
 .../webserver-service.yml | 12 +
 7 files changed, 536 insertions(+)
 create mode 100644 workloads/kube-burner-ocp-wrapper/alerts.yml
 create mode 100644 workloads/kube-burner-ocp-wrapper/curl-deployment.yml
 create mode 100644 workloads/kube-burner-ocp-wrapper/metrics-report.yml
 create mode 100644 workloads/kube-burner-ocp-wrapper/metrics.yml
 create mode 100644 workloads/kube-burner-ocp-wrapper/node-density-cni.yml
 create mode 100644 workloads/kube-burner-ocp-wrapper/webserver-deployment.yml
 create mode 100644 workloads/kube-burner-ocp-wrapper/webserver-service.yml

diff --git a/workloads/kube-burner-ocp-wrapper/alerts.yml b/workloads/kube-burner-ocp-wrapper/alerts.yml
new file mode 100644
index 00000000..12ff3f83
--- /dev/null
+++ b/workloads/kube-burner-ocp-wrapper/alerts.yml
@@ -0,0 +1,61 @@
+# etcd
+
+- expr: avg_over_time(histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[2m]))[10m:]) > 0.01
+  description: 10 minutes avg. 99th etcd fsync latency on {{$labels.pod}} higher than 10ms. {{$value}}s
+  severity: warning
+
+- expr: avg_over_time(histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[2m]))[10m:]) > 0.03
+  description: 10 minutes avg. 99th etcd commit latency on {{$labels.pod}} higher than 30ms. {{$value}}s
+  severity: warning
+
+- expr: rate(etcd_server_leader_changes_seen_total[2m]) > 0
+  description: etcd leader changes observed
+  severity: warning
+
+# API server
+- expr: avg_over_time(histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"POST|PUT|DELETE|PATCH", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb))[10m:]) > 1
+  description: 10 minutes avg. 99th mutating API call latency for {{$labels.verb}}/{{$labels.resource}} higher than 1 second. {{$value}}s
+  severity: warning
+
+- expr: avg_over_time(histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"LIST|GET", subresource!~"log|exec|portforward|attach|proxy", scope="resource"}[2m])) by (le, resource, verb, scope))[5m:]) > 1
+  description: 5 minutes avg. 
99th read-only API call latency for {{$labels.verb}}/{{$labels.resource}} in scope {{$labels.scope}} higher than 1 second. {{$value}}s + severity: warning + +- expr: avg_over_time(histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"LIST|GET", subresource!~"log|exec|portforward|attach|proxy", scope="namespace"}[2m])) by (le, resource, verb, scope))[5m:]) > 5 + description: 5 minutes avg. 99th read-only API call latency for {{$labels.verb}}/{{$labels.resource}} in scope {{$labels.scope}} higher than 5 seconds. {{$value}}s + severity: warning + +- expr: avg_over_time(histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"LIST|GET", subresource!~"log|exec|portforward|attach|proxy", scope="cluster"}[2m])) by (le, resource, verb, scope))[5m:]) > 30 + description: 5 minutes avg. 99th read-only API call latency for {{$labels.verb}}/{{$labels.resource}} in scope {{$labels.scope}} higher than 30 seconds. {{$value}}s + severity: warning + +# Control plane pods +- expr: up{apiserver=~"kube-apiserver|openshift-apiserver"} == 0 + description: "{{$labels.apiserver}} {{$labels.instance}} down" + severity: warning + +- expr: up{namespace=~"openshift-etcd"} == 0 + description: "{{$labels.namespace}}/{{$labels.pod}} down" + severity: error + +- expr: up{namespace=~"openshift-.*(kube-controller-manager|scheduler|controller-manager|sdn|ovn-kubernetes|dns)"} == 0 + description: "{{$labels.namespace}}/{{$labels.pod}} down" + severity: warning + +- expr: up{job=~"crio|kubelet"} == 0 + description: "{{$labels.node}}/{{$labels.job}} down" + severity: warning + +- expr: up{job="ovnkube-node"} == 0 + description: "{{$labels.instance}}/{{$labels.pod}} {{$labels.job}} down" + severity: warning + +# Service sync latency +- expr: histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket[2m])) by (le)) > 10 + description: 99th Kubeproxy network programming latency higher than 10 seconds. {{$value}}s + severity: warning + +# Prometheus alerts +- expr: ALERTS{severity="critical", alertstate="firing"} > 0 + description: Critical prometheus alert. 
{{$labels.alertname}} + severity: warning diff --git a/workloads/kube-burner-ocp-wrapper/curl-deployment.yml b/workloads/kube-burner-ocp-wrapper/curl-deployment.yml new file mode 100644 index 00000000..de96b719 --- /dev/null +++ b/workloads/kube-burner-ocp-wrapper/curl-deployment.yml @@ -0,0 +1,53 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: curl-{{.Replica}}-{{.Iteration}} +spec: + template: + metadata: + labels: + name: curl-{{.Replica}}-{{.Iteration}} + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + - key: node-role.kubernetes.io/infra + operator: DoesNotExist + - key: node-role.kubernetes.io/workload + operator: DoesNotExist + containers: + - name: curlapp + image: quay.io/cloud-bulldozer/curl:latest + command: ["sleep", "inf"] + resources: + requests: + memory: "10Mi" + cpu: "10m" + env: + - name: WEBSERVER_HOSTNAME + value: webserver-{{.Replica}}-{{.Iteration}} + - name: WEBSERVER_PORT + value: "8080" + imagePullPolicy: IfNotPresent + securityContext: + privileged: false + startupProbe: + exec: + command: + - "/bin/sh" + - "-c" + - "curl --fail -sS ${WEBSERVER_HOSTNAME}:${WEBSERVER_PORT} -o /dev/null" + periodSeconds: 1 + timeoutSeconds: 1 + failureThreshold: 600 + restartPolicy: Always + replicas: 1 + selector: + matchLabels: + name: curl-{{.Replica}}-{{.Iteration}} + strategy: + type: RollingUpdate diff --git a/workloads/kube-burner-ocp-wrapper/metrics-report.yml b/workloads/kube-burner-ocp-wrapper/metrics-report.yml new file mode 100644 index 00000000..8570a594 --- /dev/null +++ b/workloads/kube-burner-ocp-wrapper/metrics-report.yml @@ -0,0 +1,230 @@ +--- +# Kubelet & CRI-O + +# Average of the CPU usage from all worker's kubelet +- query: avg(avg_over_time(irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m])[{{.elapsed}}:]) and on (node) kube_node_role{role="worker"}) + metricName: cpu-kubelet + instant: true + +# Average of the memory usage from all worker's kubelet +- query: avg(avg_over_time(process_resident_memory_bytes{service="kubelet",job="kubelet"}[{{.elapsed}}:]) and on (node) kube_node_role{role="worker"}) + metricName: memory-kubelet + instant: true + +# Max of the memory usage from all worker's kubelet +- query: max(max_over_time(process_resident_memory_bytes{service="kubelet",job="kubelet"}[{{.elapsed}}:]) and on (node) kube_node_role{role="worker"}) + metricName: max-memory-kubelet + instant: true + +# Average of the CPU usage from all worker's CRI-O +- query: avg(avg_over_time(irate(process_cpu_seconds_total{service="kubelet",job="crio"}[2m])[{{.elapsed}}:]) and on (node) kube_node_role{role="worker"}) + metricName: cpu-crio + instant: true + +# Average of the memory usage from all worker's CRI-O +- query: avg(avg_over_time(process_resident_memory_bytes{service="kubelet",job="crio"}[{{.elapsed}}:]) and on (node) kube_node_role{role="worker"}) + metricName: memory-crio + instant: true + +# Max of the memory usage from all worker's CRI-O +- query: max(max_over_time(process_resident_memory_bytes{service="kubelet",job="crio"}[{{.elapsed}}:]) and on (node) kube_node_role{role="worker"}) + metricName: max-memory-crio + instant: true + +# Etcd + +- query: avg(avg_over_time(histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[2m]))[{{.elapsed}}:])) + metricName: 99thEtcdDiskBackendCommit + instant: true + +- query: avg(avg_over_time(histogram_quantile(0.99, 
rate(etcd_disk_wal_fsync_duration_seconds_bucket[2m]))[{{.elapsed}}:])) + metricName: 99thEtcdDiskWalFsync + instant: true + +- query: avg(avg_over_time(histogram_quantile(0.99, irate(etcd_network_peer_round_trip_time_seconds_bucket[2m]))[{{.elapsed}}:])) + metricName: 99thEtcdRoundTripTime + instant: true + +# Control-plane + +- query: avg(avg_over_time(topk(1, sum(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-kube-controller-manager"}[2m])) by (pod))[{{.elapsed}}:])) + metricName: cpu-kube-controller-manager + instant: true + +- query: avg(avg_over_time(topk(1, sum(container_memory_rss{name!="", namespace="openshift-kube-controller-manager"}) by (pod))[{{.elapsed}}:])) + metricName: memory-kube-controller-manager + instant: true + +- query: max(max_over_time(topk(1, sum(container_memory_rss{name!="", namespace="openshift-kube-controller-manager"}) by (pod))[{{.elapsed}}:])) + metricName: maxmemory-kube-controller-manager + instant: true + +- query: avg(avg_over_time(topk(3, sum(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-kube-apiserver"}[2m])) by (pod))[{{.elapsed}}:])) + metricName: cpu-kube-apiserver + instant: true + +- query: avg(avg_over_time(topk(3, sum(container_memory_rss{name!="", namespace="openshift-kube-apiserver"}) by (pod))[{{.elapsed}}:])) + metricName: memory-kube-apiserver + instant: true + +- query: max(max_over_time(topk(3, sum(container_memory_rss{name!="", namespace="openshift-kube-apiserver"}) by (pod))[{{.elapsed}}:])) + metricName: max-memory-kube-apiserver + instant: true + +- query: avg(avg_over_time(topk(3, sum(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-apiserver"}[2m])) by (pod))[{{.elapsed}}:])) + metricName: cpu-openshift-apiserver + instant: true + +- query: avg(avg_over_time(topk(3, sum(container_memory_rss{name!="", namespace="openshift-apiserver"}) by (pod))[{{.elapsed}}:])) + metricName: memory-openshift-apiserver + instant: true + +- query: max(max_over_time(topk(3, sum(container_memory_rss{name!="", namespace="openshift-apiserver"}) by (pod))[{{.elapsed}}:])) + metricName: max-memory-openshift-apiserver + instant: true + +- query: avg(avg_over_time(topk(3, sum(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-etcd"}[2m])) by (pod))[{{.elapsed}}:])) + metricName: cpu-etcd + instant: true + +- query: avg(avg_over_time(topk(1, sum(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-controller-manager"}[2m])) by (pod))[{{.elapsed}}:])) + metricName: cpu-openshift-controller-manager + instant: true + +- query: avg(avg_over_time(topk(1, sum(container_memory_rss{name!="", namespace="openshift-controller-manager"}) by (pod))[{{.elapsed}}:])) + metricName: memory-openshift-controller-manager + instant: true + +- query: max(max_over_time(topk(1, sum(container_memory_rss{name!="", namespace="openshift-controller-manager"}) by (pod))[{{.elapsed}}:])) + metricName: max-memory-openshift-controller-manager + instant: true + +- query: avg(avg_over_time(topk(3,sum(container_memory_rss{name!="", namespace="openshift-etcd"}) by (pod))[{{.elapsed}}:])) + metricName: memory-etcd + instant: true + +- query: max(max_over_time(topk(3,sum(container_memory_rss{name!="", namespace="openshift-etcd"}) by (pod))[{{.elapsed}}:])) + metricName: max-memory-etcd + instant: true + + # multus + +- query: avg(avg_over_time(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-multus", pod=~"(multus).+", container!="POD"}[2m])[{{.elapsed}}:])) by 
(container) + metricName: cpu-multus + instant: true + +- query: avg(avg_over_time(container_memory_rss{name!="", namespace="openshift-multus", pod=~"(multus).+", container!="POD"}[{{.elapsed}}:])) by (container) + metricName: memory-multus + instant: true + +- query: max(avg_over_time(container_memory_rss{name!="", namespace="openshift-multus", pod=~"(multus).+", container!="POD"}[{{.elapsed}}:])) by (container) + metricName: max-memory-multus + instant: true + +# OVNKubernetes - standard & IC + +- query: avg(avg_over_time(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-ovn-kubernetes", pod=~"(ovnkube-master|ovnkube-control-plane).+", container!="POD"}[2m])[{{.elapsed}}:])) by (container) + metricName: cpu-ovn-control-plane + instant: true + +- query: avg(avg_over_time(container_memory_rss{name!="", namespace="openshift-ovn-kubernetes", pod=~"(ovnkube-master|ovnkube-control-plane).+", container!="POD"}[{{.elapsed}}:])) by (container) + metricName: memory-ovn-control-plane + instant: true + +- query: max(avg_over_time(container_memory_rss{name!="", namespace="openshift-ovn-kubernetes", pod=~"(ovnkube-master|ovnkube-control-plane).+", container!="POD"}[{{.elapsed}}:])) by (container) + metricName: max-memory-ovn-control-plane + instant: true + +- query: avg(avg_over_time(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-ovn-kubernetes", pod=~"ovnkube-node.+", container!="POD"}[2m])[{{.elapsed}}:])) by (container) + metricName: cpu-ovnkube-node + instant: true + +- query: avg(avg_over_time(container_memory_rss{name!="", namespace="openshift-ovn-kubernetes", pod=~"ovnkube-node.+", container!="POD"}[{{.elapsed}}:])) by (container) + metricName: memory-ovnkube-node + instant: true + +- query: max(max_over_time(container_memory_rss{name!="", namespace="openshift-ovn-kubernetes", pod=~"ovnkube-node.+", container!="POD"}[{{.elapsed}}:])) by (container) + metricName: max-memory-ovnkube-node + instant: true + +# Nodes + +- query: avg(avg_over_time(sum(irate(node_cpu_seconds_total{mode!="idle", mode!="steal"}[2m]) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")) by (instance)[{{.elapsed}}:])) + metricName: cpu-masters + instant: true + +- query: avg(avg_over_time((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)[{{.elapsed}}:]) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")) + metricName: memory-masters + instant: true + +- query: max(max_over_time((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)[{{.elapsed}}:]) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")) + metricName: max-memory-masters + instant: true + +- query: avg(avg_over_time(sum(irate(node_cpu_seconds_total{mode!="idle", mode!="steal"}[2m]) and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)")) by (instance)[{{.elapsed}}:])) + metricName: cpu-workers + instant: true + +- query: avg(avg_over_time((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)[{{.elapsed}}:]) and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)")) + metricName: memory-workers + instant: true + +- query: max(max_over_time((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)[{{.elapsed}}:]) and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)")) + metricName: max-memory-workers + instant: true + +- query: 
avg(avg_over_time(sum(irate(node_cpu_seconds_total{mode!="idle", mode!="steal"}[2m]) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")) by (instance)[{{.elapsed}}:])) + metricName: cpu-infra + instant: true + +- query: avg(avg_over_time(sum(irate(node_cpu_seconds_total{mode!="idle", mode!="steal"}[2m]) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")) by (instance)[{{.elapsed}}:])) + metricName: memory-infra + instant: true + +- query: max(max_over_time((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)[{{.elapsed}}:]) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")) + metricName: max-memory-infra + instant: true + +# Monitoring and ingress + +- query: avg(avg_over_time(sum(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-monitoring", pod=~"prometheus-k8s.+"}[2m])) by (pod)[{{.elapsed}}:])) + metricName: cpu-prometheus + instant: true + +- query: avg(avg_over_time(sum(container_memory_rss{name!="", namespace="openshift-monitoring", pod=~"prometheus-k8s.+"}) by (pod)[{{.elapsed}}:])) + metricName: memory-prometheus + instant: true + +- query: max(max_over_time(sum(container_memory_rss{name!="", namespace="openshift-monitoring", pod=~"prometheus-k8s.+"}) by (pod)[{{.elapsed}}:])) + metricName: max-memory-prometheus + instant: true + +- query: avg(avg_over_time(sum(irate(container_cpu_usage_seconds_total{name!="", namespace="openshift-ingress", pod=~"router-default.+"}[2m])) by (pod)[{{.elapsed}}:])) + metricName: cpu-router + instant: true + +- query: avg(avg_over_time(sum(container_memory_rss{name!="", namespace="openshift-ingress", pod=~"router-default.+"}) by (pod)[{{.elapsed}}:])) + metricName: memory-router + instant: true + +- query: max(max_over_time(sum(container_memory_rss{name!="", namespace="openshift-ingress", pod=~"router-default.+"}) by (pod)[{{.elapsed}}:])) + metricName: max-memory-router + instant: true + +# Cluster + +- query: avg_over_time(cluster:memory_usage:ratio[{{.elapsed}}:]) + metricName: memory-cluster-usage-ratio + instant: true + +- query: avg_over_time(cluster:memory_usage:ratio[{{.elapsed}}:]) + metricName: max-memory-cluster-usage-ratio + instant: true + +- query: avg_over_time(cluster:node_cpu:ratio[{{.elapsed}}:]) + metricName: cpu-cluster-usage-ratio + instant: true + +- query: max_over_time(cluster:node_cpu:ratio[{{.elapsed}}:]) + metricName: max-cpu-cluster-usage-ratio + instant: true diff --git a/workloads/kube-burner-ocp-wrapper/metrics.yml b/workloads/kube-burner-ocp-wrapper/metrics.yml new file mode 100644 index 00000000..2aecc0c2 --- /dev/null +++ b/workloads/kube-burner-ocp-wrapper/metrics.yml @@ -0,0 +1,100 @@ +# API server + +- query: irate(apiserver_request_total{verb="POST", resource="pods", subresource="binding",code="201"}[2m]) > 0 + metricName: schedulingThroughput + +- query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"LIST|GET", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 + metricName: readOnlyAPICallsLatency + +- query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"POST|PUT|DELETE|PATCH", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 + metricName: mutatingAPICallsLatency + +- query: 
sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH"}[2m])) by (verb,resource,code) > 0 + metricName: APIRequestRate + +# Kubeproxy and OVN service sync latency + +- query: histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket[2m])) by (le)) > 0 + metricName: serviceSyncLatency + +- query: histogram_quantile(0.99, sum(rate(ovnkube_master_network_programming_duration_seconds_bucket{kind="service"}[2m])) by (le)) + metricName: serviceSyncLatency + +# Containers & pod metrics + +- query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(etcd|oauth-apiserver|sdn|ovn-kubernetes|network-node-identity|multus|.*apiserver|authentication|.*controller-manager|.*scheduler|image-registry|operator-lifecycle-manager)"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="master"}) > 0 + metricName: containerCPU-Masters + +- query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(monitoring|sdn|ovn-kubernetes|multus|ingress)"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="infra"}) > 0 + metricName: containerCPU-Infra + +- query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(etcd|oauth-apiserver|.*apiserver|ovn-kubernetes|network-node-identity|sdn|multus|ingress|authentication|.*controller-manager|.*scheduler|image-registry|operator-lifecycle-manager)"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="master"}) > 0 + metricName: containerMemory-Masters + +- query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|multus|ingress|monitoring|image-registry)"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="infra"}) > 0 + metricName: containerMemory-Infra + +# Node metrics: CPU & Memory + +- query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")) > 0 + metricName: nodeCPU-Masters + +- query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")) > 0 + metricName: nodeCPU-Infra + +# We compute memory utilization by substrating available memory to the total +# +- query: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)") + metricName: nodeMemoryUtilization-Masters + +- query: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)") + metricName: nodeMemoryUtilization-Infra + +# Etcd metrics + +- query: sum(rate(etcd_server_leader_changes_seen_total[2m])) + metricName: etcdLeaderChangesRate + +- query: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[2m])) + metricName: 99thEtcdDiskBackendCommitDurationSeconds + +- query: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[2m])) + metricName: 99thEtcdDiskWalFsyncDurationSeconds + +- query: histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m])) + metricName: 99thEtcdRoundTripTimeSeconds + +- query: sum by (cluster_version)(etcd_cluster_version) + metricName: etcdVersion + instant: true + +# Cluster metrics + +- query: 
sum(kube_namespace_status_phase) by (phase) > 0 + metricName: namespaceCount + +- query: count(kube_secret_info{}) + metricName: secretCount + instant: true + +- query: count(kube_deployment_labels{}) + metricName: deploymentCount + instant: true + +- query: count(kube_configmap_info{}) + metricName: configmapCount + instant: true + +- query: count(kube_service_info{}) + metricName: serviceCount + instant: true + +- query: kube_node_role + metricName: nodeRoles + +- query: sum(kube_node_status_condition{status="true"}) by (condition) + metricName: nodeStatus + +- query: count(kube_replicaset_labels{}) + metricName: replicaSetCount + instant: true diff --git a/workloads/kube-burner-ocp-wrapper/node-density-cni.yml b/workloads/kube-burner-ocp-wrapper/node-density-cni.yml new file mode 100644 index 00000000..7386eacf --- /dev/null +++ b/workloads/kube-burner-ocp-wrapper/node-density-cni.yml @@ -0,0 +1,39 @@ +--- +global: + gc: {{.GC}} + requestTimeout: 60s + gcMetrics: {{.GC_METRICS}} + indexerConfig: + esServers: ["{{.ES_SERVER}}"] + insecureSkipVerify: true + defaultIndex: {{.ES_INDEX}} + type: {{.INDEXING_TYPE}} + measurements: + - name: podLatency +jobs: + - name: node-density-cni + namespace: node-density-cni + jobIterations: {{.JOB_ITERATIONS}} + qps: {{.QPS}} + burst: {{.BURST}} + namespacedIterations: {{.NAMESPACED_ITERATIONS}} + iterationsPerNamespace: {{.ITERATIONS_PER_NAMESPACE}} + podWait: false + waitWhenFinished: true + preLoadImages: true + preLoadPeriod: 15s + namespaceLabels: + security.openshift.io/scc.podSecurityLabelSync: false + pod-security.kubernetes.io/enforce: privileged + pod-security.kubernetes.io/audit: privileged + pod-security.kubernetes.io/warn: privileged + objects: + + - objectTemplate: webserver-deployment.yml + replicas: 1 + + - objectTemplate: webserver-service.yml + replicas: 1 + + - objectTemplate: curl-deployment.yml + replicas: 1 diff --git a/workloads/kube-burner-ocp-wrapper/webserver-deployment.yml b/workloads/kube-burner-ocp-wrapper/webserver-deployment.yml new file mode 100644 index 00000000..34aa5c8d --- /dev/null +++ b/workloads/kube-burner-ocp-wrapper/webserver-deployment.yml @@ -0,0 +1,41 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: webserver-{{.Replica}}-{{.Iteration}} +spec: + template: + metadata: + labels: + name: webserver-{{.Replica}}-{{.Iteration}} + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + - key: node-role.kubernetes.io/infra + operator: DoesNotExist + - key: node-role.kubernetes.io/workload + operator: DoesNotExist + containers: + - name: webserver + image: quay.io/cloud-bulldozer/sampleapp:latest + resources: + requests: + memory: "10Mi" + cpu: "10m" + ports: + - containerPort: 8080 + protocol: TCP + imagePullPolicy: IfNotPresent + securityContext: + privileged: false + restartPolicy: Always + replicas: 1 + selector: + matchLabels: + name: webserver-{{.Replica}}-{{.Iteration}} + strategy: + type: RollingUpdate diff --git a/workloads/kube-burner-ocp-wrapper/webserver-service.yml b/workloads/kube-burner-ocp-wrapper/webserver-service.yml new file mode 100644 index 00000000..a569151b --- /dev/null +++ b/workloads/kube-burner-ocp-wrapper/webserver-service.yml @@ -0,0 +1,12 @@ +kind: Service +apiVersion: v1 +metadata: + name: webserver-{{.Replica}}-{{.Iteration}} +spec: + selector: + name: webserver-{{.Replica}}-{{.Iteration}} + ports: + - protocol: TCP + port: 8080 + targetPort: 
8080
+  type: ClusterIP

From e67478a05dee18c882a80ec35f5562f9e0921c0c Mon Sep 17 00:00:00 2001
From: venkataanil
Date: Wed, 25 Oct 2023 18:13:19 +0530
Subject: [PATCH 2/3] label and scrape metrics from 10 workers

Label workers with ovnic and then scrape metrics from only these workers.
node-density-cni on 500 nodes runs for 2 hours 15 minutes. Scraping metrics
from 500 nodes for the duration of 2 hours 15 minutes is overkill. So we
scrape from only 10 worker nodes if the worker node count is more than 120.
---
 workloads/kube-burner-ocp-wrapper/metrics.yml | 9 +++++++++
 workloads/kube-burner-ocp-wrapper/run.sh | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/workloads/kube-burner-ocp-wrapper/metrics.yml b/workloads/kube-burner-ocp-wrapper/metrics.yml
index 2aecc0c2..e3df522f 100644
--- a/workloads/kube-burner-ocp-wrapper/metrics.yml
+++ b/workloads/kube-burner-ocp-wrapper/metrics.yml
@@ -34,8 +34,17 @@
 - query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|multus|ingress|monitoring|image-registry)"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="infra"}) > 0
   metricName: containerMemory-Infra
 
+- query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|multus)"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="ovnic"}) > 0
+  metricName: containerCPU-Workers
+
+- query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|multus)"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="ovnic"}) > 0
+  metricName: containerMemory-Workers
+
 # Node metrics: CPU & Memory
 
+- query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="ovnic"}, "instance", "$1", "node", "(.+)")) > 0
+  metricName: nodeCPU-Workers
+
 - query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")) > 0
   metricName: nodeCPU-Masters
 
diff --git a/workloads/kube-burner-ocp-wrapper/run.sh b/workloads/kube-burner-ocp-wrapper/run.sh
index 9a36e35d..81eb488c 100755
--- a/workloads/kube-burner-ocp-wrapper/run.sh
+++ b/workloads/kube-burner-ocp-wrapper/run.sh
@@ -116,6 +116,24 @@ fi
 # Capture the exit code of the run, but don't exit the script if it fails.
 set +e
 
+# Label workers with ovnic. Metrics from only these workers are pulled.
+# node-density-cni on 500 nodes runs for 2 hours 15 minutes. Scraping metrics from 500 nodes for the duration of 2 hours 15 minutes is overkill.
+# So we scrape from only 10 worker nodes if the worker node count is more than 120.
+workers_to_label=$(oc get nodes --ignore-not-found -l node-role.kubernetes.io/worker --no-headers=true | wc -l) || true
+if [ "$workers_to_label" -gt 2 ]; then
+  workers_to_label=2
+fi
+
+count=0
+for node in $(oc get nodes --ignore-not-found -l node-role.kubernetes.io/worker --no-headers -o custom-columns=":.metadata.name"); do
+  if [ "$count" -eq "$workers_to_label" ]; then
+    break
+  fi
+  oc label nodes $node 'node-role.kubernetes.io/ovnic='
+  ((count++))
+done
+
+
 echo $cmd
 JOB_START=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
 $cmd

From 9aa5ebfc4774e60dec877c224d6d962f4d17feb0 Mon Sep 17 00:00:00 2001
From: venkataanil
Date: Mon, 30 Oct 2023 15:30:34 +0530
Subject: [PATCH 3/3] scale the machineset

Scale the machinesets to the desired count before running kube-burner.
run.sh accepts the number of workers to scale in each availability zone of the
region as environment variables. Currently this patch is hard-coded to support
the us-west-2 region with 4 availability zones. For example, to scale
US_WEST_2A to 143 nodes and US_WEST_2B to 188 nodes, pass them to run.sh as
shown below:

US_WEST_2A=143 US_WEST_2B=188 WORKLOAD=node-density-cni ./run.sh

Note: it scales nodes 50 at a time.
---
 workloads/kube-burner-ocp-wrapper/run.sh | 40 ++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/workloads/kube-burner-ocp-wrapper/run.sh b/workloads/kube-burner-ocp-wrapper/run.sh
index 81eb488c..cdc828d8 100755
--- a/workloads/kube-burner-ocp-wrapper/run.sh
+++ b/workloads/kube-burner-ocp-wrapper/run.sh
@@ -13,6 +13,10 @@ GC=${GC:-true}
 EXTRA_FLAGS=${EXTRA_FLAGS:-}
 UUID=${UUID:-$(uuidgen)}
 KUBE_DIR=${KUBE_DIR:-/tmp}
+US_WEST_2A=${US_WEST_2A:-}
+US_WEST_2B=${US_WEST_2B:-}
+US_WEST_2C=${US_WEST_2C:-}
+US_WEST_2D=${US_WEST_2D:-}
 
 download_binary(){
   KUBE_BURNER_URL=https://github.com/cloud-bulldozer/kube-burner/releases/download/v${KUBE_BURNER_VERSION}/kube-burner-V${KUBE_BURNER_VERSION}-linux-x86_64.tar.gz
@@ -116,6 +120,42 @@ fi
 # Capture the exit code of the run, but don't exit the script if it fails.
 set +e
 
+# Scale the machinesets
+for machineset_name in $(oc get -n openshift-machine-api machineset --no-headers -o custom-columns=":.metadata.name" | grep -i worker); do
+  region=$(oc get -n openshift-machine-api machineset --no-headers -o custom-columns=":.spec.template.spec.providerSpec.value.placement.availabilityZone" $machineset_name)
+  # region will be of the form us-west-2a. We need to match it to the user-provided variable, i.e. replace "-" with '_' and then convert it to upper case.
+  # For example, us-west-2a will be converted to US_WEST_2A.
+  region_var=$(echo "$region" | tr '-' '_' | tr '[:lower:]' '[:upper:]')
+  # desired_replicas will be the value stored in US_WEST_2A (if provided by the user)
+  desired_replicas=${!region_var}
+  if [[ "${desired_replicas}" != "" ]]; then
+    echo "scale the ${machineset_name} to ${desired_replicas}"
+    current_replicas=$(oc get -n openshift-machine-api -o template machineset "$machineset_name" --template={{.status.replicas}})
+    # scale 50 at a time
+    while ((current_replicas < desired_replicas)); do
+      needed_replicas=$((desired_replicas - current_replicas))
+      scale_step=$((current_replicas + needed_replicas))
+
+      if ((needed_replicas > 50)); then
+        scale_step=$((current_replicas + 50))
+      fi
+      echo "Scaling from $current_replicas to $scale_step replicas."
+      oc scale -n openshift-machine-api machineset "$machineset_name" --replicas="${scale_step}"
+      # wait for up to 1 hour, i.e. 720 retries with a 5-second sleep between retries
+      for ((i = 1; i <= 720; i++)); do
+        available_replicas=$(oc get -n openshift-machine-api -o template machineset "$machineset_name" --template={{.status.availableReplicas}})
+        if [ "$available_replicas" -eq "$scale_step" ]; then
+          echo "Desired number of replicas ($scale_step) reached."
+          break
+        fi
+        sleep 5
+      done
+      current_replicas=$(oc get -n openshift-machine-api -o template machineset "$machineset_name" --template={{.status.replicas}})
+    done
+  fi
+done
+
+
 # Label workers with ovnic. Metrics from only these workers are pulled.
 # node-density-cni on 500 nodes runs for 2 hours 15 minutes. Scraping metrics from 500 nodes for the duration of 2 hours 15 minutes is overkill.
 # So we scrape from only 10 worker nodes if the worker node count is more than 120.
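
A possible follow-up, not part of this patch series: run.sh labels a subset of the workers with node-role.kubernetes.io/ovnic= but never removes the label, so the kube_node_role{role="ovnic"} queries in metrics.yml keep matching those nodes on later runs. The sketch below is a hypothetical cleanup step; it assumes only the standard oc label-removal syntax (a trailing "-" after the label key) and reuses the same custom-columns node listing already used in run.sh.

#!/usr/bin/env bash
# Hypothetical post-run cleanup (not included in the patches above):
# remove the temporary ovnic role label from the workers that run.sh tagged,
# so kube_node_role{role="ovnic"} stops matching them on the next run.
set -euo pipefail

for node in $(oc get nodes --ignore-not-found -l node-role.kubernetes.io/ovnic --no-headers -o custom-columns=":.metadata.name"); do
  # A trailing "-" after the label key removes the label.
  oc label node "$node" node-role.kubernetes.io/ovnic-
done

Running this after the workload (and after any manual 'kube-burner index' re-scrape mentioned in PATCH 1/3) leaves the workers unlabeled for the next run.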