From 76ecd1972fe5b65b904a871f8c055e1575c8b475 Mon Sep 17 00:00:00 2001 From: Simone Rodigari <32323373+SRodi@users.noreply.github.com> Date: Fri, 4 Oct 2024 14:30:56 +0100 Subject: [PATCH] fix(grafana/PR#158): fix node selection, metrics name dns, pod-level dash (#797) # Description This PR is to fix https://github.com/microsoft/retina/pull/158 * reduce scope of PR * [make it possible to select multiple nodes on clusters dash](https://github.com/microsoft/retina/pull/158#discussion_r1668847543) * [fix pod-level regex](https://github.com/microsoft/retina/pull/158#discussion_r1589821558) * [~~use named capture groups here to separate displayed value and used value in clusters dash~~](https://github.com/microsoft/retina/pull/158#discussion_r1680522494) >NOTE: I have reverted the change to DS_PROMETHEUS so as not to break existing deployments and tests. This was requested in [this comment](https://github.com/microsoft/retina/pull/158#pullrequestreview-2226013402). ## Related Issue fix https://github.com/microsoft/retina/issues/271 If this pull request is related to any issue, please mention it here. Additionally, make sure that the issue is assigned to you before submitting this pull request. ## Checklist - [x] I have read the [contributing documentation](https://retina.sh/docs/contributing). - [x] I signed and signed-off the commits (`git commit -S -s ...`). See [this documentation](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification) on signing commits. - [x] I have correctly attributed the author(s) of the code. - [x] I have tested the changes locally. - [x] I have followed the project's style guidelines. - [x] I have updated the documentation, if necessary. - [x] I have added tests, if applicable. ## Screenshots (if applicable) or Testing Completed Please add any relevant screenshots or GIFs to showcase the changes made. 
### All dashboards ![Screenshot 2024-10-01 152822](https://github.com/user-attachments/assets/6b15f10d-dc12-4405-9898-7da59b2fcdd9) ![Screenshot 2024-10-01 152846](https://github.com/user-attachments/assets/5e1763ce-2a48-4dd9-b4c5-f2b52a7cb3d5) ![Screenshot 2024-10-01 152917](https://github.com/user-attachments/assets/3e4aab9d-7b44-4357-a709-d137e3bb8e47) ### Node selection fix ![Screenshot 2024-10-02 103738](https://github.com/user-attachments/assets/5b61ce34-6a1e-414b-8c9e-1b35f89f7efb) ![Screenshot 2024-10-02 103802](https://github.com/user-attachments/assets/529d6b9f-85a9-48e6-be52-252ecadd066b) ## Additional Notes Thanks to @aslafy-z for the original PR https://github.com/microsoft/retina/pull/158 --- Please refer to the [CONTRIBUTING.md](../CONTRIBUTING.md) file for more information on how to contribute to this project. Signed-off-by: Simone Rodigari --- .../legacy/grafana/dashboards/clusters.json | 16 +++---- deploy/legacy/grafana/dashboards/dns.json | 10 ++--- .../legacy/grafana/dashboards/pod-level.json | 45 ++++++++++++------- 3 files changed, 40 insertions(+), 31 deletions(-) diff --git a/deploy/legacy/grafana/dashboards/clusters.json b/deploy/legacy/grafana/dashboards/clusters.json index 9d448999b4..c5045314c9 100644 --- a/deploy/legacy/grafana/dashboards/clusters.json +++ b/deploy/legacy/grafana/dashboards/clusters.json @@ -171,7 +171,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum by (cluster) (rate(networkobservability_forward_bytes[$__rate_interval]))", + "expr": "sum by (cluster) (rate(networkobservability_forward_bytes{instance=~\"$Nodes\"}[$__rate_interval]))", "format": "table", "instant": true, "legendFormat": "__auto", @@ -279,7 +279,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (cluster) (rate(networkobservability_forward_bytes[$__rate_interval]))", + "expr": "sum by (cluster) (rate(networkobservability_forward_bytes{instance=~\"$Nodes\"}[$__rate_interval]))", "legendFormat": "__auto", "range": true, 
"refId": "A" @@ -355,7 +355,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum by (cluster) (rate(networkobservability_drop_bytes[$__rate_interval]))", + "expr": "sum by (cluster) (rate(networkobservability_drop_bytes{instance=~\"$Nodes\"}[$__rate_interval]))", "format": "table", "instant": true, "legendFormat": "__auto", @@ -463,7 +463,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by (cluster) (rate(networkobservability_drop_bytes[$__rate_interval]))", + "expr": "sum by (cluster) (rate(networkobservability_drop_bytes{instance=~\"$Nodes\"}[$__rate_interval]))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -3713,23 +3713,19 @@ "type": "query" }, { - "allValue": "(.*)", "current": {}, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(kube_node_info{cluster=\"$cluster\"},node)", + "definition": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", "hide": 0, "includeAll": true, "label": "Nodes", "multi": true, "name": "Nodes", "options": [], - "query": { - "query": "label_values(kube_node_info{cluster=\"$cluster\"},node)", - "refId": "PrometheusVariableQueryEditor-VariableQuery" - }, + "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", "refresh": 2, "regex": "", "skipUrlSync": false, diff --git a/deploy/legacy/grafana/dashboards/dns.json b/deploy/legacy/grafana/dashboards/dns.json index cb9ff6f766..542e4c9a64 100644 --- a/deploy/legacy/grafana/dashboards/dns.json +++ b/deploy/legacy/grafana/dashboards/dns.json @@ -856,6 +856,7 @@ "type": "datasource" }, { + "allValue": "(.*)", "current": {}, "datasource": { "type": "prometheus", @@ -863,7 +864,7 @@ }, "definition": "label_values(kube_node_info, cluster)", "hide": 0, - "includeAll": true, + "includeAll": false, "label": "Cluster", "multi": false, "name": "cluster", @@ -884,17 +885,14 @@ "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(kube_node_info, node)", + "definition": 
"label_values(kube_node_info{cluster=\"$cluster\"}, node)", "hide": 0, "includeAll": true, "label": "Nodes", "multi": true, "name": "Nodes", "options": [], - "query": { - "query": "label_values(kube_node_info, node)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", "refresh": 2, "regex": "", "skipUrlSync": false, diff --git a/deploy/legacy/grafana/dashboards/pod-level.json b/deploy/legacy/grafana/dashboards/pod-level.json index beebf031d1..7ccfdf970a 100755 --- a/deploy/legacy/grafana/dashboards/pod-level.json +++ b/deploy/legacy/grafana/dashboards/pod-level.json @@ -143,7 +143,7 @@ "uid": "${datasource}" }, "editorMode": "builder", - "expr": "sum(irate(retina_adv_forward_count{source_podname=~\"$pod\"}[1m]))", + "expr": "sum(irate(networkobservability_adv_forward_count{source_podname=~\"$pod\"}[1m]))", "legendFormat": "Total PPS", "range": true, "refId": "A" @@ -236,7 +236,7 @@ "uid": "${datasource}" }, "editorMode": "builder", - "expr": "sum(irate(retina_adv_forward_count{destination_podname=~\"$pod\"}[1m]))", + "expr": "sum(irate(networkobservability_adv_forward_count{destination_podname=~\"$pod\"}[1m]))", "legendFormat": "Total PPS", "range": true, "refId": "A" @@ -331,7 +331,7 @@ "uid": "${datasource}" }, "editorMode": "builder", - "expr": "sum by(source_podname) (irate(retina_adv_forward_count{destination_podname!=\"unknown\", source_podname!=\"unknown\"}[1m]))", + "expr": "sum by(source_podname) (irate(networkobservability_adv_forward_count{destination_podname!=\"unknown\", source_podname!=\"unknown\"}[1m]))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -423,7 +423,7 @@ "uid": "${datasource}" }, "editorMode": "builder", - "expr": "sum by(destination_podname) (irate(retina_adv_forward_count{destination_podname!=\"unknown\", source_podname!=\"unknown\"}[1m]))", + "expr": "sum by(destination_podname) (irate(networkobservability_adv_forward_count{destination_podname!=\"unknown\", 
source_podname!=\"unknown\"}[1m]))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -518,7 +518,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by(source_podname) (irate(retina_adv_drop_count{destination_podname!=\"unknown\", source_podname!=\"unknown\"}[1m]))", + "expr": "sum by(source_podname) (irate(networkobservability_adv_drop_count{destination_podname!=\"unknown\", source_podname!=\"unknown\"}[1m]))", "hide": false, "legendFormat": "__auto", "range": true, @@ -611,7 +611,7 @@ "uid": "${datasource}" }, "editorMode": "code", - "expr": "sum by(destination_podname) (irate(retina_adv_forward_count{destination_podname!=\"unknown\", source_podname!=\"unknown\"}[1m]))", + "expr": "sum by(destination_podname) (irate(networkobservability_adv_forward_count{destination_podname!=\"unknown\", source_podname!=\"unknown\"}[1m]))", "hide": false, "legendFormat": "__auto", "range": true, @@ -707,7 +707,7 @@ "uid": "${datasource}" }, "editorMode": "builder", - "expr": "sum by(source_ip) (irate(retina_adv_forward_count{source_podname=\"unknown\"}[1m]))", + "expr": "sum by(source_ip) (irate(networkobservability_adv_forward_count{source_podname=\"unknown\"}[1m]))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -799,7 +799,7 @@ "uid": "${datasource}" }, "editorMode": "builder", - "expr": "sum by(destination_ip) (irate(retina_adv_forward_count{destination_podname=\"unknown\"}[1m]))", + "expr": "sum by(destination_ip) (irate(networkobservability_adv_forward_count{destination_podname=\"unknown\"}[1m]))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -875,7 +875,7 @@ "uid": "${datasource}" }, "editorMode": "builder", - "expr": "topk(10, sum by(source_podname) (irate(retina_adv_forward_count{source_podname!=\"unknown\"}[5m])))", + "expr": "topk(10, sum by(source_podname) (irate(networkobservability_adv_forward_count{source_podname!=\"unknown\"}[5m])))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -951,7 +951,7 @@ "uid": 
"${datasource}" }, "editorMode": "builder", - "expr": "topk(10, sum by(destination_podname) (irate(retina_adv_forward_count{destination_podname!=\"unknown\"}[5m])))", + "expr": "topk(10, sum by(destination_podname) (irate(networkobservability_adv_forward_count{destination_podname!=\"unknown\"}[5m])))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -1043,7 +1043,7 @@ "uid": "${datasource}" }, "editorMode": "builder", - "expr": "avg by(instance) (sum by(le, instance) (rate(retina_node_apiserver_handshake_latency_ms_bucket[1m])))", + "expr": "avg by(instance) (sum by(le, instance) (rate(networkobservability_node_apiserver_handshake_latency_ms_bucket[1m])))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -1062,25 +1062,40 @@ ], "templating": { "list": [ + { + "current": {}, + "hide": 0, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, { "current": {}, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "query_result(retina_adv_forward_count)", + "definition": "query_result(networkobservability_adv_forward_count)", "hide": 1, "includeAll": true, - "label": "Pod Name ", + "label": "Pod Name", "multi": false, "name": "pod", "options": [], "query": { - "query": "query_result(retina_adv_forward_count)", + "query": "query_result(networkobservability_adv_forward_count)", "refId": "StandardVariableQuery" }, "refresh": 1, - "regex": "/.*_podname=\"([^\"]*).*/", + "regex": "/.*podname=\"([^\"]*).*/", "skipUrlSync": false, "sort": 0, "type": "query"