From cdbc1658271ad75a292a131a8ab2470d7860ee31 Mon Sep 17 00:00:00 2001 From: "camillo.rossi@gmail.com" Date: Wed, 6 Nov 2024 13:06:48 +1100 Subject: [PATCH] closes #27 #26 #25 #28 --- charts/aci-monitoring-stack/Chart.yaml | 2 +- .../config.d/aci-configs.yaml | 36 ++++ .../aci-monitoring-stack/config.d/system.yaml | 1 + .../aci-monitoring-stack/config.d/vlan.yaml | 34 +++- .../dashboards/epg-explore.json | 174 ++++++++++++++++-- .../prometheus/configmap-config.yaml | 14 +- charts/aci-monitoring-stack/values.yaml | 7 +- 7 files changed, 246 insertions(+), 22 deletions(-) create mode 100644 charts/aci-monitoring-stack/config.d/aci-configs.yaml diff --git a/charts/aci-monitoring-stack/Chart.yaml b/charts/aci-monitoring-stack/Chart.yaml index 22fd609..5573d11 100644 --- a/charts/aci-monitoring-stack/Chart.yaml +++ b/charts/aci-monitoring-stack/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.7 +version: 0.1.8 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. diff --git a/charts/aci-monitoring-stack/config.d/aci-configs.yaml b/charts/aci-monitoring-stack/config.d/aci-configs.yaml new file mode 100644 index 0000000..0897aec --- /dev/null +++ b/charts/aci-monitoring-stack/config.d/aci-configs.yaml @@ -0,0 +1,36 @@ +#This file is used to ingest the APIC configs to then analyse/display it inside Grafana. 
+class_queries: + subnets: + class_name: fvSubnet + query_parameter: '?order-by=fvSubnet.dn' + metrics: + - name: subnets + value_name: fvSubnet.attributes.uid + labels: + - property_name: fvSubnet.attributes.dn + regex: "^uni/tn-(?P<bd_tenant>.*)/BD-(?P<bd_name>.*)/subnet-\\[(?P<subnet>.*)\\]" + + epg_to_bd: + class_name: fvRsBd + query_parameter: '?order-by=fvRsBd.dn' + metrics: + - name: epg_to_bd + value_name: fvRsBd.attributes.uid + labels: + - property_name: fvRsBd.attributes.dn + regex: "^uni/tn-(?P<epg_tenant>.*)/ap-(?P<app>.*)/epg-(?P<epg>.*)/rsbd" + - property_name: fvRsBd.attributes.tDn + regex: "^uni/tn-(?P<bd_tenant>.*)/BD-(?P<bd_name>.*)" + + bd_to_vrf: + class_name: fvRtCtx + query_parameter: '?order-by=fvRtCtx.dn' + metrics: + - name: bd_to_vrf + value_name: fvRtCtx.attributes.status + value_transform: + '' : 0 + labels: + - property_name: fvRtCtx.attributes.dn + regex: "^uni/tn-(?P<vrf_tenant>.*)/ctx-(?P<vrf_name>.*)/rtctx-\\[uni/tn-(?P<bd_tenant>.*)/BD-(?P<bd_name>.*)\\]" + diff --git a/charts/aci-monitoring-stack/config.d/system.yaml b/charts/aci-monitoring-stack/config.d/system.yaml index 6515a5e..d338663 100644 --- a/charts/aci-monitoring-stack/config.d/system.yaml +++ b/charts/aci-monitoring-stack/config.d/system.yaml @@ -20,6 +20,7 @@ class_queries: 'unknown': 7 'decommissioned': 8 'maintenance': 9 + 'commissioned': 10 labels: - property_name: fabricNode.attributes.name regex: "^(?P.*)" diff --git a/charts/aci-monitoring-stack/config.d/vlan.yaml b/charts/aci-monitoring-stack/config.d/vlan.yaml index 2ec833f..210be63 100644 --- a/charts/aci-monitoring-stack/config.d/vlan.yaml +++ b/charts/aci-monitoring-stack/config.d/vlan.yaml @@ -57,66 +57,98 @@ class_queries: 'link-up': 3 - name: epg_rx_flood value_name: vlanCktEp.children.[l2IngrBytes5min].attributes.floodCum + value_transform: + '': 0 type: counter unit: "bytes" - name: epg_rx_multicast value_name: vlanCktEp.children.[l2IngrBytes5min].attributes.multicastCum + value_transform: + '': 0 type: counter unit: "bytes" - name: epg_rx_unicast value_name: 
vlanCktEp.children.[l2IngrBytes5min].attributes.unicastCum + value_transform: + '': 0 type: counter unit: "bytes" - name: epg_rx_drop value_name: vlanCktEp.children.[l2IngrBytes5min].attributes.dropCum + value_transform: + '': 0 type: counter unit: "bytes" - name: epg_tx_flood value_name: vlanCktEp.children.[l2EgrBytes5min].attributes.floodCum + value_transform: + '': 0 type: counter unit: "bytes" - name: epg_tx_multicast value_name: vlanCktEp.children.[l2EgrBytes5min].attributes.multicastCum + value_transform: + '': 0 type: counter unit: "bytes" - name: epg_tx_unicast value_name: vlanCktEp.children.[l2EgrBytes5min].attributes.unicastCum + value_transform: + '': 0 type: counter unit: "bytes" - name: epg_tx_drop value_name: vlanCktEp.children.[l2EgrBytes5min].attributes.dropCum + value_transform: + '': 0 type: counter unit: "bytes" - name: epg_rx_flood value_name: vlanCktEp.children.[l2IngrPkts5min].attributes.floodCum + value_transform: + '': 0 type: counter unit: "pkts" - name: epg_rx_multicast value_name: vlanCktEp.children.[l2IngrPkts5min].attributes.multicastCum + value_transform: + '': 0 type: counter unit: "pkts" - name: epg_rx_unicast value_name: vlanCktEp.children.[l2IngrPkts5min].attributes.unicastCum + value_transform: + '': 0 type: counter unit: "pkts" - name: epg_rx_drop value_name: vlanCktEp.children.[l2IngrPkts5min].attributes.dropCum + value_transform: + '': 0 type: counter unit: "pkts" - name: epg_tx_flood value_name: vlanCktEp.children.[l2EgrPkts5min].attributes.floodCum + value_transform: + '': 0 type: counter unit: "pkts" - name: epg_tx_multicast value_name: vlanCktEp.children.[l2EgrPkts5min].attributes.multicastCum + value_transform: + '': 0 type: counter unit: "pkts" - name: epg_tx_unicast value_name: vlanCktEp.children.[l2EgrPkts5min].attributes.unicastCum + value_transform: + '': 0 type: counter unit: "pkts" - name: epg_tx_drop value_name: 
vlanCktEp.children.[l2EgrPkts5min].attributes.dropCum + value_transform: + '': 0 type: counter unit: "pkts" diff --git a/charts/aci-monitoring-stack/dashboards/epg-explore.json b/charts/aci-monitoring-stack/dashboards/epg-explore.json index a2cfee0..de3d2b9 100644 --- a/charts/aci-monitoring-stack/dashboards/epg-explore.json +++ b/charts/aci-monitoring-stack/dashboards/epg-explore.json @@ -95,7 +95,7 @@ } ] }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.2.1", "targets": [ { "datasource": { @@ -237,7 +237,7 @@ } ] }, - "pluginVersion": "11.1.0", + "pluginVersion": "11.2.1", "targets": [ { "datasource": { @@ -303,6 +303,123 @@ } ], "type": "table" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Maps a Subnet to a BD and EPG", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 3, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": " (\n aci_bd_to_vrf\n + on (fabric, bd_name, bd_tenant) group_right (vrf_name, vrf_tenant)\n (\n aci_subnets{fabric=\"$fabric\",subnet=\"$subnet\"}\n + on (fabric, bd_name, bd_tenant) group_right ()\n aci_epg_to_bd\n )\n >=\n 0\n )\nor on ()\n (\n label_replace(\n aci_subnets{fabric=\"$fabric\",subnet=\"$subnet\"}\n + on (fabric, bd_name, bd_tenant) group_right ()\n aci_epg_to_bd,\n \"vrf_name\",\n \"NO VRF\",\n 
\"vrf_name\",\n \".*\"\n )\n )", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Subnets", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "aci": true, + "fabric": true, + "instance": true, + "job": true + }, + "includeByName": {}, + "indexByName": { + "Time": 1, + "Value": 12, + "aci": 2, + "app": 4, + "bd_name": 7, + "bd_tenant": 6, + "epg": 5, + "epg_tenant": 3, + "fabric": 0, + "instance": 10, + "job": 11, + "vrf_name": 9, + "vrf_tenant": 8 + }, + "renameByName": { + "app": "App", + "bd_name": "BD", + "bd_tenant": "BD Tenant", + "epg": "EPG", + "epg_tenant": "EPG Tenant", + "vrf_name": "VRF", + "vrf_tenant": "VRF Tenant" + } + } + } + ], + "type": "table" } ], "refresh": "", @@ -342,7 +459,7 @@ }, { "current": { - "selected": true, + "selected": false, "text": "common", "value": "common" }, @@ -372,8 +489,12 @@ { "current": { "selected": true, - "text": [], - "value": [] + "text": [ + "FileServer" + ], + "value": [ + "FileServer" + ] }, "datasource": { "type": "prometheus", @@ -400,12 +521,12 @@ }, { "current": { - "selected": false, + "selected": true, "text": [ - "" + "2" ], "value": [ - "" + "2" ] }, "datasource": { @@ -433,12 +554,12 @@ }, { "current": { - "selected": false, + "selected": true, "text": [ - "" + "7536649" ], "value": [ - "" + "7536649" ] }, "datasource": { @@ -463,6 +584,35 @@ "skipUrlSync": false, "sort": 3, "type": "query" + }, + { + "current": { + "selected": false, + "text": "192.168.100.1/24", + "value": "192.168.100.1/24" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(aci_subnets{fabric=\"$fabric\"},subnet)", + "description": "", + "hide": 0, + "includeAll": false, + "label": "Subnet", + "multi": false, + "name": "subnet", + "options": [], + "query": { + "qryType": 1, + "query": 
"label_values(aci_subnets{fabric=\"$fabric\"},subnet)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" } ] }, @@ -474,6 +624,6 @@ "timezone": "", "title": "EPG Explore", "uid": "a4f7ea75-6ef6-4639-809a-755684e11ac1", - "version": 10, + "version": 8, "weekStart": "" } \ No newline at end of file diff --git a/charts/aci-monitoring-stack/templates/prometheus/configmap-config.yaml b/charts/aci-monitoring-stack/templates/prometheus/configmap-config.yaml index 8013b72..bccc829 100644 --- a/charts/aci-monitoring-stack/templates/prometheus/configmap-config.yaml +++ b/charts/aci-monitoring-stack/templates/prometheus/configmap-config.yaml @@ -21,19 +21,19 @@ data: {{- if $.Values.aci_exporter.aciServiceDiscoveryURLs }} {{- range $k, $v := $.Values.aci_exporter.aciServiceDiscoveryURLs }} - job_name: {{ $k }}-aci-exporter-apics - scrape_interval: 5m - scrape_timeout: 4m + scrape_interval: {{ $v.apic_polling }} + scrape_timeout: {{ $v.apic_scrape_timeout }} metrics_path: /probe params: # List of the queries to execute on the fabric level. 
They need to match the aci-exporter config # DO NOT INSERT SPACES and use \ for next line or aci-exporter will not be able to parse the queries queries: - - "health,fabric_node_info,max_capacity,max_global_pctags,\ + - "health,fabric_node_info,max_capacity,max_global_pctags,subnets,epg_to_bd,bd_to_vrf,\ vlans,static_binding_info,node_count,object_count,fault_insts,\ ps_power_usage,apic_hw_sensors,controller_topsystem" scheme: http http_sd_configs: - - url: {{ $v }} + - url: {{ $v.url }} refresh_interval: 5m relabel_configs: - source_labels: [ __meta_role ] @@ -51,8 +51,8 @@ data: replacement: "$1" target_label: __address__ - job_name: {{ $k }}-aci-exporter-switches - scrape_interval: 1m - scrape_timeout: 30s + scrape_interval: {{ $v.switch_polling }} + scrape_timeout: {{ $v.switch_scrape_timeout }} metrics_path: /probe params: # List of the queries to execute on the fabric level. They need to match the aci-exporter config @@ -68,7 +68,7 @@ data: node_scale_ctx,node_ospf_neighbors,node_fru_power_usage,node_temperature" scheme: http http_sd_configs: - - url: {{ $v }} + - url: {{ $v.url }} refresh_interval: 5m relabel_configs: - source_labels: [ __meta_role ] diff --git a/charts/aci-monitoring-stack/values.yaml b/charts/aci-monitoring-stack/values.yaml index 52f49d4..bb4704b 100644 --- a/charts/aci-monitoring-stack/values.yaml +++ b/charts/aci-monitoring-stack/values.yaml @@ -22,7 +22,12 @@ aci_exporter: # You need to change this ONLY if you want to deploy this chart multiple times in the same namespace to avoid # having duplicate service name in the same NS. aciServiceDiscoveryURLs: - sd: http://aci-exporter-svc:9643/sd + sd: + url: http://aci-exporter-svc:9643/sd + apic_polling: 5m + apic_scrape_timeout: 4m + switch_polling: 1m + switch_scrape_timeout: 30s prefix: aci_ prometheus: