diff --git a/Dockerfile b/Dockerfile index 4ade0e9..30713a5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,7 @@ FROM python:3.7-alpine LABEL MAINTAINER="Daniel Pryor " LABEL NAME=vmware_exporter +LABEL VERSION=0.18.4 WORKDIR /opt/vmware_exporter/ COPY . /opt/vmware_exporter/ diff --git a/README.md b/README.md index 4ae89b3..48a4b26 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,14 @@ VMware vCenter Exporter for Prometheus. Get VMware vCenter information: + - Basic VM and Host metrics - Current number of active snapshots - Datastore size and other stuff - Snapshot Unix timestamp creation date ## Badges + ![Docker Stars](https://img.shields.io/docker/stars/pryorda/vmware_exporter.svg) ![Docker Pulls](https://img.shields.io/docker/pulls/pryorda/vmware_exporter.svg) ![Docker Automated](https://img.shields.io/docker/automated/pryorda/vmware_exporter.svg) @@ -24,17 +26,17 @@ Get VMware vCenter information: - Install with `$ python setup.py install` or via pip `$ pip install vmware_exporter`. The docker command below is preferred. - Create `config.yml` based on the configuration section. 
Some variables can be passed as environment variables - Run `$ vmware_exporter -c /path/to/your/config` -- Go to http://localhost:9272/metrics?vsphere_host=vcenter.company.com to see metrics +- Go to [http://localhost:9272/metrics?vsphere_host=vcenter.company.com](http://localhost:9272/metrics?vsphere_host=vcenter.company.com) to see metrics Alternatively, if you don't wish to install the package, run it using `$ vmware_exporter/vmware_exporter.py` or use the following docker command: -``` +```shell docker run -it --rm -p 9272:9272 -e VSPHERE_USER=${VSPHERE_USERNAME} -e VSPHERE_PASSWORD=${VSPHERE_PASSWORD} -e VSPHERE_HOST=${VSPHERE_HOST} -e VSPHERE_IGNORE_SSL=True -e VSPHERE_SPECS_SIZE=2000 --name vmware_exporter pryorda/vmware_exporter ``` When using containers combined with `--env-file` flag, please use capital letters to set bolleans, for example: -``` +```shell $ podman run -it --rm -p 9272:9272 --name vmware_exporter --env-file config.env pryorda/vmware_exporter $ cat config.env VSPHERE_USER=administrator@vsphere.my.domain.com @@ -44,24 +46,29 @@ VSPHERE_IGNORE_SSL=TRUE VSPHERE_SPECS_SIZE=2000 ``` - ### Configuration and limiting data collection Only provide a configuration file if enviroment variables are not used. If you do plan to use a configuration file, be sure to override the container entrypoint or add -c config.yml to the command arguments. If you want to limit the scope of the metrics gathered, you can update the subsystem under `collect_only` in the config section, e.g. under `default`, or by using the environment variables: +```yaml collect_only: vms: False vmguests: True datastores: True hosts: True snapshots: True + resourcepools: False +``` This would only connect datastores and hosts. +**WARNING**: Enabling resourcepools will have a significant performance penalty. 
+ You can have multiple sections for different hosts and the configuration would look like: -``` + +```yaml default: vsphere_host: "vcenter" vsphere_user: "user" @@ -77,6 +84,7 @@ default: datastores: True hosts: True snapshots: True + resourcepools: True esx: vsphere_host: vc.example2.com @@ -93,6 +101,7 @@ esx: datastores: False hosts: True snapshots: True + resourcepools: False limited: vsphere_host: slowvc.example.com @@ -109,44 +118,48 @@ limited: datastores: True hosts: False snapshots: False - + resourcepools: False ``` + Switching sections can be done by adding ?section=limited to the URL. #### Environment Variables -| Variable | Precedence | Defaults | Description | -| --------------------------------------| ---------------------- | -------- | --------------------------------------------------------------------------| -| `VSPHERE_HOST` | config, env, get_param | n/a | vsphere server to connect to | -| `VSPHERE_USER` | config, env | n/a | User for connecting to vsphere | -| `VSPHERE_PASSWORD` | config, env | n/a | Password for connecting to vsphere | -| `VSPHERE_SPECS_SIZE` | config, env | 5000 | Size of specs list for query stats function | -| `VSPHERE_IGNORE_SSL` | config, env | False | Ignore the ssl cert on the connection to vsphere host | -| `VSPHERE_FETCH_CUSTOM_ATTRIBUTES` | config, env | False | Set to true to collect objects custom attributes as metric labels | -| `VSPHERE_FETCH_TAGS` | config, env | False | Set to true to collect objects tags as metric labels | -| `VSPHERE_FETCH_ALARMS` | config, env | False | Fetch objects triggered alarms, and in case of hosts hdw alarms as well | -| `VSPHERE_COLLECT_HOSTS` | config, env | True | Set to false to disable collection of host metrics | -| `VSPHERE_COLLECT_DATASTORES` | config, env | True | Set to false to disable collection of datastore metrics | -| `VSPHERE_COLLECT_VMS` | config, env | True | Set to false to disable collection of virtual machine metrics | -| `VSPHERE_COLLECT_VMGUESTS` | config, env 
| True | Set to false to disable collection of virtual machine guest metrics | -| `VSPHERE_COLLECT_SNAPSHOTS` | config, env | True | Set to false to disable collection of snapshot metrics | + +| Variable | Precedence | Defaults | Description | +|-----------------------------------|------------------------|----------|---------------------------------------------------------------------------------| +| `VSPHERE_HOST` | config, env, get_param | n/a | vsphere server to connect to | +| `VSPHERE_USER` | config, env | n/a | User for connecting to vsphere | +| `VSPHERE_PASSWORD` | config, env | n/a | Password for connecting to vsphere | +| `VSPHERE_SPECS_SIZE` | config, env | 5000 | Size of specs list for query stats function | +| `VSPHERE_IGNORE_SSL` | config, env | False | Ignore the ssl cert on the connection to vsphere host | +| `VSPHERE_FETCH_CUSTOM_ATTRIBUTES` | config, env | False | Set to true to collect objects custom attributes as metric labels | +| `VSPHERE_FETCH_TAGS` | config, env | False | Set to true to collect objects tags as metric labels | +| `VSPHERE_FETCH_ALARMS` | config, env | False | Fetch objects triggered alarms, and in case of hosts hdw alarms as well | +| `VSPHERE_COLLECT_HOSTS` | config, env | True | Set to false to disable collection of host metrics | +| `VSPHERE_COLLECT_DATASTORES` | config, env | True | Set to false to disable collection of datastore metrics | +| `VSPHERE_COLLECT_VMS` | config, env | True | Set to false to disable collection of virtual machine metrics | +| `VSPHERE_COLLECT_VMGUESTS` | config, env | True | Set to false to disable collection of virtual machine guest metrics | +| `VSPHERE_COLLECT_SNAPSHOTS` | config, env | True | Set to false to disable collection of snapshot metrics | +| `VSPHERE_COLLECT_RESOURCEPOOLS` | config, env | True | Set to false to disable collection of resourcepool metrics | You can create new sections as well, with very similiar variables. 
For example, to create a `limited` section you can set: -| Variable | Precedence | Defaults | Description | -| ----------------------------------------------| ---------------------- | -------- | --------------------------------------------------------------------------| -| `VSPHERE_LIMITED_HOST` | config, env, get_param | n/a | vsphere server to connect to | -| `VSPHERE_LIMITED_USER` | config, env | n/a | User for connecting to vsphere | -| `VSPHERE_LIMITED_PASSWORD` | config, env | n/a | Password for connecting to vsphere | -| `VSPHERE_LIMITED_SPECS_SIZE` | config, env | 5000 | Size of specs list for query stats function | -| `VSPHERE_LIMITED_IGNORE_SSL` | config, env | False | Ignore the ssl cert on the connection to vsphere host | -| `VSPHERE_LIMITED_FETCH_CUSTOM_ATTRIBUTES` | config, env | False | Set to true to collect objects custom attributes as metric labels | -| `VSPHERE_LIMITED_FETCH_TAGS` | config, env | False | Set to true to collect objects tags as metric labels | -| `VSPHERE_LIMITED_FETCH_ALARMS` | config, env | False | Fetch objects triggered alarms, and in case of hosts hdw alarms as well | -| `VSPHERE_LIMITED_COLLECT_HOSTS` | config, env | True | Set to false to disable collection of host metrics | -| `VSPHERE_LIMITED_COLLECT_DATASTORES` | config, env | True | Set to false to disable collection of datastore metrics | -| `VSPHERE_LIMITED_COLLECT_VMS` | config, env | True | Set to false to disable collection of virtual machine metrics | -| `VSPHERE_LIMITED_COLLECT_VMGUESTS` | config, env | True | Set to false to disable collection of virtual machine guest metrics | -| `VSPHERE_LIMITED_COLLECT_SNAPSHOTS` | config, env | True | Set to false to disable collection of snapshot metrics | +| Variable | Precedence | Defaults | Description | +| ------------------------------------------| ---------------------- | -------- | ------------------------------------------------------------------------| +| `VSPHERE_LIMITED_HOST` | config, env, get_param | n/a | 
vsphere server to connect to | +| `VSPHERE_LIMITED_USER` | config, env | n/a | User for connecting to vsphere | +| `VSPHERE_LIMITED_PASSWORD` | config, env | n/a | Password for connecting to vsphere | +| `VSPHERE_LIMITED_SPECS_SIZE` | config, env | 5000 | Size of specs list for query stats function | +| `VSPHERE_LIMITED_IGNORE_SSL` | config, env | False | Ignore the ssl cert on the connection to vsphere host | +| `VSPHERE_LIMITED_FETCH_CUSTOM_ATTRIBUTES` | config, env | False | Set to true to collect objects custom attributes as metric labels | +| `VSPHERE_LIMITED_FETCH_TAGS` | config, env | False | Set to true to collect objects tags as metric labels | +| `VSPHERE_LIMITED_FETCH_ALARMS` | config, env | False | Fetch objects triggered alarms, and in case of hosts hdw alarms as well | +| `VSPHERE_LIMITED_COLLECT_HOSTS` | config, env | True | Set to false to disable collection of host metrics | +| `VSPHERE_LIMITED_COLLECT_DATASTORES` | config, env | True | Set to false to disable collection of datastore metrics | +| `VSPHERE_LIMITED_COLLECT_VMS` | config, env | True | Set to false to disable collection of virtual machine metrics | +| `VSPHERE_LIMITED_COLLECT_VMGUESTS` | config, env | True | Set to false to disable collection of virtual machine guest metrics | +| `VSPHERE_LIMITED_COLLECT_SNAPSHOTS` | config, env | True | Set to false to disable collection of snapshot metrics | +| `VSPHERE_LIMITED_COLLECT_RESOURCEPOOLS` | config, env | True | Set to false to disable collection of resourcepool metrics | You need to set at least `VSPHERE_SECTIONNAME_USER` for the section to be detected. @@ -154,7 +167,7 @@ You need to set at least `VSPHERE_SECTIONNAME_USER` for the section to be detect You can use the following parameters in the Prometheus configuration file. The `params` section is used to manage multiple login/passwords. 
-``` +```yaml - job_name: 'vmware_vcenter' metrics_path: '/metrics' static_configs: @@ -205,6 +218,7 @@ You can use the following parameters in the Prometheus configuration file. The ` - vCenter and vSphere 6.0/6.5 have been tested. - VM information, Snapshot, Host and Datastore basic information is exported, i.e: + ``` # HELP vmware_snapshots VMware current number of existing snapshots # TYPE vmware_snapshot_count gauge @@ -253,16 +267,18 @@ vmware_host_memory_max{host_name="esx1.company.com"} 131059.01953125 ## References The VMware exporter uses theses libraries: + - [pyVmomi](https://github.com/vmware/pyvmomi) for VMware connection - Prometheus [client_python](https://github.com/prometheus/client_python) for Prometheus supervision - [Twisted](http://twistedmatrix.com/trac/) for HTTP server The initial code is mainly inspired by: -- https://www.robustperception.io/writing-a-jenkins-exporter-in-python/ -- https://github.com/vmware/pyvmomi-community-samples -- https://github.com/jbidinger/pyvmomi-tools -Forked from https://github.com/rverchere/vmware_exporter. I removed the fork so that I could do searching and everything. +- [https://www.robustperception.io/writing-a-jenkins-exporter-in-python](https://www.robustperception.io/writing-a-jenkins-exporter-in-python/) +- [https://github.com/vmware/pyvmomi-community-samples](https://github.com/vmware/pyvmomi-community-samples) +- [https://github.com/jbidinger/pyvmomi-tools](https://github.com/jbidinger/pyvmomi-tools) + +Forked from [rverchere/vmware_exporter](https://github.com/rverchere/vmware_exporter). I removed the fork so that I could do searching and everything. 
## Maintainer diff --git a/dashboards/resourcepool.json b/dashboards/resourcepool.json new file mode 100644 index 0000000..564ab1f --- /dev/null +++ b/dashboards/resourcepool.json @@ -0,0 +1,844 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 98, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 6, + "panels": [], + "title": "Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "inspect": false + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".+vs.+" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "lcd-gauge" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Resource Pool" + }, + "properties": [ + { + "id": "custom.minWidth", + "value": 300 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory Limit" + }, + "properties": [ + { + "id": "unit", + "value": "decmbytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Limit" + }, + "properties": [ + { + "id": "unit", + "value": "rothz" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Reserved" + }, + 
"properties": [ + { + "id": "unit", + "value": "rothz" + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 0, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Resource Pool" + } + ] + }, + "pluginVersion": "9.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "100 * (vmware_resourcepool_memory_overall_usage{resourcepool_name=~\"$resourcepool_name\"} / 1024 / 1024) / vmware_resourcepool_memory_allocation_limit{resourcepool_name=~\"$resourcepool_name\"}", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "100 * (vmware_resourcepool_memory_overall_usage{resourcepool_name=~\"$resourcepool_name\"} / 1024 / 1024) / vmware_resourcepool_memory_allocation_reservation{resourcepool_name=~\"$resourcepool_name\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "100 * (vmware_resourcepool_cpu_overall_usage{resourcepool_name=~\"$resourcepool_name\"}) / vmware_resourcepool_cpu_allocation_limit{resourcepool_name=~\"$resourcepool_name\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "100 * 
(vmware_resourcepool_cpu_overall_usage{resourcepool_name=~\"$resourcepool_name\"}) / vmware_resourcepool_cpu_allocation_reservation{resourcepool_name=~\"$resourcepool_name\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "vmware_resourcepool_cpu_allocation_limit{resourcepool_name=~\"$resourcepool_name\"} * 1000000", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "vmware_resourcepool_memory_allocation_limit{resourcepool_name=~\"$resourcepool_name\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "vmware_resourcepool_cpu_allocation_reservation{resourcepool_name=~\"$resourcepool_name\"} * 1000000", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "G" + } + ], + "title": "Resource Pool Usage", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "resourcepool_name" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true, + "Value #B": true, + "__name__": true, + "__name__ 1": true, + "__name__ 2": true, + "cluster_name 1": true, + "cluster_name 2": true, + "cluster_name 3": true, + "cluster_name 4": true, + 
"cluster_name 5": true, + "cluster_name 6": true, + "cluster_name 7": true, + "container 1": true, + "container 2": true, + "container 3": true, + "container 4": true, + "container 5": true, + "container 6": true, + "container 7": true, + "dc_name 1": true, + "dc_name 2": true, + "dc_name 3": true, + "dc_name 4": true, + "dc_name 5": true, + "dc_name 6": true, + "dc_name 7": true, + "endpoint 1": true, + "endpoint 2": true, + "endpoint 3": true, + "endpoint 4": true, + "endpoint 5": true, + "endpoint 6": true, + "endpoint 7": true, + "instance 1": true, + "instance 2": true, + "instance 3": true, + "instance 4": true, + "instance 5": true, + "instance 6": true, + "instance 7": true, + "job 1": true, + "job 2": true, + "job 3": true, + "job 4": true, + "job 5": true, + "job 6": true, + "job 7": true, + "namespace 1": true, + "namespace 2": true, + "namespace 3": true, + "namespace 4": true, + "namespace 5": true, + "namespace 6": true, + "namespace 7": true, + "pod 1": true, + "pod 2": true, + "pod 3": true, + "pod 4": true, + "pod 5": true, + "pod 6": true, + "pod 7": true, + "resourcepool_status 1": true, + "resourcepool_status 2": true, + "resourcepool_status 3": true, + "resourcepool_status 4": true, + "resourcepool_status 5": true, + "resourcepool_status 6": true, + "resourcepool_status 7": true, + "service 1": true, + "service 2": true, + "service 3": true, + "service 4": true, + "service 5": true, + "service 6": true, + "service 7": true + }, + "indexByName": { + "Time 1": 1, + "Time 2": 2, + "Time 3": 26, + "Time 4": 39, + "Time 5": 50, + "Time 6": 63, + "Time 7": 75, + "Value #A": 6, + "Value #B": 5, + "Value #C": 38, + "Value #D": 37, + "Value #E": 62, + "Value #F": 7, + "Value #G": 61, + "__name__": 74, + "cluster_name 1": 8, + "cluster_name 2": 17, + "cluster_name 3": 27, + "cluster_name 4": 40, + "cluster_name 5": 51, + "cluster_name 6": 64, + "cluster_name 7": 76, + "container 1": 9, + "container 2": 18, + "container 3": 28, + "container 4": 41, + 
"container 5": 52, + "container 6": 65, + "container 7": 77, + "dc_name 1": 10, + "dc_name 2": 3, + "dc_name 3": 29, + "dc_name 4": 42, + "dc_name 5": 53, + "dc_name 6": 66, + "dc_name 7": 78, + "endpoint 1": 11, + "endpoint 2": 19, + "endpoint 3": 30, + "endpoint 4": 43, + "endpoint 5": 54, + "endpoint 6": 67, + "endpoint 7": 79, + "instance 1": 12, + "instance 2": 20, + "instance 3": 31, + "instance 4": 44, + "instance 5": 55, + "instance 6": 68, + "instance 7": 80, + "job 1": 13, + "job 2": 21, + "job 3": 32, + "job 4": 45, + "job 5": 56, + "job 6": 69, + "job 7": 81, + "namespace 1": 14, + "namespace 2": 22, + "namespace 3": 33, + "namespace 4": 46, + "namespace 5": 57, + "namespace 6": 70, + "namespace 7": 82, + "pod 1": 15, + "pod 2": 23, + "pod 3": 34, + "pod 4": 47, + "pod 5": 58, + "pod 6": 71, + "pod 7": 83, + "resourcepool_name": 0, + "resourcepool_status 1": 4, + "resourcepool_status 2": 24, + "resourcepool_status 3": 35, + "resourcepool_status 4": 48, + "resourcepool_status 5": 59, + "resourcepool_status 6": 72, + "resourcepool_status 7": 84, + "service 1": 16, + "service 2": 25, + "service 3": 36, + "service 4": 49, + "service 5": 60, + "service 6": 73, + "service 7": 85 + }, + "renameByName": { + "Value #A": "Mem vs Limit", + "Value #B": "Mem vs Reservation", + "Value #C": "CPU vs Limit", + "Value #D": "CPU vs Reservation", + "Value #E": "CPU Limit", + "Value #F": "Memory Limit", + "Value #G": "CPU Reserved", + "dc_name 2": "DC", + "resourcepool_name": "Resource Pool", + "resourcepool_status 1": "Status" + } + } + } + ], + "transparent": true, + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "inspect": false + }, + "decimals": 0, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null 
+ }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "Time.+" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".+\\ Hz" + }, + "properties": [ + { + "id": "unit", + "value": "hertz" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".+\\ %" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.displayMode", + "value": "lcd-gauge" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#6ED0E0", + "value": 0.8 + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "red", + "value": 1.2 + } + ] + } + }, + { + "id": "max", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".+\\ MB" + }, + "properties": [ + { + "id": "unit", + "value": "decmbytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Resource Pool" + }, + "properties": [ + { + "id": "custom.width", + "value": 308 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 204, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 0, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Resource Pool" + } + ] + }, + "pluginVersion": "9.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (resourcepool_name) (vmware_vm_num_cpu{resourcepool_name=~\"$resourcepool_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by 
(resourcepool_name) (vmware_vm_num_cpu{resourcepool_name=~\"$resourcepool_name\"} * 2400000000)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (resourcepool_name) (vmware_resourcepool_cpu_allocation_limit{resourcepool_name=~\"$resourcepool_name\"})", + "format": "table", + "hide": true, + "instant": true, + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (resourcepool_name) (vmware_vm_num_cpu{resourcepool_name=~\"$resourcepool_name\"} * 2400)", + "format": "table", + "hide": true, + "instant": true, + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (resourcepool_name) (vmware_vm_num_cpu{resourcepool_name=~\"$resourcepool_name\"} * 2400) / max by (resourcepool_name) (vmware_resourcepool_cpu_allocation_limit{resourcepool_name=~\"$resourcepool_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (resourcepool_name) (vmware_vm_num_cpu{resourcepool_name=~\"$resourcepool_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (resourcepool_name) 
(vmware_vm_memory_max{resourcepool_name=~\"$resourcepool_name\"})", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{resourcepool_name}}", + "range": false, + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "max by (resourcepool_name) (vmware_vm_memory_max{resourcepool_name=~\"$resourcepool_name\"})", + "format": "table", + "hide": false, + "interval": "", + "legendFormat": "{{resourcepool_name}}", + "range": true, + "refId": "H" + } + ], + "title": "Resource Pool Breakdown", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "resourcepool_name" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": false, + "Time 2": false + }, + "indexByName": {}, + "renameByName": { + "Value": "Aantal CPU", + "Value #A": "Claimde CPU", + "Value #B": "Claimde CPU Hz", + "Value #C": "RP Limit", + "Value #D": "Geclaimde CPU MHz", + "Value #E": "Overcommit Ratio %", + "Value #F": "Largest claim CPU", + "Value #G": "Claimde MEM MB", + "Value #H": "Largest claim MEM MB", + "resourcepool_name": "Resource Pool" + } + } + } + ], + "transparent": true, + "type": "table" + } + ], + "refresh": false, + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(vmware_resourcepool_cpu_overall_usage, resourcepool_name)", + "hide": 0, + "includeAll": true, + "label": "Resource Pool", + "multi": true, + "name": "resourcepool_name", + "options": [], + "query": { + "query": "label_values(vmware_resourcepool_cpu_overall_usage, resourcepool_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": 
"now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5m" + ] + }, + "timezone": "", + "title": "VMWare Resourcepool", + "uid": "9MZxpgWSk", + "version": 2, + "weekStart": "" +} \ No newline at end of file diff --git a/requirements-tests.txt b/requirements-tests.txt index 3e3cc7a..e593428 100644 --- a/requirements-tests.txt +++ b/requirements-tests.txt @@ -1,6 +1,6 @@ -pytest_docker_tools==0.2.0 -pytest==5.4.1 -pytest-cov==2.8.1 +pytest_docker_tools==3.1.3 +pytest==7.4.0 +pytest-cov==4.1.0 pytest-twisted==1.12 codecov==2.0.17 flake8>=3.6.0 diff --git a/requirements.txt b/requirements.txt index d924920..07680e5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ +wheel>=0.40.0 prometheus-client==0.0.19 -pytz +pytz>=2023.3 pyvmomi>=6.5 twisted>=14.0.2 pyyaml>=5.1 -service-identity +service-identity>=21.1.0 +requests>=2.31.0 diff --git a/vmware_exporter/vmware_exporter.py b/vmware_exporter/vmware_exporter.py index e497fa2..ae95368 100755 --- a/vmware_exporter/vmware_exporter.py +++ b/vmware_exporter/vmware_exporter.py @@ -83,11 +83,16 @@ def __init__( self._vmsCustomAttributes = {} self._hostsCustomAttributes = {} self._datastoresCustomAttributes = {} + self._vmsPoolAttributes = {} # Tags # flag to wheter fetch tags or not self.fetch_tags = fetch_tags + # Resourcepools + # flag whether to fetch resourcepools or not + self.fetch_resourcepools = collect_only['resourcepools'] is True + # Alarms # flag wheter to fetch alarms or not self.fetch_alarms = fetch_alarms @@ -101,8 +106,13 @@ def __init__( 'datastores': ['ds_name', 'dc_name', 'ds_cluster'], 'hosts': ['host_name', 'dc_name', 'cluster_name'], 'host_perf': ['host_name', 'dc_name', 'cluster_name'], + 'resourcepools': ['resourcepool_name', 'dc_name', 'cluster_name', 'resourcepool_status'], } + # if resourcepools are gonna be fetched, 'resourcepool_name' will be a label too + if self.fetch_resourcepools: + self._labelNames['vms'].append('resourcepool_name') + # if tags are gonna be
fetched 'tags' will be a label too if self.fetch_tags: for section in self._labelNames.keys(): @@ -178,6 +188,88 @@ def _create_metric_containers(self): 'VMWare Snapshot creation time in seconds', labels=self._labelNames['snapshots'] + ['vm_snapshot_name']), } + metric_list['resourcepools'] = { + 'vmware_resourcepool_cpu_allocation_expandable_reservation': GaugeMetricFamily( + 'vmware_resourcepool_cpu_allocation_expandable_reservation', + 'VMWare Resource Pool CPU Allocation expandable reservation', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_cpu_allocation_limit': GaugeMetricFamily( + 'vmware_resourcepool_cpu_allocation_limit', + 'VMWare Resource Pool CPU Allocation limit', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_cpu_allocation_overhead_limit': GaugeMetricFamily( + 'vmware_resourcepool_cpu_allocation_overhead_limit', + 'VMWare Resource Pool CPU Allocation overhead limit', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_cpu_allocation_reservation': GaugeMetricFamily( + 'vmware_resourcepool_cpu_allocation_reservation', + 'VMWare Resource Pool CPU Allocation reservation', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_memory_allocation_expandable_reservation': GaugeMetricFamily( + 'vmware_resourcepool_memory_allocation_expandable_reservation', + 'VMWare Resource Pool Memory Allocation expandable reservation', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_memory_allocation_limit': GaugeMetricFamily( + 'vmware_resourcepool_memory_allocation_limit', + 'VMWare Resource Pool Memory Allocation limit', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_memory_allocation_overhead_limit': GaugeMetricFamily( + 'vmware_resourcepool_memory_allocation_overhead_limit', + 'VMWare Resource Pool Memory Allocation overhead limit', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_memory_allocation_reservation': GaugeMetricFamily( + 
'vmware_resourcepool_memory_allocation_reservation', + 'VMWare Resource Pool Memory Allocation reservation', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_cpu_max_usage': GaugeMetricFamily( + 'vmware_resourcepool_cpu_max_usage', + 'VMWare Resource Pool Max CPU usage', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_cpu_overall_usage': GaugeMetricFamily( + 'vmware_resourcepool_cpu_overall_usage', + 'VMWare Resource Pool Overall CPU usage', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_cpu_reservation_used': GaugeMetricFamily( + 'vmware_resourcepool_cpu_reservation_used', + 'VMWare Resource Pool CPU reservation used', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_cpu_reservation_used_for_vm': GaugeMetricFamily( + 'vmware_resourcepool_cpu_reservation_used_for_vm', + 'VMWare Resource Pool CPU reservation used for VMs', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_cpu_unreserved_for_pool': GaugeMetricFamily( + 'vmware_resourcepool_cpu_unreserved_for_pool', + 'VMWare Resource Pool CPU unreserved for pool', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_cpu_unreserved_for_vm': GaugeMetricFamily( + 'vmware_resourcepool_cpu_unreserved_for_vm', + 'VMWare Resource Pool CPU unreserved for VMs', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_memory_max_usage': GaugeMetricFamily( + 'vmware_resourcepool_memory_max_usage', + 'VMWare Resource Pool Max Memory usage', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_memory_overall_usage': GaugeMetricFamily( + 'vmware_resourcepool_memory_overall_usage', + 'VMWare Resource Pool Overall Memory usage', + labels=self._labelNames['resourcepools']), + 'vmware_resourcepool_memory_reservation_used': GaugeMetricFamily( + 'vmware_resourcepool_memory_reservation_used', + 'VMWare Resource Pool Memory reservation used', + labels=self._labelNames['resourcepools']), + 
'vmware_resourcepool_memory_reservation_used_for_vm': GaugeMetricFamily(
+                'vmware_resourcepool_memory_reservation_used_for_vm',
+                'VMWare Resource Pool Memory reservation used for VMs',
+                labels=self._labelNames['resourcepools']),
+            'vmware_resourcepool_memory_unreserved_for_pool': GaugeMetricFamily(
+                'vmware_resourcepool_memory_unreserved_for_pool',
+                'VMWare Resource Pool Memory unreserved for pool',
+                labels=self._labelNames['resourcepools']),
+            'vmware_resourcepool_memory_unreserved_for_vm': GaugeMetricFamily(
+                'vmware_resourcepool_memory_unreserved_for_vm',
+                'VMWare Resource Pool Memory unreserved for VMs',
+                labels=self._labelNames['resourcepools']),
+        }
         metric_list['datastores'] = {
             'vmware_datastore_capacity_size': GaugeMetricFamily(
                 'vmware_datastore_capacity_size',
@@ -423,6 +515,10 @@ def collect(self):
         if collect_only['datastores'] is True:
             tasks.append(self._vmware_get_datastores(metrics, ))
 
+        # Collect Resource Pool metrics
+        if collect_only['resourcepools'] is True:
+            tasks.append(self._vmware_get_resourcepool(metrics))
+
         if collect_only['hosts'] is True:
             tasks.append(self._vmware_get_hosts(metrics))
             tasks.append(self._vmware_get_host_perf_manager_metrics(metrics))
@@ -662,6 +758,35 @@ def datastore_inventory(self):
 
         return datastores
 
+
+    @run_once_property
+    @defer.inlineCallbacks
+    def resourcepool_inventory(self):
+        """
+        Fetch all vim.ResourcePool objects with the properties read by _vmware_get_resourcepool
+        """
+        logging.info("Fetching vim.ResourcePool inventory")
+        start = datetime.datetime.utcnow()
+        properties = [
+            'name',
+            'parent',
+            'config.memoryAllocation',
+            'config.cpuAllocation',
+            'runtime.cpu',
+            'runtime.memory'
+        ]
+
+        resource_pools = yield self.batch_fetch_properties(
+            vim.ResourcePool,
+            properties,
+        )
+
+        fetch_time = datetime.datetime.utcnow() - start
+        logging.info("Fetched vim.ResourcePool inventory ({fetch_time})".format(fetch_time=fetch_time))
+
+        return resource_pools
+
+
     @run_once_property
     @defer.inlineCallbacks
     def host_system_inventory(self):
@@ -761,6 +886,9 @@ def vm_inventory(self):
         if self.collect_only['snapshots'] is True:
             properties.append('snapshot')
 
+        if self.collect_only['resourcepools'] is True:
+            properties.append('resourcePool')
+
         """
         papa smurf, are we collecting custom attributes?
         """
@@ -778,6 +906,17 @@ def vm_inventory(self):
             properties,
         )
 
+        """
+        Translate each VM's resourcePool reference to the pool name,
+        '-' when the VM has no resource pool.
+        """
+        if self.fetch_resourcepools:
+            self._vmsPoolAttributes = {
+                vm_moid: (vm['resourcePool'].name if vm.get('resourcePool') is not None else '-')
+                for vm_moid, vm in virtual_machines.items()
+            }
+
+
         """
         once custom attributes are fetched,
         store'em linked to their moid
@@ -974,6 +1113,45 @@ def datacenter_inventory(self):
         datacenters = yield threads.deferToThread(lambda: content.rootFolder.childEntity)
         return datacenters
 
+
+    @run_once_property
+    @defer.inlineCallbacks
+    def resourcepool_labels(self):
+        """
+        Map resource pool names to their label values
+        [resourcepool_name, dc_name, cluster_name, resourcepool_status].
+        Only the pools directly below a cluster's root resource pool are collected.
+        """
+
+        def _collect(node, level=1, dc=None, ccr=""):
+            inventory = {}
+            if isinstance(node, vim.Folder) and not isinstance(node, vim.StoragePod):
+                logging.debug("[Folder ] {level} {name}".format(name=node.name, level=('-' * level).ljust(7)))
+                for child in node.childEntity:
+                    inventory.update(_collect(child, level + 1, dc))
+            elif isinstance(node, vim.Datacenter):
+                logging.debug("[Datacenter] {level} {name}".format(name=node.name, level=('-' * level).ljust(7)))
+                inventory.update(_collect(node.hostFolder, level + 1, node.name))
+            elif isinstance(node, vim.ClusterComputeResource):
+                logging.debug("[Compute ] {level} {name}".format(name=node.name, level=('-' * level).ljust(7)))
+                inventory.update(_collect(node.resourcePool, level + 1, dc, node.name))
+            elif isinstance(node, vim.ResourcePool):
+                for resourcePool in node.resourcePool:
+                    logging.debug("[Pool ] {level} {name}".format(name=resourcePool.name, level=('-' * level).ljust(7)))
+                    inventory[resourcePool.name] = [resourcePool.name, dc, ccr, resourcePool.overallStatus]
+            else:
+                logging.debug("[? ] {level} {node}".format(node=node, level=('-' * level).ljust(7)))
+            return inventory
+
+        labels = {}
+        dcs = yield self.datacenter_inventory
+        for dc in dcs:
+            result = yield threads.deferToThread(_collect, dc)
+            labels.update(result)
+
+        return labels
+
+
     @run_once_property
     @defer.inlineCallbacks
     def datastore_labels(self):
@@ -1113,6 +1291,9 @@ def vm_labels(self):
             if self.fetch_tags:
                 labels_cnt += 1
 
+            if self.fetch_resourcepools:
+                labels_cnt += 1
+
             if labels_cnt < len(self._labelNames['vms']):
                 logging.info(
                     "Only ${cnt}/{expected} labels (vm, host, dc, cluster) found, filling n/a"
@@ -1490,6 +1671,71 @@ def _vmware_get_host_perf_manager_metrics(self, host_metrics):
 
         logging.info('FIN: _vmware_get_host_perf_manager_metrics')
 
+
+    @defer.inlineCallbacks
+    def _vmware_get_resourcepool(self, metrics):
+        """
+        Get Resource Pool information
+
+        Adds one sample per resource pool to every vmware_resourcepool_* gauge in metrics.
+        The root pool ('Resources') and pools without an entry in resourcepool_labels
+        are skipped, as are property groups missing from the inventory result.
+        """
+        logging.info("Starting rp metrics collection")
+
+        results, resourcepool_labels = yield parallelize(self.resourcepool_inventory, self.resourcepool_labels)
+
+        # (vSphere attribute, metric name suffix) pairs; unset values are reported as 0
+        allocation_fields = (
+            ('limit', 'limit'),
+            ('overheadLimit', 'overhead_limit'),
+            ('reservation', 'reservation'),
+        )
+        runtime_fields = (
+            ('maxUsage', 'max_usage'),
+            ('overallUsage', 'overall_usage'),
+            ('reservationUsed', 'reservation_used'),
+            ('reservationUsedForVm', 'reservation_used_for_vm'),
+            ('unreservedForPool', 'unreserved_for_pool'),
+            ('unreservedForVm', 'unreserved_for_vm'),
+        )
+
+        for resourcepool in results.values():
+            name = resourcepool['name']
+            # Skip the "root" resource pool
+            if name == 'Resources':
+                continue
+
+            # resourcepool_labels only knows pools directly below a cluster's root pool;
+            # skip anything else instead of aborting the whole collection with a KeyError
+            labels = resourcepool_labels.get(name)
+            if labels is None:
+                logging.debug("No labels for resource pool {name}, skipping".format(name=name))
+                continue
+
+            for resource in ('cpu', 'memory'):
+                allocation = resourcepool.get('config.{resource}Allocation'.format(resource=resource))
+                if allocation is None:
+                    continue
+                prefix = 'vmware_resourcepool_{resource}_allocation_'.format(resource=resource)
+                var = bool(allocation.expandableReservation)
+                metrics[prefix + 'expandable_reservation'].add_metric(labels, var)
+                for attribute, suffix in allocation_fields:
+                    value = getattr(allocation, attribute)
+                    metrics[prefix + suffix].add_metric(labels, float(value if value is not None else 0))
+
+            for resource in ('cpu', 'memory'):
+                runtime = resourcepool.get('runtime.{resource}'.format(resource=resource))
+                if runtime is None:
+                    continue
+                prefix = 'vmware_resourcepool_{resource}_'.format(resource=resource)
+                for attribute, suffix in runtime_fields:
+                    value = getattr(runtime, attribute)
+                    metrics[prefix + suffix].add_metric(labels, float(value if value is not None else 0))
+
+        logging.info("Finished rp metrics collection")
+
+
     @defer.inlineCallbacks
     def _vmware_get_vms(self, metrics):
         """
@@ -1528,6 +1774,9 @@ def _vmware_get_vms(self, metrics):
             for labelName in customAttributesLabelNames:
                 customLabels.append(customAttributes[moid].get(labelName))
 
+            if self.fetch_resourcepools:
+                vm_labels[moid].append(self._vmsPoolAttributes[moid])
+
             if self.fetch_tags:
                 tags = vm_tags.get(moid, [])
                 tags = ','.join(tags)
@@ -1896,6 +2145,7 @@ def configure(self, args):
                     'datastores': get_bool_env('VSPHERE_COLLECT_DATASTORES', True),
                     'hosts': get_bool_env('VSPHERE_COLLECT_HOSTS', True),
                     'snapshots': get_bool_env('VSPHERE_COLLECT_SNAPSHOTS', True),
+                    'resourcepools': get_bool_env('VSPHERE_COLLECT_RESOURCEPOOLS', True),
                 }
             }
         }
@@ -1923,6 +2173,7 @@ def configure(self, args):
                         'datastores': get_bool_env('VSPHERE_{}_COLLECT_DATASTORES'.format(section), True),
                         'hosts': get_bool_env('VSPHERE_{}_COLLECT_HOSTS'.format(section), True),
                         'snapshots': get_bool_env('VSPHERE_{}_COLLECT_SNAPSHOTS'.format(section), True),
+                        'resourcepools': get_bool_env('VSPHERE_{}_COLLECT_RESOURCEPOOLS'.format(section), True),
                     }
                 }
 