Skip to content

Commit

Permalink
Merge pull request #17 from metal-stack/hardware-metrics
Browse files Browse the repository at this point in the history
add hardware metrics
  • Loading branch information
majst01 authored Oct 8, 2024
2 parents f565dba + b67d705 commit 61819e5
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 28 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,28 @@ metal_switch_sync_durations{partition="fra-equ01",rackid="fra-equ01-rack01",swit
# TYPE metal_switch_sync_failed gauge
metal_switch_sync_failed{partition="fra-equ01",rackid="fra-equ01-rack01",switchname="fra-equ01-r01leaf01"} 0
metal_switch_sync_failed{partition="fra-equ01",rackid="fra-equ01-rack01",switchname="fra-equ01-r01leaf02"} 0
# HELP metal_machine_hardware_info Provide information about the machine
# TYPE metal_machine_hardware_info gauge
metal_machine_hardware_info{size="c1-xlarge-x86",biosVersion="3.4",bmcVersion="1.73",boardMfg="Supermicro",boardMfgSerial="",boardPartNumber="X11DPT-B",chassisPartNumber="CSE-217BHQ+-R2K22BP2",chassisPartSerial="C217BAH24AE0006",machineid="11111111-2222-3333-4444-aabbccddeeff",partition="fra-equ01",productManufacturer="Supermicro",productPartNumber="SYS-2029BT-HNR",productSerial="E262335X9100766D"} 1
# HELP metal_machine_info Provide the ipmi ip address
# TYPE metal_machine_info gauge
metal_machine_info{ipmiIP="10.1.1.36:623",machineid="11111111-2222-3333-4444-aabbccddeeff"} 1
# HELP metal_machine_power_state Provide information about the machine power state
# TYPE metal_machine_power_state gauge
metal_machine_power_state{machineid="11111111-2222-3333-4444-aabbccddeeff"} 1
# HELP metal_machine_power_supplies_healthy Provide information about the number of healthy power supplies
# TYPE metal_machine_power_supplies_healthy gauge
metal_machine_power_supplies_healthy{machineid="11111111-2222-3333-4444-aabbccddeeff"} 2
# HELP metal_machine_power_supplies_total Provide information about the total number of power supplies
# TYPE metal_machine_power_supplies_total gauge
metal_machine_power_supplies_total{machineid="11111111-2222-3333-4444-aabbccddeeff"} 2
# HELP metal_machine_power_usage Provide information about the machine power usage in watts
# TYPE metal_machine_power_usage gauge
metal_machine_power_usage{machineid="11111111-2222-3333-4444-aabbccddeeff"} 69
```
18 changes: 9 additions & 9 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ module github.com/metal-stack/metal-metrics-exporter
go 1.23

require (
github.com/metal-stack/metal-go v0.35.2
github.com/prometheus/client_golang v1.20.3
github.com/metal-stack/metal-go v0.37.1
github.com/prometheus/client_golang v1.20.4
k8s.io/klog/v2 v2.130.1
)

Expand Down Expand Up @@ -32,33 +32,33 @@ require (
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/klauspost/compress v1.17.10 // indirect
github.com/lestrrat-go/blackmagic v1.0.2 // indirect
github.com/lestrrat-go/httpcc v1.0.1 // indirect
github.com/lestrrat-go/httprc v1.0.6 // indirect
github.com/lestrrat-go/iter v1.0.2 // indirect
github.com/lestrrat-go/jwx/v2 v2.1.1 // indirect
github.com/lestrrat-go/option v1.0.1 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/metal-stack/metal-lib v0.18.2
github.com/metal-stack/metal-lib v0.18.3
github.com/metal-stack/security v0.8.1 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.59.1 // indirect
github.com/prometheus/common v0.60.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/segmentio/asm v1.2.0 // indirect
go.mongodb.org/mongo-driver v1.16.1 // indirect
go.mongodb.org/mongo-driver v1.17.1 // indirect
go.opentelemetry.io/otel v1.30.0 // indirect
go.opentelemetry.io/otel/metric v1.30.0 // indirect
go.opentelemetry.io/otel/trace v1.30.0 // indirect
golang.org/x/crypto v0.27.0 // indirect
golang.org/x/net v0.29.0 // indirect
golang.org/x/crypto v0.28.0 // indirect
golang.org/x/net v0.30.0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/sys v0.25.0 // indirect
golang.org/x/sys v0.26.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
36 changes: 18 additions & 18 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0=
github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
Expand All @@ -74,10 +74,10 @@ github.com/lestrrat-go/option v1.0.1 h1:oAzP2fvZGQKWkvHa1/SAcFolBEca1oN+mQ7eooNB
github.com/lestrrat-go/option v1.0.1/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/metal-stack/metal-go v0.35.2 h1:brTbmPUiYSH9IdbnCkrcRWFRlIfc0H/53f2phsB+5kY=
github.com/metal-stack/metal-go v0.35.2/go.mod h1:3MJTYCS4YJz8D8oteTKhjpaAKNMMjMKYDrIy9awHGtQ=
github.com/metal-stack/metal-lib v0.18.2 h1:EAmZkZeKpenAvxZRSKsA6gj9Jd8XLR6Z0/QhABFCCDE=
github.com/metal-stack/metal-lib v0.18.2/go.mod h1:GJjipRpHmpd2vjBtsaw9gGk5ZFan7NlShyjIsTdY1x4=
github.com/metal-stack/metal-go v0.37.1 h1:vlvg/MY9Ep61h86GF54DER1VYADcqyHbFPZH3DqEbdM=
github.com/metal-stack/metal-go v0.37.1/go.mod h1:3MJTYCS4YJz8D8oteTKhjpaAKNMMjMKYDrIy9awHGtQ=
github.com/metal-stack/metal-lib v0.18.3 h1:bovFiJPB9SMvuGLqcXVWz6jFB8HrdzwnCX7TFlen4r0=
github.com/metal-stack/metal-lib v0.18.3/go.mod h1:Ctyi6zaXFr2NVrQZLFsDLnFCzupKnYErTtgRFKAsnbw=
github.com/metal-stack/security v0.8.1 h1:4zmVUxZvDWShVvVIxM3XhIv7pTmPe9DvACRIHW6YTsk=
github.com/metal-stack/security v0.8.1/go.mod h1:OO8ZilZO6fUV5QEmwc7HP/RAjqYrGQxXoYIddJ9TvqE=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
Expand All @@ -91,12 +91,12 @@ github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYr
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4=
github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI=
github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.59.1 h1:LXb1quJHWm1P6wq/U824uxYi4Sg0oGvNeUm1z5dJoX0=
github.com/prometheus/common v0.59.1/go.mod h1:GpWM7dewqmVYcd7SmRaiWVe9SSqjf0UrwnYnpEZNuT0=
github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA=
github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
Expand All @@ -111,8 +111,8 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
go.mongodb.org/mongo-driver v1.16.1 h1:rIVLL3q0IHM39dvE+z2ulZLp9ENZKThVfuvN/IiN4l8=
go.mongodb.org/mongo-driver v1.16.1/go.mod h1:oB6AhJQvFQL4LEHyXi6aJzQJtBiTQHiAd83l0GdFaiw=
go.mongodb.org/mongo-driver v1.17.1 h1:Wic5cJIwJgSpBhe3lx3+/RybR5PiYRMpVFgO7cOHyIM=
go.mongodb.org/mongo-driver v1.17.1/go.mod h1:wwWm/+BuOddhcq3n68LKRmgk2wXzmF6s0SFOa0GINL4=
go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts=
go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc=
go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w=
Expand All @@ -121,16 +121,16 @@ go.opentelemetry.io/otel/sdk v1.24.0 h1:YMPPDNymmQN3ZgczicBY3B6sf9n62Dlj9pWD3ucg
go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg=
go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc=
go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o=
golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A=
golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70=
golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo=
golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0=
golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw=
golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U=
golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs=
golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
Expand Down
74 changes: 73 additions & 1 deletion metalCollector.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ type metalCollector struct {
machineIssues *prometheus.Desc
machineIssuesInfo *prometheus.Desc

machinePowerUsage *prometheus.Desc
machinePowerState *prometheus.Desc
machinePowerSuppliesTotal *prometheus.Desc
machinePowerSuppliesHealthy *prometheus.Desc
machineHardwareInfo *prometheus.Desc

projectInfo *prometheus.Desc

client metalgo.Client
Expand Down Expand Up @@ -181,6 +187,32 @@ func newMetalCollector(client metalgo.Client) *metalCollector {
"Provide information on machine issues",
[]string{"machineid", "issueid"}, nil,
),
machinePowerUsage: prometheus.NewDesc(
"metal_machine_power_usage",
"Provide information about the machine power usage in watts",
[]string{"machineid"}, nil,
),
machinePowerState: prometheus.NewDesc(
"metal_machine_power_state",
"Provide information about the machine power state",
[]string{"machineid"}, nil,
),
machinePowerSuppliesTotal: prometheus.NewDesc(
"metal_machine_power_supplies_total",
"Provide information about the total number of power supplies",
[]string{"machineid"}, nil,
),
machinePowerSuppliesHealthy: prometheus.NewDesc(
"metal_machine_power_supplies_healthy",
"Provide information about the number of healthy power supplies",
[]string{"machineid"}, nil,
),
machineHardwareInfo: prometheus.NewDesc(
"metal_machine_hardware_info",
"Provide information about the machine",
[]string{"machineid", "partition", "size", "bmcVersion", "biosVersion", "chassisPartNumber", "chassisPartSerial", "boardMfg", "boardMfgSerial",
"boardPartNumber", "productManufacturer", "productPartNumber", "productSerial"}, nil,
),
projectInfo: prometheus.NewDesc(
"metal_project_info",
"Provide information about metal projects",
Expand Down Expand Up @@ -217,6 +249,10 @@ func (collector *metalCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- collector.machineAllocationInfo
ch <- collector.machineIssues
ch <- collector.machineIssuesInfo
ch <- collector.machinePowerUsage
ch <- collector.machinePowerState
ch <- collector.machinePowerSuppliesTotal
ch <- collector.machinePowerSuppliesHealthy
}

func (collector *metalCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down Expand Up @@ -335,7 +371,7 @@ func (collector *metalCollector) Collect(ch chan<- prometheus.Metric) {
}
}

machines, err := collector.client.Machine().FindMachines(machine.NewFindMachinesParams().WithBody(&models.V1MachineFindRequest{}), nil)
machines, err := collector.client.Machine().FindIPMIMachines(machine.NewFindIPMIMachinesParams().WithBody(&models.V1MachineFindRequest{}), nil)
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -417,6 +453,42 @@ func (collector *metalCollector) Collect(ch chan<- prometheus.Metric) {
partitionID = *m.Partition.ID
}

if m.Ipmi != nil {
if m.Ipmi.Powerstate != nil {
var powerstate float64
switch *m.Ipmi.Powerstate {
case "ON":
powerstate = 1
case "OFF":
powerstate = 0
default:
powerstate = -1
}
ch <- prometheus.MustNewConstMetric(collector.machinePowerState, prometheus.GaugeValue, powerstate, *m.ID)
}
if m.Ipmi.Powersupplies != nil {
ch <- prometheus.MustNewConstMetric(collector.machinePowerSuppliesTotal, prometheus.GaugeValue, float64(len(m.Ipmi.Powersupplies)), *m.ID)
healthy := 0
for _, ps := range m.Ipmi.Powersupplies {
if ps.Status != nil && ps.Status.Health != nil && *ps.Status.Health == "OK" {
healthy++
}
}
ch <- prometheus.MustNewConstMetric(collector.machinePowerSuppliesHealthy, prometheus.GaugeValue, float64(healthy), *m.ID)
}
if m.Ipmi.Powermetric != nil && m.Ipmi.Powermetric.Averageconsumedwatts != nil {
ch <- prometheus.MustNewConstMetric(collector.machinePowerUsage, prometheus.GaugeValue, float64(pointer.SafeDeref(m.Ipmi.Powermetric.Averageconsumedwatts)), *m.ID)
}
size := "UNKNOWN"
if m.Size != nil {
size = m.Size.Name
}
if m.Bios != nil && m.Ipmi.Fru != nil {
ch <- prometheus.MustNewConstMetric(collector.machineHardwareInfo, prometheus.GaugeValue, 1.0, *m.ID, partitionID, size, pointer.SafeDeref(m.Ipmi.Bmcversion),
pointer.SafeDeref(m.Bios.Version), m.Ipmi.Fru.ChassisPartNumber, m.Ipmi.Fru.ChassisPartSerial, m.Ipmi.Fru.BoardMfg, m.Ipmi.Fru.BoardMfgSerial, m.Ipmi.Fru.BoardPartNumber,
m.Ipmi.Fru.ProductManufacturer, m.Ipmi.Fru.ProductPartNumber, m.Ipmi.Fru.ProductSerial)
}
}
ch <- prometheus.MustNewConstMetric(collector.machineAllocationInfo, prometheus.GaugeValue, 1.0, *m.ID, partitionID, hostname, clusterID, primaryASN, role, state)

for issueID := range allIssuesByID {
Expand Down

0 comments on commit 61819e5

Please sign in to comment.