diff --git a/cmd/metal-api/internal/issues/issues_test.go b/cmd/metal-api/internal/issues/issues_test.go
index 934f26ad..5d5ef094 100644
--- a/cmd/metal-api/internal/issues/issues_test.go
+++ b/cmd/metal-api/internal/issues/issues_test.go
@@ -85,6 +85,39 @@ func TestFindIssues(t *testing.T) {
 				}
 			},
 		},
+		{
+			name: "powersupply failure",
+			only: []Type{TypePowerSupplyFailure},
+			machines: func() metal.Machines {
+				defectPowerSupplyMachine := machineTemplate("power-supply-failure")
+				defectPowerSupplyMachine.IPMI = metal.IPMI{
+					PowerSupplies: metal.PowerSupplies{
+						{Status: metal.PowerSupplyStatus{Health: "NO-OK", State: "Absent"}},
+					},
+				}
+
+				return metal.Machines{
+					defectPowerSupplyMachine,
+					machineTemplate("good"),
+				}
+			},
+			eventContainers: func() metal.ProvisioningEventContainers {
+				return metal.ProvisioningEventContainers{
+					eventContainerTemplate("power-supply-failure"),
+					eventContainerTemplate("good"),
+				}
+			},
+			want: func(machines metal.Machines) MachineIssues {
+				return MachineIssues{
+					{
+						Machine: &machines[0],
+						Issues: Issues{
+							toIssue(&issuePowerSupplyFailure{details: "Health:NO-OK State:Absent"}),
+						},
+					},
+				}
+			},
+		},
 		{
 			name: "liveliness dead",
 			only: []Type{TypeLivelinessDead},
diff --git a/cmd/metal-api/internal/issues/powersupply-failure.go b/cmd/metal-api/internal/issues/powersupply-failure.go
new file mode 100644
index 00000000..e9c49a3b
--- /dev/null
+++ b/cmd/metal-api/internal/issues/powersupply-failure.go
@@ -0,0 +1,53 @@
+package issues
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal"
+)
+
+const (
+	// TypePowerSupplyFailure identifies the issue raised for machines whose IPMI
+	// data reports a power supply that is not healthy or not enabled.
+	TypePowerSupplyFailure Type = "powersupply-failure"
+)
+
+type (
+	// issuePowerSupplyFailure flags machines with a failing power supply.
+	// details holds the health and state of the power supply that triggered it.
+	issuePowerSupplyFailure struct {
+		details string
+	}
+)
+
+// Spec returns the static metadata of the power supply failure issue.
+func (i *issuePowerSupplyFailure) Spec() *spec {
+	return &spec{
+		Type:        TypePowerSupplyFailure,
+		Severity:    SeverityMajor,
+		Description: "machine has power supply failures",
+		RefURL:      "https://docs.metal-stack.io/stable/installation/troubleshoot/#power-supply-failure",
+	}
+}
+
+// Evaluate reports whether any power supply in the machine's IPMI data has a
+// health other than "ok" or a state other than "enabled", compared
+// case-insensitively. The health and state of the first such power supply are
+// recorded for Details. The event container and config are not consulted.
+func (i *issuePowerSupplyFailure) Evaluate(m metal.Machine, ec metal.ProvisioningEventContainer, c *Config) bool {
+	for _, ps := range m.IPMI.PowerSupplies {
+		if strings.EqualFold(ps.Status.Health, "ok") && strings.EqualFold(ps.Status.State, "enabled") {
+			continue
+		}
+		i.details = fmt.Sprintf("Health:%s State:%s", ps.Status.Health, ps.Status.State)
+		return true
+	}
+	return false
+}
+
+// Details returns the stored details string, which Evaluate sets when it finds
+// a failing power supply.
+func (i *issuePowerSupplyFailure) Details() string {
+	return i.details
+}
diff --git a/cmd/metal-api/internal/issues/types.go b/cmd/metal-api/internal/issues/types.go
index 2ff13ad0..c38536a0 100644
--- a/cmd/metal-api/internal/issues/types.go
+++ b/cmd/metal-api/internal/issues/types.go
@@ -21,6 +21,7 @@ func AllIssueTypes() []Type {
 		TypeASNUniqueness,
 		TypeNonDistinctBMCIP,
 		TypeNoEventContainer,
+		TypePowerSupplyFailure,
 	}
 }
 
@@ -52,6 +53,8 @@ func NewIssueFromType(t Type) (issue, error) {
 		return &issueNonDistinctBMCIP{}, nil
 	case TypeNoEventContainer:
 		return &issueNoEventContainer{}, nil
+	case TypePowerSupplyFailure:
+		return &issuePowerSupplyFailure{}, nil
 	default:
 		return nil, fmt.Errorf("unknown issue type: %s", t)
 	}