From 7b9dd68de0edc695a03523b0c2946ad6d4cb7334 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Mon, 24 Oct 2016 16:06:47 +0100 Subject: [PATCH] Rename metrics to be in line with best practices. Counters should end with _total. Units should be specified where known. Group related metrics together. Remove machine-level metrics; they should come from the node exporter. Remove calculated rates; they should be calculated in Prometheus. Convert units to seconds where possible. --- exporter/redis.go | 109 ++++++++++++++++++++--------------------- exporter/redis_test.go | 15 +++--- 2 files changed, 59 insertions(+), 65 deletions(-) diff --git a/exporter/redis.go b/exporter/redis.go index a10b9308..68f2021b 100644 --- a/exporter/redis.go +++ b/exporter/redis.go @@ -46,75 +46,69 @@ type scrapeResult struct { } var ( - renameMap = map[string]string{ - "loading": "repl_loading", - } - - inclMap = map[string]bool{ + metricMap = map[string]string{ // # Server - "uptime_in_seconds": true, + "uptime_in_seconds": "uptime_in_seconds", // # Clients - "connected_clients": true, - "blocked_clients": true, + "connected_clients": "connected_clients", + "blocked_clients": "blocked_clients", // # Memory - "used_memory": true, - "used_memory_rss": true, - "used_memory_peak": true, - "used_memory_lua": true, - "total_system_memory": true, - "max_memory": true, - "mem_fragmentation_ratio": true, + "used_memory": "memory_used_bytes", + "used_memory_rss": "memory_used_rss_bytes", + "used_memory_peak": "memory_used_peak_bytes", + "used_memory_lua": "memory_used_lua_bytes", + "max_memory": "memory_max_bytes", + "mem_fragmentation_ratio": "memory_fragmentation_ratio", // # Persistence - "rdb_changes_since_last_save": true, - "rdb_last_bgsave_time_sec": true, - "rdb_current_bgsave_time_sec": true, - "aof_enabled": true, - "aof_rewrite_in_progress": true, - "aof_rewrite_scheduled": true, - "aof_last_rewrite_time_sec": true, - "aof_current_rewrite_time_sec": true, + "rdb_changes_since_last_save": 
"rdb_changes_since_last_save", + "rdb_last_bgsave_time_sec": "rdb_last_bgsave_duration_sec", + "rdb_current_bgsave_time_sec": "rdb_current_bgsave_duration_sec", + "aof_enabled": "aof_enabled", + "aof_rewrite_in_progress": "aof_rewrite_in_progress", + "aof_rewrite_scheduled": "aof_rewrite_scheduled", + "aof_last_rewrite_time_sec": "aof_last_rewrite_duration_sec", + "aof_current_rewrite_time_sec": "aof_current_rewrite_duration_sec", // # Stats - "total_connections_received": true, - "total_commands_processed": true, - "instantaneous_ops_per_sec": true, - "total_net_input_bytes": true, - "total_net_output_bytes": true, - "rejected_connections": true, - "expired_keys": true, - "evicted_keys": true, - "keyspace_hits": true, - "keyspace_misses": true, - "pubsub_channels": true, - "pubsub_patterns": true, + "total_connections_received": "connections_received_total", + "total_commands_processed": "commands_processed_total", + "total_net_input_bytes": "net_input_bytes_total", + "total_net_output_bytes": "net_output_bytes_total", + "rejected_connections": "rejected_connections_total", + "expired_keys": "expired_keys_total", + "evicted_keys": "evicted_keys_total", + "keyspace_hits": "keyspace_hits_total", + "keyspace_misses": "keyspace_misses_total", + "pubsub_channels": "pubsub_channels", + "pubsub_patterns": "pubsub_patterns", // # Replication - "loading": true, - "connected_slaves": true, - "repl_backlog_size": true, + "loading": "loading_dump_file", + "connected_slaves": "connected_slaves", + "repl_backlog_size": "replication_backlog_bytes", // # CPU - "used_cpu_sys": true, - "used_cpu_user": true, - "used_cpu_sys_children": true, - "used_cpu_user_children": true, + "used_cpu_sys": "used_cpu_sys", + "used_cpu_user": "used_cpu_user", + "used_cpu_sys_children": "used_cpu_sys_children", + "used_cpu_user_children": "used_cpu_user_children", } ) func (e *Exporter) initGauges() { e.metrics = map[string]*prometheus.GaugeVec{} - e.metrics["db_keys_total"] = 
prometheus.NewGaugeVec(prometheus.GaugeOpts{ + e.metrics["db_keys"] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: e.namespace, - Name: "db_keys_total", + Name: "db_keys", Help: "Total number of keys by DB", }, []string{"addr", "db"}) - e.metrics["db_expiring_keys_total"] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + e.metrics["db_keys_expiring"] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: e.namespace, - Name: "db_expiring_keys_total", + Name: "db_keys_expiring", Help: "Total number of expiring keys by DB", }, []string{"addr", "db"}) e.metrics["db_avg_ttl_seconds"] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ @@ -122,15 +116,16 @@ func (e *Exporter) initGauges() { Name: "db_avg_ttl_seconds", Help: "Avg TTL in seconds", }, []string{"addr", "db"}) - e.metrics["command_calls_total"] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + // Emulate a Summary. + e.metrics["command_call_duration_seconds_count"] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: e.namespace, - Name: "command_calls_total", + Name: "command_call_duration_seconds_count", Help: "Total number of calls per command", }, []string{"addr", "cmd"}) - e.metrics["command_calls_usec_total"] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + e.metrics["command_call_duration_seconds_sum"] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: e.namespace, - Name: "command_calls_usec_total", - Help: "Total amount of time in usecs spent per command", + Name: "command_call_duration_seconds_sum", + Help: "Total amount of time in seconds spent per command", }, []string{"addr", "cmd"}) } @@ -238,7 +233,7 @@ func includeMetric(s string) bool { return true } - _, ok := inclMap[s] + _, ok := metricMap[s] return ok } @@ -343,15 +338,15 @@ func (e *Exporter) extractInfoMetrics(info, addr string, scrapes chan<- scrapeRe } e.metricsMtx.RLock() - e.metrics["command_calls_total"].WithLabelValues(addr, cmd).Set(calls) - e.metrics["command_calls_usec_total"].WithLabelValues(addr, 
cmd).Set(usecTotal) + e.metrics["command_call_duration_seconds_count"].WithLabelValues(addr, cmd).Set(calls) + e.metrics["command_call_duration_seconds_sum"].WithLabelValues(addr, cmd).Set(usecTotal / 1e6) e.metricsMtx.RUnlock() continue } if keysTotal, keysEx, avgTTL, ok := parseDBKeyspaceString(split[0], split[1]); ok { - scrapes <- scrapeResult{Name: "db_keys_total", Addr: addr, DB: split[0], Value: keysTotal} - scrapes <- scrapeResult{Name: "db_expiring_keys_total", Addr: addr, DB: split[0], Value: keysEx} + scrapes <- scrapeResult{Name: "db_keys", Addr: addr, DB: split[0], Value: keysTotal} + scrapes <- scrapeResult{Name: "db_keys_expiring", Addr: addr, DB: split[0], Value: keysEx} if avgTTL > -1 { scrapes <- scrapeResult{Name: "db_avg_ttl_seconds", Addr: addr, DB: split[0], Value: avgTTL} } @@ -359,7 +354,7 @@ func (e *Exporter) extractInfoMetrics(info, addr string, scrapes chan<- scrapeRe } metricName := split[0] - if newName, ok := renameMap[metricName]; ok { + if newName, ok := metricMap[metricName]; ok { metricName = newName } diff --git a/exporter/redis_test.go b/exporter/redis_test.go index a2b29dfe..b0336463 100644 --- a/exporter/redis_test.go +++ b/exporter/redis_test.go @@ -140,7 +140,7 @@ func TestCountingKeys(t *testing.T) { var keysTestDB float64 for s := range scrapes { - if s.Name == "db_keys_total" && s.DB == dbNumStrFull { + if s.Name == "db_keys" && s.DB == dbNumStrFull { keysTestDB = s.Value break } @@ -156,7 +156,7 @@ func TestCountingKeys(t *testing.T) { want := keysTestDB + float64(len(keys)) + float64(len(keysExpiring)) + 1 for s := range scrapes { - if s.Name == "db_keys_total" && s.DB == dbNumStrFull { + if s.Name == "db_keys" && s.DB == dbNumStrFull { if want != s.Value { t.Errorf("values not matching, %f != %f", keysTestDB, s.Value) } @@ -169,7 +169,7 @@ func TestCountingKeys(t *testing.T) { e.scrape(scrapes) for s := range scrapes { - if s.Name == "db_keys_total" && s.DB == dbNumStrFull { + if s.Name == "db_keys" && s.DB == 
dbNumStrFull { if keysTestDB != s.Value { t.Errorf("values not matching, %f != %f", keysTestDB, s.Value) } @@ -202,11 +202,10 @@ func TestExporterMetrics(t *testing.T) { } wantKeys := []string{ - "db_keys_total", + "db_keys", "db_avg_ttl_seconds", - "instantaneous_ops_per_sec", "used_cpu_sys", - "repl_loading", // testing renameMap + "loading_dump_file", // testing renames } for _, k := range wantKeys { @@ -325,7 +324,7 @@ func TestCommandStats(t *testing.T) { close(chM) }() - want := map[string]bool{"test_command_calls_total": false, "test_command_calls_usec_total": false} + want := map[string]bool{"test_command_call_duration_seconds_count": false, "test_command_call_duration_seconds_sum": false} for m := range chM { switch m.(type) { @@ -371,7 +370,7 @@ func TestHTTPEndpoint(t *testing.T) { tests := []string{ `test_connected_clients`, - `test_total_commands_processed`, + `test_commands_processed_total`, `test_key_size`, } for _, test := range tests {