Skip to content

Commit

Permalink
Labels for Metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
kingster committed Mar 28, 2023
1 parent a5c7088 commit 97559c2
Show file tree
Hide file tree
Showing 11 changed files with 104 additions and 74 deletions.
2 changes: 1 addition & 1 deletion clients/java/dkv-client/src/test/resources/dkv_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pprof : false
repl-master-addr : ""
repl-poll-interval : "2s"
role : ""
cluster-name : "test"
statsd-addr : ""
vbucket : "default"
log-level : "warn"
Expand All @@ -31,4 +32,3 @@ discovery-service:
discovery-service-addr: "multi:///127.0.0.1:8001,127.0.0.1:8002,127.0.0.1:8003"
push-status-interval: 1
poll-cluster-info-interval: 1

Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ db-engine : "rocksdb"
db-engine-ini : ""
block-cache-size : 3221225472
root-folder : "/tmp/dkvsrv"
cluster-name : "test"
dc-id : "dc1"
diskless : false
pprof : false
Expand Down
10 changes: 8 additions & 2 deletions cmd/dkvsrv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,12 +341,18 @@ func setFlagsForNexusDirs() {
}

func setupStats() {
// Default tags attached to both the statsd client and the prometheus registry.
tags := []stats.Tag{stats.NewTag("cluster-name", config.ClusterName)}
if config.NodeName != "" {
tags = append(tags, stats.NewTag("node-name", config.NodeName))
}

if config.StatsdAddr != "" {
statsCli = stats.NewStatsDClient(config.StatsdAddr, "dkv.")
statsCli = stats.NewStatsDClient(config.StatsdAddr, "dkv.", tags...)
} else {
statsCli = stats.NewNoOpClient()
}
promRegistry = stats.NewPromethousRegistry()
promRegistry = stats.NewPromethousRegistry(tags)
statsStreamer = stats.NewStatStreamer()
statAggregatorRegistry = aggregate.NewStatAggregatorRegistry()
go statsStreamer.Run()
Expand Down
3 changes: 3 additions & 0 deletions dkvsrv.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@

node-name : "" # Name of the current node
cluster-name : "dkv" # Cluster identifier, attached as a tag to all reported metrics

listen-addr : "0.0.0.0:8080" #listen address
http-listen-addr : "0.0.0.0:8081" # http listen address
role : "none" #Role of the node - master|slave|standalone
Expand All @@ -19,6 +21,7 @@ dc-id : "default" # DC / Availability zone identifier
vbucket : "default" # vBucket identifier
database : "default" # Database identifier


discovery-service:
server:
status-ttl: 300
Expand Down
18 changes: 10 additions & 8 deletions internal/master/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,18 @@ type dkvServiceStat struct {

func newDKVServiceStat(registry prometheus.Registerer) *dkvServiceStat {
RequestLatency := prometheus.NewSummaryVec(prometheus.SummaryOpts{
Namespace: stats.Namespace,
Name: "latency",
Help: "Latency statistics for dkv service",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
MaxAge: 10 * time.Second,
Namespace: stats.Namespace,
Name: "latency",
Help: "Latency statistics for dkv service",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
MaxAge: 10 * time.Second,
ConstLabels: stats.ConstLabels,
}, []string{"Ops"})
ResponseError := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: stats.Namespace,
Name: "error",
Help: "Error count for storage operations",
Namespace: stats.Namespace,
Name: "error",
Help: "Error count for storage operations",
ConstLabels: stats.ConstLabels,
}, []string{"Ops"})
registry.MustRegister(RequestLatency, ResponseError)
return &dkvServiceStat{RequestLatency, ResponseError}
Expand Down
3 changes: 2 additions & 1 deletion internal/opts/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ import (
type Config struct {

// region level configuration.
DisklessMode bool `mapstructure:"diskless" desc:"Enables badger diskless mode where data is stored entirely in memory. "`
NodeName string `mapstructure:"node-name" desc:"Node Name"`
ClusterName string `mapstructure:"cluster-name" desc:"Cluster Name"`
DisklessMode bool `mapstructure:"diskless" desc:"Enables badger diskless mode where data is stored entirely in memory. "`
DbEngine string `mapstructure:"db-engine" desc:"Underlying DB engine for storing data - badger|rocksdb"`
DbEngineIni string `mapstructure:"db-engine-ini" desc:"An .ini file for configuring the underlying storage engine. Refer badger.ini or rocks.ini for more details."`
DbRole string `mapstructure:"role" desc:"Role of the node - master|slave|standalone|discovery"`
Expand Down
30 changes: 17 additions & 13 deletions internal/slave/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,25 +83,29 @@ type stat struct {

func newStat(registry prometheus.Registerer) *stat {
replicationLag := prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: stats.Namespace,
Name: "slave_replication_lag",
Help: "replication lag of the slave",
Namespace: stats.Namespace,
Name: "slave_replication_lag",
Help: "replication lag of the slave",
ConstLabels: stats.ConstLabels,
})
replicationDelay := prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: stats.Namespace,
Name: "slave_replication_delay",
Help: "replication delay of the slave",
Namespace: stats.Namespace,
Name: "slave_replication_delay",
Help: "replication delay of the slave",
ConstLabels: stats.ConstLabels,
})
replicationStatus := prometheus.NewSummaryVec(prometheus.SummaryOpts{
Namespace: stats.Namespace,
Name: "slave_replication_status",
Help: "replication status of the slave",
MaxAge: 5 * time.Second,
Namespace: stats.Namespace,
Name: "slave_replication_status",
Help: "replication status of the slave",
MaxAge: 5 * time.Second,
ConstLabels: stats.ConstLabels,
}, []string{"masterAddr"})
replicationSpeed := prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: stats.Namespace,
Name: "slave_replication_speed",
Help: "replication speed of the slave",
Namespace: stats.Namespace,
Name: "slave_replication_speed",
Help: "replication speed of the slave",
ConstLabels: stats.ConstLabels,
})
registry.MustRegister(replicationLag, replicationDelay, replicationSpeed, replicationStatus)
return &stat{
Expand Down
9 changes: 8 additions & 1 deletion internal/stats/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (

type promethousRegistry struct{}

var ConstLabels prometheus.Labels

// Register hands the collector c to the prometheus package's
// DefaultRegisterer and returns whatever error that registerer reports.
func (*promethousRegistry) Register(c prometheus.Collector) error {
	defaultReg := prometheus.DefaultRegisterer
	return defaultReg.Register(c)
}
Expand All @@ -29,7 +31,11 @@ func (*promethousRegistry) Unregister(c prometheus.Collector) bool {
return prometheus.DefaultRegisterer.Unregister(c)
}

func NewPromethousRegistry() prometheus.Registerer {
// NewPromethousRegistry replaces the package-level ConstLabels with a fresh
// label set built from tags (one label per tag key/value pair; a later tag
// with the same key overwrites an earlier one) and returns a Registerer
// backed by promethousRegistry.
func NewPromethousRegistry(tags []Tag) prometheus.Registerer {
	labels := make(prometheus.Labels, len(tags))
	for i := range tags {
		labels[tags[i].key] = tags[i].val
	}
	// Publish the completed label set for metric constructors to read.
	ConstLabels = labels
	return &promethousRegistry{}
}

Expand All @@ -44,6 +50,7 @@ func (*noopClient) Unregister(collector prometheus.Collector) bool {
}

// NewPromethousNoopRegistry resets the package-level ConstLabels to an empty
// label set and returns a Registerer backed by noopClient.
func NewPromethousNoopRegistry() prometheus.Registerer {
	ConstLabels = prometheus.Labels{}
	var registry prometheus.Registerer = &noopClient{}
	return registry
}

Expand Down
61 changes: 32 additions & 29 deletions internal/storage/badger/metrics.go
Original file line number Diff line number Diff line change
@@ -1,79 +1,82 @@
package badger

import "github.com/prometheus/client_golang/prometheus"
import (
"github.com/flipkart-incubator/dkv/internal/stats"
"github.com/prometheus/client_golang/prometheus"
)

// metricsCollector builds a prometheus expvar Collector exposing Badger metrics.
func (bdb *badgerDB) metricsCollector() {
collector := prometheus.NewExpvarCollector(map[string]*prometheus.Desc{
"badger_v3_disk_reads_total": prometheus.NewDesc(
"badger_disk_reads_total",
prometheus.BuildFQName(stats.Namespace, "badger", "disk_reads_total"),
"Number of cumulative reads by Badger",
nil, nil,
nil, stats.ConstLabels,
),
"badger_v3_disk_writes_total": prometheus.NewDesc(
"badger_disk_writes_total",
prometheus.BuildFQName(stats.Namespace, "badger", "disk_writes_total"),
"Number of cumulative writes by Badger",
nil, nil,
nil, stats.ConstLabels,
),
"badger_v3_read_bytes": prometheus.NewDesc(
"badger_read_bytes",
prometheus.BuildFQName(stats.Namespace, "badger", "read_bytes"),
"Number of cumulative bytes read by Badger",
nil, nil,
nil, stats.ConstLabels,
),
"badger_v3_written_bytes": prometheus.NewDesc(
"badger_written_bytes",
prometheus.BuildFQName(stats.Namespace, "badger", "written_bytes"),
"Number of cumulative bytes written by Badger",
nil, nil,
nil, stats.ConstLabels,
),
"badger_v3_lsm_level_gets_total": prometheus.NewDesc(
"badger_lsm_level_gets_total",
prometheus.BuildFQName(stats.Namespace, "badger", "lsm_level_gets_total"),
"Total number of LSM gets",
[]string{"level"}, nil,
[]string{"level"}, stats.ConstLabels,
),
"badger_v3_lsm_bloom_hits_total": prometheus.NewDesc(
"badger_lsm_bloom_hits_total",
prometheus.BuildFQName(stats.Namespace, "badger", "lsm_bloom_hits_total"),
"Total number of LSM bloom hits",
[]string{"level"}, nil,
[]string{"level"}, stats.ConstLabels,
),
"badger_v3_gets_total": prometheus.NewDesc(
"badger_gets_total",
prometheus.BuildFQName(stats.Namespace, "badger", "gets_total"),
"Total number of gets",
nil, nil,
nil, stats.ConstLabels,
),
"badger_v3_puts_total": prometheus.NewDesc(
"badger_puts_total",
prometheus.BuildFQName(stats.Namespace, "badger", "puts_total"),
"Total number of puts",
nil, nil,
nil, stats.ConstLabels,
),
"badger_v3_blocked_puts_total": prometheus.NewDesc(
"badger_blocked_puts_total",
prometheus.BuildFQName(stats.Namespace, "badger", "blocked_puts_total"),
"Total number of blocked puts",
nil, nil,
nil, stats.ConstLabels,
),
"badger_v3_memtable_gets_total": prometheus.NewDesc(
"badger_memtable_gets_total",
prometheus.BuildFQName(stats.Namespace, "badger", "memtable_gets_total"),
"Total number of memtable gets",
nil, nil,
nil, stats.ConstLabels,
),
"badger_v3_lsm_size_bytes": prometheus.NewDesc(
"badger_lsm_size_bytes",
prometheus.BuildFQName(stats.Namespace, "badger", "lsm_size_bytes"),
"Size of the LSM in bytes",
[]string{"dir"}, nil,
[]string{"dir"}, stats.ConstLabels,
),
"badger_v3_vlog_size_bytes": prometheus.NewDesc(
"badger_vlog_size_bytes",
prometheus.BuildFQName(stats.Namespace, "badger", "vlog_size_bytes"),
"Size of the value log in bytes",
[]string{"dir"}, nil,
[]string{"dir"}, stats.ConstLabels,
),
"badger_v3_pending_writes_total": prometheus.NewDesc(
"badger_pending_writes_total",
prometheus.BuildFQName(stats.Namespace, "badger", "pending_writes_total"),
"Total number of pending writes",
[]string{"dir"}, nil,
[]string{"dir"}, stats.ConstLabels,
),
"badger_v3_compactions_current": prometheus.NewDesc(
"badger_compactions_current",
prometheus.BuildFQName(stats.Namespace, "badger", "compactions_current"),
"Number of tables being actively compacted",
nil, nil,
nil, stats.ConstLabels,
),
})

Expand Down
23 changes: 12 additions & 11 deletions internal/storage/rocksdb/metrics.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package rocksdb

import (
"github.com/flipkart-incubator/dkv/internal/stats"
"github.com/flipkart-incubator/gorocksdb"
"github.com/prometheus/client_golang/prometheus"
"go.uber.org/zap"
Expand All @@ -18,29 +19,29 @@ type rocksDBCollector struct {
func newRocksDBCollector(rdb *rocksDB) *rocksDBCollector {
return &rocksDBCollector{
memTableTotalGauge: prometheus.NewDesc(
prometheus.BuildFQName("rocksdb", "", "memory_usage_memtable_total"),
prometheus.BuildFQName(stats.Namespace, "rocksdb", "memory_usage_memtable_total"),
"Rocksdb MemTableTotal estimates memory usage of all mem-tables",
nil, nil),
nil, stats.ConstLabels),
memTableUnflushedGauge: prometheus.NewDesc(
prometheus.BuildFQName("rocksdb", "", "memory_usage_memtable_unflushed"),
prometheus.BuildFQName(stats.Namespace, "rocksdb", "memory_usage_memtable_unflushed"),
"Rocksdb MemTableUnflushed estimates memory usage of unflushed mem-tables",
nil, nil),
nil, stats.ConstLabels),
memTableReadersTotalGauge: prometheus.NewDesc(
prometheus.BuildFQName("rocksdb", "", "memory_usage_memtable_readers_total"),
prometheus.BuildFQName(stats.Namespace, "rocksdb", "memory_usage_memtable_readers_total"),
"Rocksdb MemTableReadersTotal memory usage of table readers (indexes and bloom filters)",
nil, nil),
nil, stats.ConstLabels),
cacheTotalGauge: prometheus.NewDesc(
prometheus.BuildFQName("rocksdb", "", "memory_usage_cache_total"),
prometheus.BuildFQName(stats.Namespace, "rocksdb", "memory_usage_cache_total"),
"Rocksdb CacheTotal memory usage of cache",
nil, nil),
nil, stats.ConstLabels),
db: rdb.db,
lgr: rdb.opts.lgr,
}

}

//Each and every collector must implement the Describe function.
//It essentially writes all descriptors to the prometheus desc channel.
// Each and every collector must implement the Describe function.
// It essentially writes all descriptors to the prometheus desc channel.
func (collector *rocksDBCollector) Describe(ch chan<- *prometheus.Desc) {
// Update this section with each metric you create for a given collector.
ch <- collector.memTableTotalGauge
Expand All @@ -49,7 +50,7 @@ func (collector *rocksDBCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- collector.cacheTotalGauge
}

//Collect implements required collect function for all promehteus collectors
// Collect implements the required Collect function for all prometheus collectors.
func (collector *rocksDBCollector) Collect(ch chan<- prometheus.Metric) {
memoryUsage, err := gorocksdb.GetApproximateMemoryUsageByType([]*gorocksdb.DB{collector.db}, nil)
if err != nil {
Expand Down
18 changes: 10 additions & 8 deletions internal/storage/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,18 @@ type Stat struct {

func NewStat(registry prometheus.Registerer, engine string) *Stat {
RequestLatency := prometheus.NewSummaryVec(prometheus.SummaryOpts{
Namespace: stats.Namespace,
Name: fmt.Sprintf("storage_latency_%s", engine),
Help: fmt.Sprintf("Latency statistics for %s storage operations", engine),
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
MaxAge: 10 * time.Second,
Namespace: stats.Namespace,
Name: fmt.Sprintf("storage_latency_%s", engine),
Help: fmt.Sprintf("Latency statistics for %s storage operations", engine),
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
MaxAge: 10 * time.Second,
ConstLabels: stats.ConstLabels,
}, []string{stats.Ops})
ResponseError := prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: stats.Namespace,
Name: fmt.Sprintf("storage_error_%s", engine),
Help: fmt.Sprintf("Error count for %s storage operations", engine),
Namespace: stats.Namespace,
Name: fmt.Sprintf("storage_error_%s", engine),
Help: fmt.Sprintf("Error count for %s storage operations", engine),
ConstLabels: stats.ConstLabels,
}, []string{stats.Ops})
registry.MustRegister(RequestLatency, ResponseError)
return &Stat{RequestLatency, ResponseError}
Expand Down

0 comments on commit 97559c2

Please sign in to comment.