Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metrics reporting #53

Merged
merged 3 commits into from
Oct 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
bin/
.idea/
.idea/
.DS_Store
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ List of remco contributors and authors; these are the copyright holders
for remco, referred to as The Remco Authors.

Rene Kaufmann <[email protected]>
Rashit Azizbaev <[email protected]>
2 changes: 2 additions & 0 deletions cmd/remco/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/HeavyHorst/remco/pkg/backends"
"github.com/HeavyHorst/remco/pkg/backends/plugin"
"github.com/HeavyHorst/remco/pkg/log"
"github.com/HeavyHorst/remco/pkg/telemetry"
"github.com/HeavyHorst/remco/pkg/template"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -60,6 +61,7 @@ type Configuration struct {
PidFile string `toml:"pid_file"`
LogFile string `toml:"log_file"`
Resource []Resource
Telemetry telemetry.Telemetry
}

type DefaultBackends struct {
Expand Down
16 changes: 16 additions & 0 deletions cmd/remco/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"testing"

"github.com/HeavyHorst/remco/pkg/backends"
"github.com/HeavyHorst/remco/pkg/telemetry"
"github.com/HeavyHorst/remco/pkg/template"

. "gopkg.in/check.v1"
Expand Down Expand Up @@ -45,6 +46,11 @@ const (
watch = false
interval = 1

[telemetry]
enabled = true
[telemetry.sinks.prometheus]
addr = ":2112"
expiration = 600
`
resourceFile string = `
[[template]]
Expand Down Expand Up @@ -97,6 +103,16 @@ var expected = Configuration{
Backends: expectedBackend,
},
},
Telemetry: telemetry.Telemetry{
Enabled: true,
ServiceName: "",
Sinks: telemetry.Sinks{
Prometheus: &telemetry.PrometheusSink{
Addr: ":2112",
Expiration: 600,
},
},
},
}

// Hook up gocheck into the "go test" runner.
Expand Down
17 changes: 16 additions & 1 deletion cmd/remco/supervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"time"

"github.com/HeavyHorst/remco/pkg/log"
"github.com/HeavyHorst/remco/pkg/telemetry"
"github.com/HeavyHorst/remco/pkg/template"
"github.com/pborman/uuid"
"github.com/pkg/errors"
Expand All @@ -38,7 +39,8 @@ type Supervisor struct {
signalChans map[string]chan os.Signal
signalChansMutex sync.RWMutex

pidFile string
pidFile string
telemetry telemetry.Telemetry

reapLock *sync.RWMutex
}
Expand All @@ -53,6 +55,7 @@ func NewSupervisor(cfg Configuration, reapLock *sync.RWMutex, done chan struct{}
}

w.pidFile = cfg.PidFile
w.telemetry = cfg.Telemetry
pid := os.Getpid()
err := w.writePid(pid)
if err != nil {
Expand All @@ -62,6 +65,10 @@ func NewSupervisor(cfg Configuration, reapLock *sync.RWMutex, done chan struct{}
stopChan := make(chan struct{})
stoppedChan := make(chan struct{})

_, err = w.telemetry.Init()
if err != nil {
log.Error(fmt.Sprintf("error starting telemetry: %v", err))
}
go w.runResource(cfg.Resource, stopChan, stoppedChan)
w.wg.Add(1)
go func() {
Expand All @@ -85,6 +92,14 @@ func NewSupervisor(cfg Configuration, reapLock *sync.RWMutex, done chan struct{}
log.WithFields(logrus.Fields{"pid_file": w.pidFile}).Error(err)
}
}
err = w.telemetry.Stop()
if err != nil {
log.Error(fmt.Sprintf("error stopping telemetry: %v", err))
}
_, err = rs.c.Telemetry.Init()
if err != nil {
log.Error(fmt.Sprintf("error starting telemetry: %v", err))
}
stopChan <- struct{}{}
<-stoppedChan
go w.runResource(rs.c.Resource, stopChan, stoppedChan)
Expand Down
8 changes: 8 additions & 0 deletions cmd/remco/supervisor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"os"

"github.com/HeavyHorst/remco/pkg/backends"
"github.com/HeavyHorst/remco/pkg/telemetry"
"github.com/HeavyHorst/remco/pkg/template"

. "gopkg.in/check.v1"
Expand Down Expand Up @@ -48,6 +49,11 @@ var exampleConfiguration = Configuration{
Backends: exampleBackend,
},
},
Telemetry: telemetry.Telemetry{
Enabled: true,
ServiceName: "test",
Sinks: telemetry.Sinks{},
},
}

type RunnerTestSuite struct {
Expand All @@ -66,6 +72,7 @@ func (s *RunnerTestSuite) TestNew(t *C) {
t.Check(s.runner.signalChans, NotNil)
t.Check(s.runner.reapLock, IsNil)
t.Check(s.runner.pidFile, Equals, "/tmp/remco_test.pid")
t.Check(s.runner.telemetry, DeepEquals, exampleConfiguration.Telemetry)
}

func (s *RunnerTestSuite) TestWritePid(t *C) {
Expand Down Expand Up @@ -94,6 +101,7 @@ func (s *RunnerTestSuite) TestSignalChan(t *C) {
func (s *RunnerTestSuite) TestReload(t *C) {
new := exampleConfiguration
new.PidFile = "/tmp/remco_test2.pid"
new.Telemetry.ServiceName = "test2"
s.runner.Reload(new)
}

Expand Down
40 changes: 40 additions & 0 deletions docs/content/config/configuration-options.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,43 @@ See the example configuration to see how global default values can be set for in
- **srv_record(string, optional):**
- A DNS server record to discover the zookeeper nodes.
</details>

## Telemetry configuration options
- **enabled(bool):**
- Flag to enable telemetry.
- **service_name(string):**
- Service name to add to every metric name. Defaults to "remco".

## Sink configuration options

<details>
<summary> **inmem** </summary>

- **interval(int):**
- Length of each aggregation interval, in seconds.
- **retain(int):**
- Retain controls how many aggregation intervals are kept.
</details>

<details>
<summary> **prometheus** </summary>

- **addr(string):**
- Address to expose metrics on. Prometheus stats will be available at /metrics endpoint.
- **expiration(int):**
- Expiration is the duration a metric is valid for, after which it will be untracked. If the value is zero, a metric is never expired.
</details>

<details>
<summary> **statsd** </summary>

- **addr(string):**
- Statsd/Statsite server address
</details>

<details>
<summary> **statsite** </summary>

- **addr(string):**
- Statsd/Statsite server address
</details>
2 changes: 1 addition & 1 deletion docs/content/config/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ chapter: true
date: 2016-12-03T14:37:29+01:00
icon: <b>2. </b>
next: /config/environment-variables/
prev: /details/process-lifecycle/
prev: /details/telemetry/
title: Configuration
weight: 0
---
Expand Down
8 changes: 8 additions & 0 deletions docs/content/config/sample-config.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,5 +72,13 @@ log_file = "/var/log/remco.log"
watch = true
keys = ["/prefix"]

################################################################
# Telemetry configuration
################################################################
[telemetry]
enabled = true
[telemetry.sinks.prometheus]
addr = ":2112"
expiration = 600
```

2 changes: 1 addition & 1 deletion docs/content/details/process-lifecycle.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
date: 2016-12-03T14:33:41+01:00
next: /config/
next: /details/telemetry/
prev: /details/plugins/
title: process lifecycle
toc: true
Expand Down
42 changes: 42 additions & 0 deletions docs/content/details/telemetry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
---
title: "Telemetry"
date: 2020-09-05T21:12:31+03:00
next: /config/
prev: /details/process-lifecycle/
toc: true
weight: 50
---

Remco can expose different metrics about its state using [go-metrics](https://github.com/armon/go-metrics).
You can configure any type of sink supported by go-metrics using the configuration file.
All the configured sinks will be aggregated using FanoutSink.


Currently supported sinks are:

- **inmem**
- **prometheus**
- **statsd**
- **statsite**

The different configuration parameters can be found here: [telemetry configuration](/config/configuration-options/#telemetry-configuration-options).

Exposed metrics:
- **files.template_execution_duration**
- Duration of template execution
- **files.check_command_duration**
- Duration of check_command execution
- **files.reload_command_duration**
- Duration of reload_command execution
- **files.stage_errors_total**
- Total number of errors in file staging action
- **files.staged_total**
- Total number of successfully staged files
- **files.sync_errors_total**
- Total number of errors in file syncing action
- **files.synced_total**
- Total number of successfully synced files
- **backends.sync_errors_total**
- Total errors in backend sync action
- **backends.synced_total**
- Total number of successfully synced backends
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/HeavyHorst/easykv v1.2.5
github.com/HeavyHorst/memkv v1.0.1
github.com/HeavyHorst/pongo2 v3.3.0+incompatible
github.com/armon/go-metrics v0.3.4
github.com/dlclark/regexp2 v1.2.0 // indirect
github.com/dop251/goja v0.0.0-20190912223329-aa89e6a4c733
github.com/ghodss/yaml v1.0.0
Expand All @@ -24,6 +25,7 @@ require (
github.com/onsi/gomega v1.7.1 // indirect
github.com/pborman/uuid v1.2.0
github.com/pkg/errors v0.8.1
github.com/prometheus/client_golang v1.4.0
github.com/sirupsen/logrus v1.4.2
github.com/x-cray/logrus-prefixed-formatter v0.5.2
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15
Expand Down
Loading