Skip to content

Commit

Permalink
feat: add dead man's switch for td app itself (#67)
Browse files Browse the repository at this point in the history
  • Loading branch information
czarcas7ic authored Jul 6, 2023
1 parent dc491c9 commit fb37574
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 7 deletions.
8 changes: 8 additions & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ chains:
| `telegram.api_key` | API key ... talk to @BotFather. More setup info in the [telegram doc](telegram.md). |
| `telegram.channel` | See the [telegram doc](telegram.md) for how to get this value. |

## Health Check Settings

| Config Setting | Description |
|-------------------------|-------------------------------------------------------------------------------------|
| `healthcheck.enabled` | Send pings to determine if the monitor is running? |
| `healthcheck.ping_url` | URL to send pings to. |
| `healthcheck.ping_rate` | Interval between pings, in seconds. |

## Chain Specific Settings

*This section can be repeated for monitoring multiple chains.*
Expand Down
17 changes: 12 additions & 5 deletions example-config.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
---

# controls whether the dashboard is enabled.
enable_dashboard: yes
# What TCP port the dashboard will listen on. Only the port is controllable for now.
Expand Down Expand Up @@ -39,7 +38,7 @@ telegram:
# Alert via telegram? Note: also supersedes chain-specific settings
enabled: no
# API key ... talk to @BotFather
api_key: '5555555555:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
api_key: "5555555555:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
# The group ID for the chat where messages will be sent. Google how to find this, will include better info later.
channel: "-666666666"

Expand All @@ -50,10 +49,18 @@ slack:
# The webhook can be added in the Slack app directory.
webhook: https://hooks.slack.com/services/AAAAAAAAAAAAAAAAAAAAAAA/bbbbbbbbbbbbbbbbbbbbbbbb

# Healthcheck settings (dead man's switch)
healthcheck:
# Send pings to determine if the monitor is running?
enabled: no
# URL to send pings to.
ping_url: https://hc-ping.com/aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee
# Interval between pings, in seconds.
ping_rate: 60

# The various chains to be monitored. Create a new entry for each chain. The name itself can be arbitrary, but a
# user-friendly name is recommended.
chains:

# The user-friendly name that will be used for labels. Highly suggest wrapping in quotes.
"Osmosis":
# chain_id is validated for a match when connecting to an RPC endpoint, also used as a label in several places.
Expand Down Expand Up @@ -115,8 +122,8 @@ chains:

# Slack settings
slack:
enabled: yes
webhook: "" # uses default if blank
enabled: yes
webhook: "" # uses default if blank

# This section covers our RPC providers. No LCD (aka REST) endpoints are used, only TM's RPC endpoints
# Multiple hosts are encouraged, and will be tried sequentially until a working endpoint is discovered.
Expand Down
27 changes: 25 additions & 2 deletions td2/rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ import (
"context"
"errors"
"fmt"
dash "github.com/blockpane/tenderduty/v2/td2/dashboard"
rpchttp "github.com/tendermint/tendermint/rpc/client/http"
"io"
"net/http"
"net/url"
"regexp"
"time"

dash "github.com/blockpane/tenderduty/v2/td2/dashboard"
rpchttp "github.com/tendermint/tendermint/rpc/client/http"
)

// newRpc sets up the rpc client used for monitoring. It will try nodes in order until a working node is found.
Expand Down Expand Up @@ -208,6 +209,28 @@ func (cc *ChainConfig) monitorHealth(ctx context.Context, chainName string) {
}
}

// pingHealthcheck starts the dead man's switch for the monitor itself: a
// background goroutine that sends an HTTP GET to Healthcheck.PingURL once per
// Healthcheck.PingRate seconds and logs the outcome of every attempt.
//
// It returns immediately. Nothing is started when the healthcheck is disabled
// or when the configured rate is not a positive number of seconds. The
// goroutine has no stop signal and runs for the remaining life of the process.
func (c *Config) pingHealthcheck() {
	if !c.Healthcheck.Enabled {
		return
	}

	// PingRate carries a plain count of seconds, so scale it to a real duration.
	interval := c.Healthcheck.PingRate * time.Second
	if interval <= 0 {
		// time.NewTicker panics on a non-positive interval; a missing or zero
		// ping_rate must not take the whole monitor down.
		l(fmt.Sprintf("❌ Healthcheck pings not started: ping_rate must be a positive number of seconds, got %d", int64(c.Healthcheck.PingRate)))
		return
	}

	// http.DefaultClient has no timeout, so a hung endpoint would block the
	// loop forever. Bound every request instead.
	const pingTimeout = 10 * time.Second
	client := &http.Client{Timeout: pingTimeout}

	ticker := time.NewTicker(interval)

	go func() {
		for range ticker.C {
			resp, err := client.Get(c.Healthcheck.PingURL)
			if err != nil {
				l(fmt.Sprintf("❌ Failed to ping healthcheck URL: %s", err.Error()))
				continue
			}

			// Drain and close the body so the transport can reuse the
			// connection; the payload itself is of no interest.
			_, _ = io.Copy(io.Discard, resp.Body)
			_ = resp.Body.Close()

			// A completed request is only a successful ping if the endpoint
			// accepted it.
			if resp.StatusCode < 200 || resp.StatusCode > 299 {
				l(fmt.Sprintf("❌ Failed to ping healthcheck URL: unexpected status %s", resp.Status))
				continue
			}
			l(fmt.Sprintf("🏓 Successfully pinged healthcheck URL: %s", c.Healthcheck.PingURL))
		}
	}()
}

// endpointRex matches a "//" followed by a host name and an optional port.
// Capture group 1 is the host (every character up to the next "/" or ":");
// capture group 2, when present, is the port including its leading colon,
// e.g. ":26657".
var endpointRex = regexp.MustCompile(`//([^/:]+)(:\d+)?`)

Expand Down
5 changes: 5 additions & 0 deletions td2/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ func Run(configFile, stateFile, chainConfigDirectory string, password *string) e
}()
}

// tenderduty health checks:
if td.Healthcheck.Enabled {
td.pingHealthcheck()
}

for k := range td.Chains {
cc := td.Chains[k]

Expand Down
9 changes: 9 additions & 0 deletions td2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ type Config struct {
Telegram TeleConfig `yaml:"telegram"`
// Slack webhook information
Slack SlackConfig `yaml:"slack"`
// Healthcheck information
Healthcheck HealthcheckConfig `yaml:"healthcheck"`

chainsMux sync.RWMutex // prevents concurrent map access for Chains
// Chains has settings for each validator to monitor. The map's name does not need to match the chain-id.
Expand Down Expand Up @@ -222,6 +224,13 @@ type SlackConfig struct {
Mentions []string `yaml:"mentions"`
}

// HealthcheckConfig holds the information needed to send pings to a healthcheck endpoint
// (a dead man's switch for the monitor itself). It is read from the "healthcheck"
// section of the configuration file.
type HealthcheckConfig struct {
// Enabled turns the periodic pings on or off.
Enabled bool `yaml:"enabled"`
// PingURL is the URL that receives an HTTP GET on every ping.
PingURL string `yaml:"ping_url"`
// PingRate is the interval between pings expressed as a plain number of seconds.
// Although typed as time.Duration, pingHealthcheck multiplies it by time.Second,
// so a configured value of 60 means one ping per minute.
PingRate time.Duration `yaml:"ping_rate"`
}

// validateConfig is a non-exhaustive check for common problems with the configuration. Needs love.
func validateConfig(c *Config) (fatal bool, problems []string) {
problems = make([]string, 0)
Expand Down

0 comments on commit fb37574

Please sign in to comment.