Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add health check #27

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
irccat
irccat.json
health.timestamp
4 changes: 4 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,12 @@ RUN CGO_ENABLED=0 go get -t -v ./... && go build -a .
# Step two: copy over the binary and root certs
FROM scratch
COPY --from=build /go/bin/irccat /irccat
COPY --from=build /go/bin/healthcheck /healthcheck
COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt

# Docker-only (no OCI)
HEALTHCHECK --interval=60s --timeout=10s --start-period=600s CMD ["/healthcheck"]

EXPOSE 12345
EXPOSE 8045

Expand Down
7 changes: 5 additions & 2 deletions examples/irccat.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
"irccat": "#irccat-dev"
}
}
}
},
"health_endpoint": "/healthz"
},
"irc": {
"server": "irc.example.com:6697",
Expand All @@ -30,7 +31,9 @@
"sasl_login": "",
"sasl_pass": "",
"channels": ["#channel"],
"keys": {"#channel": "join_key"}
"keys": {"#channel": "join_key"},
"health_file": "./health.timestamp",
"health_period": "5m"
},
"commands": {
"auth_channel": "#channel",
Expand Down
43 changes: 43 additions & 0 deletions healthcheck/healthcheck.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Command healthcheck reads a file containing a UNIX UTC timestamp and exits
// nonzero after printing a message to stderr if the allotted time has elapsed
// since a value was last recorded. Config value irc.health_period specifies
// the max allotted time plus a small cushion for lag. It defaults to fifteen
// minutes, which is go-ircevent's default PING frequency (as of
// github.com/thoj/[email protected]). Optional
// config item irc.health_file designates the path to the timestamp file. If
// unset, the program says so (to stderr) and exits zero.
package main

import (
"log"
"time"

"github.com/irccloud/irccat/util"
"github.com/spf13/viper"
)

var lagInterval = 10 * time.Second // for testing
var defaultPeriod = 15 * time.Minute

func main() {
viper.SetConfigName("irccat")
viper.AddConfigPath("/etc")
viper.AddConfigPath(".")
viper.SetDefault("irc.health_period", defaultPeriod)

if err := viper.ReadInConfig(); err != nil {
log.Fatalln("Error reading config file. Exiting.")
}

healthFile := viper.GetString("irc.health_file")
if healthFile == "" {
log.Println("Config option irc.health_file unset; exiting.")
return
}

freq := lagInterval + viper.GetDuration("irc.health_period")

if err := util.CheckTimestamp(healthFile, freq); err != nil {
log.Fatalln(err.Error())
}
}
44 changes: 44 additions & 0 deletions httplistener/health.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package httplistener

import (
"fmt"
"net/http"
"time"

"github.com/irccloud/irccat/util"
"github.com/spf13/viper"
)

var LagInterval = 10 * time.Second // for testing
var DefaultPeriod = 15 * time.Minute

// healthHandler returns non-2xx if the configured timeout has elapsed.
//
// This mainly exists to present an interface for supervisors relying on
// liveliness/readiness probes (e.g., for Kubernetes deployments). However, a
// conservative client could query it before sending a payload. See also
// /healthcheck in this same pkg.
func (hl *HTTPListener) healthHandler(
w http.ResponseWriter,
request *http.Request,
) {
if request.Method != "GET" {
http.NotFound(w, request)
return
}
healthFile := viper.GetString("irc.health_file")
if healthFile == "" {
http.NotFound(w, request)
return
}
viper.SetDefault("irc.health_period", DefaultPeriod)

freq := LagInterval + viper.GetDuration("irc.health_period")
err := util.CheckTimestamp(healthFile, freq)
if err != nil {
log.Criticalf("%s", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
fmt.Fprintln(w, "OK")
}
88 changes: 88 additions & 0 deletions httplistener/health_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package httplistener

import (
"bytes"
"context"
"fmt"
"io"
"path"
"testing"
"time"

"net/http"

"github.com/irccloud/irccat/util"
"github.com/juju/loggo"
"github.com/spf13/viper"
)

var origLag = LagInterval

var configFmt = `---
irc:
health_file: %s
health_period: 1ms
http:
health_endpoint: /testing/healthz
`

func startServer(t *testing.T) {
hl := HTTPListener{
http: http.Server{Addr: "localhost:18045"},
}
http.HandleFunc(viper.GetString("http.health_endpoint"), hl.healthHandler)
go hl.http.ListenAndServe()
t.Cleanup(func() {hl.http.Shutdown(context.Background())})
}

func getOne(t *testing.T) (*http.Response, string) {
res, err := http.Get("http://localhost:18045/testing/healthz")
if err != nil {
t.Error(err)
}
got, err := io.ReadAll(res.Body)
if err != nil {
t.Error(err)
}
err = res.Body.Close()
if err != nil {
t.Error(err)
}
return res, string(got)
}

func TestHealthHandler(t *testing.T) {
writer, err := loggo.RemoveWriter("default")
if err != nil {
t.Error(err)
}
t.Cleanup(func() {loggo.DefaultContext().AddWriter("default", writer)})
LagInterval = 0
t.Cleanup(func() {LagInterval = origLag})
dir := t.TempDir()
now := time.Now()
file := path.Join(dir, "timestamp")
if err := util.WriteTimestamp(file, now); err != nil {
t.Error(err)
}
viper.SetConfigType("yaml")
config := []byte(fmt.Sprintf(configFmt, file))
viper.ReadConfig(bytes.NewBuffer(config))
startServer(t)
time.Sleep(time.Millisecond)
// Fail
resp, got := getOne(t)
if resp.StatusCode != 500 {
t.Error("unexpected status", resp.Status)
}
t.Log(resp.Status, got)
// Success
viper.Set("irc.health_period", time.Second)
resp, got = getOne(t)
if resp.StatusCode != 200 {
t.Error("unexpected failure", resp.Status)
}
if string(got) != "OK\n" {
t.Error("unexpected output", string(got))
}
}
6 changes: 6 additions & 0 deletions httplistener/httplistener.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ func New(irc *irc.Connection) (*HTTPListener, error) {
mux.HandleFunc("/prometheus", hl.prometheusHandler)
}

if viper.IsSet("http.health_endpoint") {
ep := viper.GetString("http.health_endpoint")
log.Infof("Listening for HTTP GET requests at", ep)
mux.HandleFunc(ep, hl.healthHandler)
}

hl.http.Handler = mux
if viper.GetBool("http.tls") {
go hl.http.ListenAndServeTLS(viper.GetString("http.tls_cert"), viper.GetString("http.tls_key"))
Expand Down
4 changes: 4 additions & 0 deletions irc.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ func (i *IRCCat) connectIRC(debug bool) error {
irccon.Timeout = time.Second * 15
irccon.RequestCaps = []string{"away-notify", "account-notify", "draft/message-tags-0.2"}
irccon.UseTLS = viper.GetBool("irc.tls")
if viper.IsSet("irc.health_period") {
irccon.PingFreq = viper.GetDuration("irc.health_period")
}

if viper.IsSet("irc.sasl_pass") && viper.GetString("irc.sasl_pass") != "" {
if viper.IsSet("irc.sasl_login") && viper.GetString("irc.sasl_login") != "" {
Expand Down Expand Up @@ -52,6 +55,7 @@ func (i *IRCCat) connectIRC(debug bool) error {
irccon.AddCallback("QUIT", i.handleQuit)
irccon.AddCallback("KILL", i.handleQuit)
irccon.AddCallback("NICK", i.handleNick)
irccon.AddCallback("PONG", i.handlePong)

return nil
}
Expand Down
16 changes: 16 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package main
import (
"flag"
"fmt"
"time"

"github.com/deckarep/golang-set"
"github.com/fsnotify/fsnotify"
"github.com/irccloud/irccat/httplistener"
Expand All @@ -13,6 +15,7 @@ import (
"os"
"os/signal"
"syscall"
"github.com/irccloud/irccat/util"
)

var log = loggo.GetLogger("main")
Expand Down Expand Up @@ -106,3 +109,16 @@ func (i *IRCCat) handleConfigChange(e fsnotify.Event) {
i.channels.Remove(channel)
}
}

// handlePong writes the current time in UNIX seconds to a designated file.
// Opt in by specifying a file path for config item irc.health_file.
func (i *IRCCat) handlePong(e *irc.Event) {
healthFile := viper.GetString("irc.health_file")
if healthFile == "" {
return
}
err:= util.WriteTimestamp(healthFile, time.Now())
if err != nil {
log.Criticalf("%s", err)
}
}
53 changes: 53 additions & 0 deletions util/timestamp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package util

import (
"fmt"
"os"
"strconv"
"strings"
"time"
)

func isExpired(lastStamp int64, period time.Duration) bool {
return time.Now().Sub(time.Unix(lastStamp, 0)) > period
}

func getTimestamp(tsFile string) (int64, error) {
raw, err := os.ReadFile(tsFile)
if err != nil {
return 0, fmt.Errorf("Couldn't read timestamp file: %s", err)
}
s := strings.TrimRight(string(raw), "\n")
ts, err := strconv.ParseInt(s, 10, 64)
if err != nil {
// Parsing error already includes offending text
return 0, fmt.Errorf("Couldn't parse timestamp: %s", err)
}
return ts, nil
}

// WriteTimestamp creates a timestamp file.
func WriteTimestamp(tsFile string, t time.Time) error {
s := fmt.Sprintf("%d\n", t.Unix())
err := os.WriteFile(tsFile, []byte(s), 0666)
if err != nil {
return fmt.Errorf("Couldn't write to timestamp file: %s", err)
}
return nil
}

// CheckTimestamp reads a timestamp file and returns an error if it's expired.
//
// The file should contain a decimal representation of a Unix timestamp and an
// optional LF newline.
func CheckTimestamp(tsFile string, period time.Duration) error {
ts, err := getTimestamp(tsFile)
if err != nil {
return err
}
if isExpired(ts, period) {
diff := time.Now().Sub(time.Unix(ts, 0).Add(period))
return fmt.Errorf("Timeout exceeded by %s", diff.String())
}
return nil
}
49 changes: 49 additions & 0 deletions util/timestamp_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package util

import (
"os"
"path"
"testing"
"time"
)

func TestIsExpired(t *testing.T) {
ts := time.Now().Unix()
if isExpired(ts, time.Second) {
t.Error("Shouldn't have expired")
}
if !isExpired(ts, time.Microsecond) {
t.Error("Should've expired")
}
}

func TestCheckTimestamp(t *testing.T) {
dir := t.TempDir()
now := time.Now()
file := path.Join(dir, "timestamp")
WriteTimestamp(file, now)
if err := CheckTimestamp(file, time.Second); err != nil {
t.Error(err)
}
err := CheckTimestamp(file, time.Microsecond)
if err == nil {
t.Error("Did not raise timeout")
}
t.Log(err)
fake := path.Join(dir, "fake")
// File missing
err = CheckTimestamp(fake, time.Second)
if err == nil {
t.Error("Did not raise missing file error")
}
t.Log(err)
// Wrong format
if err := os.WriteFile(fake, []byte{':', '('}, 0600); err != nil {
t.Error(err)
}
err = CheckTimestamp(fake, time.Second)
if err == nil {
t.Error("Did not raise bad parse error")
}
t.Log(err)
}