diff --git a/cmd/flags.go b/cmd/flags.go index a0c42d3ef..f6ecd2d47 100644 --- a/cmd/flags.go +++ b/cmd/flags.go @@ -40,6 +40,8 @@ const ( flagVersion = "version" flagEnableDebugServer = "enable-debug-server" flagDebugAddr = "debug-addr" + flagEnableMetricsServer = "enable-metrics-server" + flagMetricsAddr = "metrics-addr" flagOverwriteConfig = "overwrite" flagLimit = "limit" flagHeight = "height" @@ -443,6 +445,31 @@ func debugServerFlags(v *viper.Viper, cmd *cobra.Command) *cobra.Command { return cmd } +func metricsServerFlags(v *viper.Viper, cmd *cobra.Command) *cobra.Command { + cmd.Flags().String( + flagMetricsAddr, + "", + "address to use for metrics server. By default, "+ + "will be the metrics-listen-addr parameter in the global config.", + ) + + if err := v.BindPFlag(flagMetricsAddr, cmd.Flags().Lookup(flagMetricsAddr)); err != nil { + panic(err) + } + + cmd.Flags().Bool( + flagEnableMetricsServer, + false, + "enables metrics server. By default, the metrics server is disabled due to security concerns.", + ) + + if err := v.BindPFlag(flagEnableMetricsServer, cmd.Flags().Lookup(flagEnableMetricsServer)); err != nil { + panic(err) + } + + return cmd +} + func processorFlag(v *viper.Viper, cmd *cobra.Command) *cobra.Command { cmd.Flags().StringP(flagProcessor, "p", relayer.ProcessorEvents, "which relayer processor to use") if err := v.BindPFlag(flagProcessor, cmd.Flags().Lookup(flagProcessor)); err != nil { diff --git a/cmd/start.go b/cmd/start.go index 8e1d38d5d..b9f258f82 100644 --- a/cmd/start.go +++ b/cmd/start.go @@ -24,6 +24,7 @@ import ( "strings" "github.com/cosmos/relayer/v2/internal/relaydebug" + "github.com/cosmos/relayer/v2/internal/relayermetrics" "github.com/cosmos/relayer/v2/relayer" "github.com/cosmos/relayer/v2/relayer/chains/cosmos" "github.com/cosmos/relayer/v2/relayer/processor" @@ -92,10 +93,9 @@ $ %s start demo-path2 --max-tx-size 10`, appName, appName, appName, appName)), return err } - var prometheusMetrics *processor.PrometheusMetrics - debugAddr := a.config.Global.APIListenPort + // debug server debugAddrFlag, err := cmd.Flags().GetString(flagDebugAddr) if err != nil { return err @@ -126,8 +126,45 @@ $ %s start demo-path2 --max-tx-size 10`, appName, appName, appName, appName)), } log := a.log.With(zap.String("sys", "debughttp")) log.Info("Debug server listening", zap.String("addr", debugAddr)) + relaydebug.StartDebugServer(cmd.Context(), log, ln) + } + + // metrics server + var prometheusMetrics *processor.PrometheusMetrics + + metricsAddr := a.config.Global.MetricsListenPort + + metricsAddrFlag, err := cmd.Flags().GetString(flagMetricsAddr) + if err != nil { + return err + } + + if metricsAddrFlag != "" { + metricsAddr = metricsAddrFlag + } + + flagEnableMetricsServer, err := cmd.Flags().GetBool(flagEnableMetricsServer) + if err != nil { + return err + } + + if flagEnableMetricsServer == false || metricsAddr == "" { + a.log.Info("Skipping metrics server due to empty metrics address flag") + } else { + ln, err := net.Listen("tcp", metricsAddr) + if err != nil { + a.log.Error( + "Failed to listen on metrics address. If you have another relayer process open, use --" + + flagMetricsAddr + + " to pick a different address.", + ) + + return fmt.Errorf("failed to listen on metrics address %q: %w", debugAddr, err) + } + log := a.log.With(zap.String("sys", "metricshttp")) + log.Info("Metrics server listening", zap.String("addr", metricsAddr)) prometheusMetrics = processor.NewPrometheusMetrics() - relaydebug.StartDebugServer(cmd.Context(), log, ln, prometheusMetrics.Registry) + relayermetrics.StartMetricsServer(cmd.Context(), log, ln, prometheusMetrics.Registry) for _, chain := range chains { if ccp, ok := chain.ChainProvider.(*cosmos.CosmosProvider); ok { ccp.SetMetrics(prometheusMetrics) @@ -195,6 +232,7 @@ $ %s start demo-path2 --max-tx-size 10`, appName, appName, appName, appName)), cmd = updateTimeFlags(a.viper, cmd) cmd = strategyFlag(a.viper, cmd) cmd = debugServerFlags(a.viper, cmd) + cmd = metricsServerFlags(a.viper, cmd) cmd = processorFlag(a.viper, cmd) cmd = initBlockFlag(a.viper, cmd) cmd = flushIntervalFlag(a.viper, cmd) diff --git a/internal/relaydebug/debugserver.go b/internal/relaydebug/debugserver.go index d72721a71..2ae8aae63 100644 --- a/internal/relaydebug/debugserver.go +++ b/internal/relaydebug/debugserver.go @@ -6,8 +6,6 @@ import ( "net/http" "net/http/pprof" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" "go.uber.org/zap" ) @@ -15,7 +13,7 @@ import ( // accepting connections on the given listener. // Any HTTP logging will be written at info level to the given logger. // The server will be forcefully shut down when ctx finishes. -func StartDebugServer(ctx context.Context, log *zap.Logger, ln net.Listener, registry *prometheus.Registry) { +func StartDebugServer(ctx context.Context, log *zap.Logger, ln net.Listener) { // Although we could just import net/http/pprof and rely on the default global server, // we may want many instances of this in test, // and we will probably want more endpoints as time goes on, @@ -33,12 +31,6 @@ func StartDebugServer(ctx context.Context, log *zap.Logger, ln net.Listener, reg // so operators don't see a mysterious 404 page. mux.Handle("/", http.RedirectHandler("/debug/pprof", http.StatusSeeOther)) - // Serve default prometheus metrics - mux.Handle("/metrics", promhttp.Handler()) - - // Serve relayer metrics - mux.Handle("/relayer/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{})) - srv := &http.Server{ Handler: mux, ErrorLog: zap.NewStdLog(log), diff --git a/internal/relayermetrics/metricsserver.go b/internal/relayermetrics/metricsserver.go new file mode 100644 index 000000000..74873a039 --- /dev/null +++ b/internal/relayermetrics/metricsserver.go @@ -0,0 +1,41 @@ +package relayermetrics + +import ( + "context" + "net" + "net/http" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "go.uber.org/zap" +) + +// StartMetricsServer starts a metrics server in a background goroutine, +// accepting connections on the given listener. +// Any HTTP logging will be written at info level to the given logger. +// The server will be forcefully shut down when ctx finishes. +func StartMetricsServer(ctx context.Context, log *zap.Logger, ln net.Listener, registry *prometheus.Registry) { + // Set up new mux identical to the default mux configuration in net/http/pprof. + mux := http.NewServeMux() + + // Serve default prometheus metrics + mux.Handle("/metrics", promhttp.Handler()) + + // Serve relayer metrics + mux.Handle("/relayer/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{})) + + srv := &http.Server{ + Handler: mux, + ErrorLog: zap.NewStdLog(log), + BaseContext: func(net.Listener) context.Context { + return ctx + }, + } + + go srv.Serve(ln) + + go func() { + <-ctx.Done() + srv.Close() + }() +}