diff --git a/docs/content/configuration.md b/docs/content/configuration.md index 8e1fe9ea..dd967c02 100644 --- a/docs/content/configuration.md +++ b/docs/content/configuration.md @@ -32,7 +32,7 @@ establish authentication credentials. ```sh grr config set grafana.url http://localhost:3000 # URL for the root of your Grafana instance -grr config set grafana.user admin # Optional: Username if using basic auth +grr config set grafana.user admin # (Optional) Username if using basic auth grr config set grafana.token abcd12345 # Service account token (or basic auth password) ``` @@ -40,22 +40,23 @@ grr config set grafana.token abcd12345 # Service account token (or basic auth pa To interact with Grafana Cloud Prometheus (aka Mimir), use these settings: ```sh -grr config set mimir.address https://mimir.example.com # URL for Grafana Cloud Prometheus instance -grr config set mimir.tenant-id 1234567 # Tenant ID for your Grafana Cloud Prometheus account -grr config set mimir.api-key abcdef12345 # Authentication token +grr config set mimir.address https://mimir.example.com # URL for Mimir instance or Grafana Cloud Prometheus instance +grr config set mimir.tenant-id myTenant # Tenant ID for your Grafana Cloud Prometheus account +grr config set mimir.api-key abcdef12345 # Authentication token (if you are using Grafana Cloud) ``` -Note, this will also work with other Cortex installations, alongside Grafana Cloud Prometheus/Mimir. +**Notes** +* Be sure to set `api-key` when you need to interact with Grafana Cloud. 
## Grafana Synthetic Monitoring To interact with Grafana Synthetic Monitoring, you must configure the below settings: ```sh grr config set synthetic-monitoring.token abcdef123456 # API key (must have MetricsPublisher permissions) -grr config set synthetic-monitoring.stack-id # Grafana stack ID -grr config set synthetic-monitoring.metrics-id # Metrics instance ID -grr config set synthetic-monitoring.logs-id # Logs instance ID -grr config set synthetic-monitoring.url # Synthetic Monitoring instance URL +grr config set synthetic-monitoring.stack-id 123 # Grafana stack ID +grr config set synthetic-monitoring.metrics-id 123 # Metrics instance ID +grr config set synthetic-monitoring.logs-id 123 # Logs instance ID +grr config set synthetic-monitoring.url https://synthetic-monitoring-api.grafana.net # Synthetic Monitoring instance URL ``` Your stack ID is the number at the end of the url when you view your Grafana instance details, ie. `grafana.com/orgs/myorg/stacks/123456` would be `123456`. Your metrics and logs ID's are the `User` when you view your Prometheus or Loki instance details in Grafana Cloud. You can find your instance URL under your Synthetic Monitoring configuration. @@ -149,13 +150,13 @@ docs](https://grafana.com/docs/grafana/latest/http_api/auth/) for more info. 
## Grafana Cloud Prometheus To interact with Grafana Cloud Prometheus, you must have these environment variables set: -| Name | Description | Required | -|--------------------|-----------------------------------------------------|----------| -| `CORTEX_ADDRESS` | URL for Grafana Cloud Prometheus instance | true | -| `CORTEX_TENANT_ID` | Tenant ID for your Grafana Cloud Prometheus account | true | -| `CORTEX_API_KEY` | Authentication token/api key | true | +| Name | Description | Required | +|-------------------|-----------------------------------------------------|----------| +| `MIMIR_ADDRESS` | URL for Grafana Cloud Prometheus instance | true | +| `MIMIR_TENANT_ID` | Tenant ID for your Grafana Cloud Prometheus account | true | +| `MIMIR_API_KEY` | Authentication token/api key | false | -Note, this will also work with other Cortex installations, alongside Grafana Cloud Prometheus. +Note, this will also work with other Mimir installations, alongside Grafana Cloud Prometheus. ## Grafana Synthetic Monitoring To interact with Grafana Synthetic Monitoring, you must have these environment variable set: diff --git a/docs/content/prometheus.md b/docs/content/prometheus.md index d6db4a23..b5393e16 100644 --- a/docs/content/prometheus.md +++ b/docs/content/prometheus.md @@ -9,7 +9,7 @@ local disk. As Grizzly focuses on systems that can be managed via HTTP APIs, Grizzly cannot (currently) work with Prometheus itself. Various hosted Prometheus installations, such as Grafana Cloud Prometheus -are supported, as are systems running Cortex. +are supported, as are systems running Mimir. 
## Configuring Prometheus Prometheus alert and recording rules are both created using the same `kind`: diff --git a/pkg/config/config.go b/pkg/config/config.go index 30cf9c84..02ba41cf 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -39,18 +39,37 @@ func override(v *viper.Viper) { "synthetic-monitoring.metrics-id": "GRAFANA_SM_METRICS_ID", "synthetic-monitoring.url": "GRAFANA_SM_URL", - "mimir.address": "CORTEX_ADDRESS", - "mimir.tenant-id": "CORTEX_TENANT_ID", - "mimir.api-key": "CORTEX_API_KEY", + "mimir.address": "MIMIR_ADDRESS", + "mimir.tenant-id": "MIMIR_TENANT_ID", + "mimir.api-key": "MIMIR_API_KEY", } + + // To keep retro compatibility + legacyBindings := map[string]string{ + "MIMIR_ADDRESS": "CORTEX_ADDRESS", + "MIMIR_TENANT_ID": "CORTEX_TENANT_ID", + "MIMIR_API_KEY": "CORTEX_API_KEY", + } + for key, env := range bindings { - val := os.Getenv(env) - if val != "" { + if val := getVal(env, legacyBindings); val != "" { v.Set(key, val) } } } +func getVal(env string, alternativeMap map[string]string) string { + if val := os.Getenv(env); val != "" { + return val + } + + if alternativeMap[env] != "" { + return getVal(alternativeMap[env], nil) + } + + return "" +} + func Read() error { err := viper.ReadInConfig() if err != nil { diff --git a/pkg/mimir/client/http_client.go b/pkg/mimir/client/http_client.go new file mode 100644 index 00000000..ccf341ea --- /dev/null +++ b/pkg/mimir/client/http_client.go @@ -0,0 +1,130 @@ +package client + +import ( + "bytes" + "errors" + "fmt" + "io" + "net/http" + "os" + "strconv" + "time" + + "github.com/grafana/grizzly/pkg/config" + "github.com/grafana/grizzly/pkg/mimir/models" + "gopkg.in/yaml.v3" +) + +var loadRulesEndpoint = "%s/prometheus/config/v1/rules/%s" +var listRulesEndpoint = "%s/prometheus/api/v1/rules" + +type ListGroupResponse struct { + Status string `yaml:"status"` + Data struct { + DataGroups []DataGroups `yaml:"groups"` + } `yaml:"data"` +} + +type DataGroups struct { + Name string `yaml:"name"` 
+ File string `yaml:"file"` + Rules []interface{} `yaml:"rules"` +} + +type Client struct { + config *config.MimirConfig +} + +func NewHTTPClient(config *config.MimirConfig) Mimir { + return &Client{config: config} +} + +func (c *Client) ListRules() (map[string][]models.PrometheusRuleGroup, error) { + url := fmt.Sprintf(listRulesEndpoint, c.config.Address) + res, err := c.doRequest(http.MethodGet, url, nil) + if err != nil { + return nil, err + } + + var response ListGroupResponse + if err := yaml.Unmarshal(res, &response); err != nil { + return nil, err + } + + groups := make(map[string][]models.PrometheusRuleGroup) + for _, g := range response.Data.DataGroups { + groups[g.File] = append(groups[g.File], models.PrometheusRuleGroup{ + Name: g.Name, + Rules: g.Rules, + }) + } + + return groups, nil +} + +func (c *Client) CreateRules(resource models.PrometheusRuleGrouping) error { + url := fmt.Sprintf(loadRulesEndpoint, c.config.Address, resource.Namespace) + for _, group := range resource.Groups { + out, err := yaml.Marshal(group) + if err != nil { + return fmt.Errorf("cannot marshall groups: %s", err) + } + + if _, err = c.doRequest(http.MethodPost, url, out); err != nil { + return fmt.Errorf("error found creating rule group: %s", group.Name) + } + } + + return nil +} + +func (c *Client) doRequest(method string, url string, body []byte) ([]byte, error) { + if c.config.TenantID == "" { + return nil, errors.New("missing tenant-id") + } + req, err := http.NewRequest(method, url, bytes.NewReader(body)) + if err != nil { + return nil, err + } + + req.Header.Set("Content-Type", "application/yaml") + if c.config.APIKey != "" { + req.SetBasicAuth(c.config.TenantID, c.config.APIKey) + } else { + req.Header.Set("X-Scope-OrgID", c.config.TenantID) + } + + client, err := createHTTPClient() + if err != nil { + return nil, err + } + + res, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("request to load rules failed: %s", err) + } + + if res.StatusCode >= 300 { + 
return nil, fmt.Errorf("error loading rules: %d", res.StatusCode) + } + + b, err := io.ReadAll(res.Body) + if err != nil { + return nil, fmt.Errorf("cannot read response body: %s", err) + } + + return b, nil +} + +func createHTTPClient() (*http.Client, error) { + timeout := 10 * time.Second + // TODO: Move this configuration to the global configuration + if timeoutStr := os.Getenv("GRIZZLY_HTTP_TIMEOUT"); timeoutStr != "" { + timeoutSeconds, err := strconv.Atoi(timeoutStr) + if err != nil { + return nil, err + } + timeout = time.Duration(timeoutSeconds) * time.Second + } + return &http.Client{Timeout: timeout}, nil +} diff --git a/pkg/mimir/client/mimir.go b/pkg/mimir/client/mimir.go new file mode 100644 index 00000000..ccbc2976 --- /dev/null +++ b/pkg/mimir/client/mimir.go @@ -0,0 +1,10 @@ +package client + +import ( + "github.com/grafana/grizzly/pkg/mimir/models" +) + +type Mimir interface { + ListRules() (map[string][]models.PrometheusRuleGroup, error) + CreateRules(resource models.PrometheusRuleGrouping) error +} diff --git a/pkg/mimir/cortex_tool.go b/pkg/mimir/cortex_tool.go deleted file mode 100644 index 9ddae32c..00000000 --- a/pkg/mimir/cortex_tool.go +++ /dev/null @@ -1,39 +0,0 @@ -package mimir - -import ( - "fmt" - "os" - "os/exec" - - "github.com/grafana/grizzly/pkg/config" -) - -type CortexTool interface { - ExecuteCortexTool(args ...string) ([]byte, error) -} - -type Cortex struct { - config *config.MimirConfig -} - -func NewCortexTool(config *config.MimirConfig) *Cortex { - return &Cortex{config: config} -} - -func (c *Cortex) ExecuteCortexTool(args ...string) ([]byte, error) { - path := os.Getenv("CORTEXTOOL_PATH") - if path == "" { - var err error - path, err = exec.LookPath("cortextool") - if err != nil { - return nil, err - } else if path == "" { - return nil, fmt.Errorf("cortextool not found") - } - } - cmd := exec.Command(path, args...) 
- cmd.Env = append(cmd.Env, fmt.Sprintf("CORTEX_ADDRESS=%s", c.config.Address)) - cmd.Env = append(cmd.Env, fmt.Sprintf("CORTEX_TENANT_ID=%s", c.config.TenantID)) - cmd.Env = append(cmd.Env, fmt.Sprintf("CORTEX_API_KEY=%s", c.config.APIKey)) - return exec.Command(path, args...).Output() -} diff --git a/pkg/mimir/models/models.go b/pkg/mimir/models/models.go new file mode 100644 index 00000000..2d368274 --- /dev/null +++ b/pkg/mimir/models/models.go @@ -0,0 +1,13 @@ +package models + +// PrometheusRuleGroup encapsulates a list of rules +type PrometheusRuleGroup struct { + Name string `yaml:"name"` + Rules []interface{} `yaml:"rules"` +} + +// PrometheusRuleGrouping encapsulates a set of named rule groups +type PrometheusRuleGrouping struct { + Namespace string `yaml:"namespace"` + Groups []PrometheusRuleGroup `yaml:"groups"` +} diff --git a/pkg/mimir/provider.go b/pkg/mimir/provider.go index 1b7234e3..acf7cd67 100644 --- a/pkg/mimir/provider.go +++ b/pkg/mimir/provider.go @@ -2,32 +2,32 @@ package mimir import ( "fmt" - "os/exec" "path/filepath" "github.com/grafana/grizzly/pkg/config" "github.com/grafana/grizzly/pkg/grizzly" + "github.com/grafana/grizzly/pkg/mimir/client" ) // Provider is a grizzly.Provider implementation for Grafana. type Provider struct { - config *config.MimirConfig + config *config.MimirConfig + clientTool client.Mimir } // NewProvider instantiates a new Provider. 
func NewProvider(config *config.MimirConfig) (*Provider, error) { - if _, err := exec.LookPath("cortextool"); err != nil { - return nil, err - } + clientTool := client.NewHTTPClient(config) if config.Address == "" { return nil, fmt.Errorf("mimir address is not set") } - if config.APIKey == "" { - return nil, fmt.Errorf("mimir api key is not set") + if config.TenantID == "" { + return nil, fmt.Errorf("mimir tenant id is not set") } return &Provider{ - config: config, + config: config, + clientTool: clientTool, }, nil } @@ -53,6 +53,6 @@ func (p *Provider) APIVersion() string { // GetHandlers identifies the handlers for the Grafana provider func (p *Provider) GetHandlers() []grizzly.Handler { return []grizzly.Handler{ - NewRuleHandler(p), + NewRuleHandler(p, p.clientTool), } } diff --git a/pkg/mimir/rules-handler.go b/pkg/mimir/rules-handler.go index 6abf25b7..a3e2bf35 100644 --- a/pkg/mimir/rules-handler.go +++ b/pkg/mimir/rules-handler.go @@ -2,25 +2,25 @@ package mimir import ( "fmt" - "log" - "os" "strings" + "github.com/grafana/grizzly/pkg/mimir/client" + "github.com/grafana/grizzly/pkg/mimir/models" + "github.com/grafana/grizzly/pkg/grizzly" - "gopkg.in/yaml.v3" ) // RuleHandler is a Grizzly Handler for Prometheus Rules type RuleHandler struct { grizzly.BaseHandler - cortexTool CortexTool + clientTool client.Mimir } // NewRuleHandler returns a new Grizzly Handler for Prometheus Rules -func NewRuleHandler(provider *Provider) *RuleHandler { +func NewRuleHandler(provider *Provider, clientTool client.Mimir) *RuleHandler { return &RuleHandler{ BaseHandler: grizzly.NewBaseHandler(provider, "PrometheusRuleGroup", false), - cortexTool: NewCortexTool(provider.config), + clientTool: clientTool, } } @@ -86,15 +86,11 @@ func (h *RuleHandler) getRemoteRuleGroup(uid string) (*grizzly.Resource, error) namespace := parts[0] name := parts[1] - out, err := h.cortexTool.ExecuteCortexTool("rules", "print", "--disable-color") - if err != nil { - return nil, err - } - groupings := 
map[string][]PrometheusRuleGroup{} - err = yaml.Unmarshal(out, &groupings) + groupings, err := h.clientTool.ListRules() if err != nil { return nil, err } + for key, grouping := range groupings { if key == namespace { for _, group := range grouping { @@ -117,17 +113,12 @@ func (h *RuleHandler) getRemoteRuleGroup(uid string) (*grizzly.Resource, error) // getRemoteRuleGroupList retrieves a datasource object from Grafana func (h *RuleHandler) getRemoteRuleGroupList() ([]string, error) { - out, err := h.cortexTool.ExecuteCortexTool("rules", "print", "--disable-color") - if err != nil { - return nil, err - } - groupings := map[string][]PrometheusRuleGroup{} - err = yaml.Unmarshal(out, &groupings) + groupings, err := h.clientTool.ListRules() if err != nil { return nil, err } - IDs := []string{} + var IDs []string for namespace, grouping := range groupings { for _, group := range grouping { uid := fmt.Sprintf("%s.%s", namespace, group.Name) @@ -137,51 +128,20 @@ func (h *RuleHandler) getRemoteRuleGroupList() ([]string, error) { return IDs, nil } -// PrometheusRuleGroup encapsulates a list of rules -type PrometheusRuleGroup struct { - Namespace string `yaml:"-"` - Name string `yaml:"name"` - Rules []map[string]interface{} `yaml:"rules"` -} - -// PrometheusRuleGrouping encapsulates a set of named rule groups -type PrometheusRuleGrouping struct { - Namespace string `json:"namespace"` - Groups []PrometheusRuleGroup `json:"groups"` -} - func (h *RuleHandler) writeRuleGroup(resource grizzly.Resource) error { - tmpfile, err := os.CreateTemp("", "cortextool-*") - if err != nil { - return err - } - newGroup := PrometheusRuleGroup{ - Name: resource.Name(), - // Rules: resource.Spec()["rules"].([]map[string]interface{}), - Rules: []map[string]interface{}{}, + newGroup := models.PrometheusRuleGroup{ + Name: resource.Name(), + Rules: []interface{}{}, } rules := resource.Spec()["rules"].([]interface{}) for _, ruleIf := range rules { rule := ruleIf.(map[string]interface{}) newGroup.Rules 
= append(newGroup.Rules, rule)
 	}
-	grouping := PrometheusRuleGrouping{
+	grouping := models.PrometheusRuleGrouping{
 		Namespace: resource.GetMetadata("namespace"),
-		Groups:    []PrometheusRuleGroup{newGroup},
-	}
-	out, err := yaml.Marshal(grouping)
-	if err != nil {
-		return err
-	}
-	if err := os.WriteFile(tmpfile.Name(), out, 0644); err != nil {
-		return err
+		Groups:    []models.PrometheusRuleGroup{newGroup},
 	}
-	output, err := h.cortexTool.ExecuteCortexTool("rules", "load", tmpfile.Name())
-	if err != nil {
-		log.Println(output)
-		return err
-	}
-	os.Remove(tmpfile.Name())
-	return err
+	return h.clientTool.CreateRules(grouping)
 }
diff --git a/pkg/mimir/rules_test.go b/pkg/mimir/rules_test.go
index 8c21df0f..80501042 100644
--- a/pkg/mimir/rules_test.go
+++ b/pkg/mimir/rules_test.go
@@ -6,21 +6,22 @@ import (
 	"testing"

 	"github.com/grafana/grizzly/pkg/grizzly"
+	"github.com/grafana/grizzly/pkg/mimir/models"
 	"github.com/stretchr/testify/require"
 	"gopkg.in/yaml.v3"
 )

-var errCortextoolClient = errors.New("error coming from cortextool client")
+var errMimirClient = errors.New("error coming from mimir client")

 func TestRules(t *testing.T) {
-	cortexTool := &FakeCortexTool{}
+	client := &FakeClient{}
 	h := RuleHandler{
 		BaseHandler: grizzly.NewBaseHandler(&Provider{}, "PrometheusRuleGroup", false),
-		cortexTool:  cortexTool,
+		clientTool:  client,
 	}

 	t.Run("get remote rule group", func(t *testing.T) {
-		cortexTool.mockResponse(t, true, nil)
+		client.mockResponse(t, true, nil)
 		res, err := h.getRemoteRuleGroup("first_rules.grizzly_alerts")
 		require.NoError(t, err)
 		uid, err := h.GetUID(*res)
@@ -31,36 +32,36 @@ func TestRules(t *testing.T) {
 		require.Equal(t, "PrometheusRuleGroup", res.Kind())
 	})

-	t.Run("get remote rule group - error from cortextool client", func(t *testing.T) {
-		cortexTool.mockResponse(t, false, errCortextoolClient)
+	t.Run("get remote rule group - error from mimir client", func(t *testing.T) {
+		client.mockResponse(t, false, errMimirClient)
 		res, err := h.getRemoteRuleGroup("first_rules.grizzly_alerts")
 		require.Error(t, err)
 		require.Nil(t, res)
 	})

 	t.Run("get remote rule group - return not found", func(t *testing.T) {
-		cortexTool.mockResponse(t, true, nil)
+		client.mockResponse(t, true, nil)
 		res, err := h.getRemoteRuleGroup("name.name")
 		require.Error(t, err)
 		require.Nil(t, res)
 	})

 	t.Run("get remote rule group list", func(t *testing.T) {
-		cortexTool.mockResponse(t, true, nil)
+		client.mockResponse(t, true, nil)
 		res, err := h.getRemoteRuleGroupList()
 		require.NoError(t, err)
 		require.Equal(t, "first_rules.grizzly_alerts", res[0])
 	})

 	t.Run("get remote rule group list", func(t *testing.T) {
-		cortexTool.mockResponse(t, false, errCortextoolClient)
+		client.mockResponse(t, false, errMimirClient)
 		res, err := h.getRemoteRuleGroupList()
 		require.Error(t, err)
 		require.Nil(t, res)
 	})

 	t.Run("write rule group", func(t *testing.T) {
-		cortexTool.mockResponse(t, false, nil)
+		client.mockResponse(t, false, nil)
 		spec := make(map[string]interface{})
 		file, err := os.ReadFile("testdata/rules.yaml")
 		require.NoError(t, err)
@@ -73,8 +74,8 @@ func TestRules(t *testing.T) {
 		require.NoError(t, err)
 	})

-	t.Run("write rule group - error from the cortextool client", func(t *testing.T) {
-		cortexTool.mockResponse(t, false, errCortextoolClient)
+	t.Run("write rule group - error from the mimir client", func(t *testing.T) {
+		client.mockResponse(t, false, errMimirClient)
 		spec := make(map[string]interface{})
 		file, err := os.ReadFile("testdata/rules.yaml")
 		require.NoError(t, err)
@@ -102,28 +103,55 @@ func TestRules(t *testing.T) {
 	})
 }

-type FakeCortexTool struct {
+type FakeClient struct {
 	hasFile       bool
 	expectedError error
 }

-func (f *FakeCortexTool) mockResponse(t *testing.T, hasFile bool, expectedError error) {
-	f.hasFile = hasFile
-	f.expectedError = expectedError
-	t.Cleanup(func() {
-		f.hasFile = false
-		f.expectedError = nil
-	})
-}
-
-func (f *FakeCortexTool) ExecuteCortexTool(_ ...string) ([]byte, error) {
+func (f *FakeClient) ListRules() (map[string][]models.PrometheusRuleGroup, error) {
 	if f.expectedError != nil {
 		return nil, f.expectedError
 	}

 	if f.hasFile {
-		return os.ReadFile("testdata/list_rules.yaml")
+		res, err := os.ReadFile("testdata/list_rules.yaml")
+		if err != nil {
+			return nil, err
+		}
+
+		var group map[string][]models.PrometheusRuleGroup
+		if err := yaml.Unmarshal(res, &group); err != nil {
+			return nil, err
+		}
+
+		return group, nil
 	}

 	return nil, nil
 }
+
+func (f *FakeClient) CreateRules(_ models.PrometheusRuleGrouping) error {
+	if f.expectedError != nil {
+		return f.expectedError
+	}
+
+	if f.hasFile {
+		_, err := os.ReadFile("testdata/list_rules.yaml")
+		if err != nil {
+			return err
+		}
+
+		return nil
+	}
+
+	return nil
+}
+
+func (f *FakeClient) mockResponse(t *testing.T, hasFile bool, expectedError error) {
+	f.hasFile = hasFile
+	f.expectedError = expectedError
+	t.Cleanup(func() {
+		f.hasFile = false
+		f.expectedError = nil
+	})
+}