Skip to content

Commit

Permalink
feat(metricprovider): add prometheus range query support (#3704)
Browse files Browse the repository at this point in the history
* feat: add prometheus range query support

Signed-off-by: Matthew Clarke <[email protected]>

* fix: code-gen

Signed-off-by: Matthew Clarke <[email protected]>

* fix: handle matrix results from prometheus

Signed-off-by: Matthew Clarke <[email protected]>

* refactor: remove duplicated code

Signed-off-by: Matthew Clarke <[email protected]>

* feat: configurable step

Signed-off-by: Matthew Clarke <[email protected]>

* fix: codegen

Signed-off-by: Matthew Clarke <[email protected]>

* test: more tests

Signed-off-by: Matthew Clarke <[email protected]>

* docs: range query docs

Signed-off-by: Matthew Clarke <[email protected]>

* refactor: expr for prometheus range query start/end

Signed-off-by: Matthew Clarke <[email protected]>

* fix: lint issue

Signed-off-by: Matthew Clarke <[email protected]>

* docs: missing codegen

Signed-off-by: Matthew Clarke <[email protected]>

* fix: PR Fixes

Signed-off-by: Matthew Clarke <[email protected]>

---------

Signed-off-by: Matthew Clarke <[email protected]>
  • Loading branch information
mclarke47 authored Jul 10, 2024
1 parent 68f7b41 commit 3e4ea74
Show file tree
Hide file tree
Showing 19 changed files with 1,478 additions and 630 deletions.
2 changes: 1 addition & 1 deletion docs/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ make start-e2e E2E_INSTANCE_ID=''
```


6. Working on CRDs? While editing them directly works when you are finding the shape of things you want, the final CRDs are autogenerated. Make sure to regenerate them before submitting PRs. They are controlled by the relevant annotations in the types file:
6. Working on CRDs? While editing them directly works when you are finding the shape of things you want, the final CRDs are autogenerated. Make sure to regenerate them by running `make gen-crd` before submitting PRs. They are controlled by the relevant annotations in the types file:

eg: Analysis Templates are controlled by annotations in `pkg/apis/rollouts/v1alpha1/analysis_types.go`.

Expand Down
41 changes: 41 additions & 0 deletions docs/analysis/prometheus.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,47 @@ you validate your [PromQL expression](https://prometheus.io/docs/prometheus/late
See the [Analysis Overview page](../../features/analysis) for more details on the available options.
## Range queries
```yaml
apiVersion: argoproj.io/v1alpha1
kind: AnalysisTemplate
metadata:
name: range-query-example
spec:
args:
- name: service-name
- name: lookback-duration
value: 5m
metrics:
- name: success-rate
# checks that all returned values are under 1000ms
successCondition: "all(result, # < 1000)"
failureLimit: 3
provider:
prometheus:
rangeQuery:
# See https://expr-lang.org/docs/language-definition#date-functions
# for valid date functions
# The start point to query from
start: 'now() - duration("{{args.lookback-duration}}")'
# The end point to query to
end: 'now()'
# Query resolution width
step: 1m
address: http://prometheus.example.com:9090
query: http_latency_ms{service="{{args.service-name}}"}
```
### Range query and successCondition/failureCondition
Since range queries will usually return multiple values from Prometheus, it is important to assert on every value returned. See the following examples:
* ❌ `result[0] < 1000` - this will only check the first value returned
* ✅ `all(result, # < 1000)` - checks every value returned from Prometheus

See [expr](https://github.com/expr-lang/expr) for more expression options.

## Authorization

### Utilizing Amazon Managed Prometheus
Expand Down
42 changes: 42 additions & 0 deletions docs/features/kustomize/rollout_cr_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -4685,6 +4685,20 @@
"query": {
"type": "string"
},
"rangeQuery": {
"properties": {
"end": {
"type": "string"
},
"start": {
"type": "string"
},
"step": {
"type": "string"
}
},
"type": "object"
},
"timeout": {
"format": "int64",
"type": "integer"
Expand Down Expand Up @@ -9522,6 +9536,20 @@
"query": {
"type": "string"
},
"rangeQuery": {
"properties": {
"end": {
"type": "string"
},
"start": {
"type": "string"
},
"step": {
"type": "string"
}
},
"type": "object"
},
"timeout": {
"format": "int64",
"type": "integer"
Expand Down Expand Up @@ -14372,6 +14400,20 @@
"query": {
"type": "string"
},
"rangeQuery": {
"properties": {
"end": {
"type": "string"
},
"start": {
"type": "string"
},
"step": {
"type": "string"
}
},
"type": "object"
},
"timeout": {
"format": "int64",
"type": "integer"
Expand Down
9 changes: 9 additions & 0 deletions manifests/crds/analysis-run-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3066,6 +3066,15 @@ spec:
type: boolean
query:
type: string
rangeQuery:
properties:
end:
type: string
start:
type: string
step:
type: string
type: object
timeout:
format: int64
type: integer
Expand Down
9 changes: 9 additions & 0 deletions manifests/crds/analysis-template-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3062,6 +3062,15 @@ spec:
type: boolean
query:
type: string
rangeQuery:
properties:
end:
type: string
start:
type: string
step:
type: string
type: object
timeout:
format: int64
type: integer
Expand Down
9 changes: 9 additions & 0 deletions manifests/crds/cluster-analysis-template-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3062,6 +3062,15 @@ spec:
type: boolean
query:
type: string
rangeQuery:
properties:
end:
type: string
start:
type: string
step:
type: string
type: object
timeout:
format: int64
type: integer
Expand Down
27 changes: 27 additions & 0 deletions manifests/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3067,6 +3067,15 @@ spec:
type: boolean
query:
type: string
rangeQuery:
properties:
end:
type: string
start:
type: string
step:
type: string
type: object
timeout:
format: int64
type: integer
Expand Down Expand Up @@ -6356,6 +6365,15 @@ spec:
type: boolean
query:
type: string
rangeQuery:
properties:
end:
type: string
start:
type: string
step:
type: string
type: object
timeout:
format: int64
type: integer
Expand Down Expand Up @@ -9523,6 +9541,15 @@ spec:
type: boolean
query:
type: string
rangeQuery:
properties:
end:
type: string
start:
type: string
step:
type: string
type: object
timeout:
format: int64
type: integer
Expand Down
59 changes: 34 additions & 25 deletions metricproviders/prometheus/mock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,20 @@ import (
)

type mockAPI struct {
value model.Value
err error
warnings v1.Warnings
value model.Value
err error
warnings v1.Warnings
startTimeSent time.Time
endTimeSent time.Time
stepSent time.Duration
}

func (m mockAPI) WalReplay(ctx context.Context) (v1.WalReplayStatus, error) {
func (m *mockAPI) WalReplay(ctx context.Context) (v1.WalReplayStatus, error) {
panic("Not used")
}

// Query performs a query for the given time.
func (m mockAPI) Query(ctx context.Context, query string, ts time.Time, opt ...v1.Option) (model.Value, v1.Warnings, error) {
func (m *mockAPI) Query(ctx context.Context, query string, ts time.Time, opt ...v1.Option) (model.Value, v1.Warnings, error) {
if m.err != nil {
return nil, m.warnings, m.err
}
Expand All @@ -28,78 +31,84 @@ func (m mockAPI) Query(ctx context.Context, query string, ts time.Time, opt ...v

// Below methods are not used but required for the interface implementation

func (m mockAPI) Metadata(ctx context.Context, metric string, limit string) (map[string][]v1.Metadata, error) {
func (m *mockAPI) Metadata(ctx context.Context, metric string, limit string) (map[string][]v1.Metadata, error) {
panic("Not used")
}

func (m mockAPI) CleanTombstones(ctx context.Context) error {
func (m *mockAPI) CleanTombstones(ctx context.Context) error {
panic("Not used")
}

func (m mockAPI) DeleteSeries(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) error {
func (m *mockAPI) DeleteSeries(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) error {
panic("Not used")
}

func (m mockAPI) LabelNames(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]string, v1.Warnings, error) {
func (m *mockAPI) LabelNames(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]string, v1.Warnings, error) {
panic("Not used")
}

func (m mockAPI) LabelValues(ctx context.Context, label string, matches []string, startTime time.Time, endTime time.Time) (model.LabelValues, v1.Warnings, error) {
func (m *mockAPI) LabelValues(ctx context.Context, label string, matches []string, startTime time.Time, endTime time.Time) (model.LabelValues, v1.Warnings, error) {
panic("Not used")
}

func (m mockAPI) QueryRange(ctx context.Context, query string, r v1.Range, opt ...v1.Option) (model.Value, v1.Warnings, error) {
panic("Not used")
func (m *mockAPI) QueryRange(ctx context.Context, query string, r v1.Range, opt ...v1.Option) (model.Value, v1.Warnings, error) {
m.startTimeSent = r.Start
m.endTimeSent = r.End
m.stepSent = r.Step
if m.err != nil {
return nil, m.warnings, m.err
}
return m.value, m.warnings, nil
}

func (m mockAPI) Series(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]model.LabelSet, v1.Warnings, error) {
func (m *mockAPI) Series(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]model.LabelSet, v1.Warnings, error) {
panic("Not used")
}

func (m mockAPI) Targets(ctx context.Context) (v1.TargetsResult, error) {
func (m *mockAPI) Targets(ctx context.Context) (v1.TargetsResult, error) {
panic("Not used")
}

func (m mockAPI) Alerts(ctx context.Context) (v1.AlertsResult, error) {
func (m *mockAPI) Alerts(ctx context.Context) (v1.AlertsResult, error) {
panic("Not used")
}

func (m mockAPI) AlertManagers(ctx context.Context) (v1.AlertManagersResult, error) {
func (m *mockAPI) AlertManagers(ctx context.Context) (v1.AlertManagersResult, error) {
panic("Not used")
}

func (m mockAPI) Config(ctx context.Context) (v1.ConfigResult, error) {
func (m *mockAPI) Config(ctx context.Context) (v1.ConfigResult, error) {
panic("Not used")
}

func (m mockAPI) Flags(ctx context.Context) (v1.FlagsResult, error) {
func (m *mockAPI) Flags(ctx context.Context) (v1.FlagsResult, error) {
panic("Not used")
}

func (m mockAPI) Snapshot(ctx context.Context, skipHead bool) (v1.SnapshotResult, error) {
func (m *mockAPI) Snapshot(ctx context.Context, skipHead bool) (v1.SnapshotResult, error) {
panic("Not used")
}

func (m mockAPI) Rules(ctx context.Context) (v1.RulesResult, error) {
func (m *mockAPI) Rules(ctx context.Context) (v1.RulesResult, error) {
panic("Not used")
}

func (m mockAPI) TargetsMetadata(ctx context.Context, matchTarget string, metric string, limit string) ([]v1.MetricMetadata, error) {
func (m *mockAPI) TargetsMetadata(ctx context.Context, matchTarget string, metric string, limit string) ([]v1.MetricMetadata, error) {
panic("Not used")
}

func (m mockAPI) Runtimeinfo(ctx context.Context) (v1.RuntimeinfoResult, error) {
func (m *mockAPI) Runtimeinfo(ctx context.Context) (v1.RuntimeinfoResult, error) {
panic("Not used")
}

func (m mockAPI) TSDB(ctx context.Context) (v1.TSDBResult, error) {
func (m *mockAPI) TSDB(ctx context.Context) (v1.TSDBResult, error) {
panic("Not used")
}

func (m mockAPI) Buildinfo(ctx context.Context) (v1.BuildinfoResult, error) {
func (m *mockAPI) Buildinfo(ctx context.Context) (v1.BuildinfoResult, error) {
panic("Not used")
}

func (m mockAPI) QueryExemplars(ctx context.Context, query string, startTime time.Time, endTime time.Time) ([]v1.ExemplarQueryResult, error) {
func (m *mockAPI) QueryExemplars(ctx context.Context, query string, startTime time.Time, endTime time.Time) ([]v1.ExemplarQueryResult, error) {
panic("Not used")
}
Loading

0 comments on commit 3e4ea74

Please sign in to comment.