
Adding terraform support for metric anomaly monitors
Andrew-L-Johnson committed Oct 23, 2024
1 parent 95123da commit bc470be
Showing 3 changed files with 164 additions and 3 deletions.
93 changes: 92 additions & 1 deletion sumologic/resource_sumologic_monitors_library_monitor.go
@@ -172,6 +172,14 @@ func getMonitorBaseSchema() map[string]*schema.Schema {
Schema: logsAnomalyTriggerConditionSchema,
},
},
metricsAnomalyConditionFieldName: {
Type: schema.TypeList,
MaxItems: 1,
Optional: true,
Elem: &schema.Resource{
Schema: metricsAnomalyTriggerConditionSchema,
},
},
},
},
},
@@ -446,6 +454,7 @@ var (
"trigger_conditions.0.slo_sli_condition",
"trigger_conditions.0.slo_burn_rate_condition",
fmt.Sprintf("trigger_conditions.0.%s", logsAnomalyConditionFieldName),
fmt.Sprintf("trigger_conditions.0.%s", metricsAnomalyConditionFieldName),
}
logStaticConditionCriticalOrWarningAtleastOneKeys = []string{
"trigger_conditions.0.logs_static_condition.0.warning",
@@ -651,6 +660,34 @@ var logsAnomalyTriggerConditionSchema = map[string]*schema.Schema{
}),
}

var metricsAnomalyTriggerConditionSchema = map[string]*schema.Schema{
    "direction": {
        Type:         schema.TypeString,
        Optional:     true,
        Default:      "Both",
        ValidateFunc: validation.StringInSlice([]string{"Both", "Up", "Down"}, false),
    },
    "anomaly_detector_type": {
        Type:         schema.TypeString,
        Required:     true,
        ValidateFunc: validation.StringInSlice([]string{"Cluster"}, false),
    },
    "critical": nested(false, schemaMap{
        "sensitivity": {
            Type:         schema.TypeFloat,
            Optional:     true,
            Default:      0.5,
            ValidateFunc: validation.FloatBetween(0.1, 1.0),
        },
        "min_anomaly_count": {
            Type:     schema.TypeInt,
            Optional: true,
            Default:  1,
        },
        "time_range": &timeRangeWithAllowedValuesSchema,
    }),
}
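For reference, a minimal illustrative snippet of how the `sensitivity` bound behaves. It relies only on the `validation` helper already used above; the function and variable names are made up for the example and are not part of this change.

```go
// Illustrative in-package snippet: FloatBetween returns a SchemaValidateFunc
// that accepts values in the inclusive range [0.1, 1.0] and errors otherwise.
func sketchSensitivityValidation() {
    validateSensitivity := validation.FloatBetween(0.1, 1.0)

    _, errs := validateSensitivity(1.5, "sensitivity")
    // len(errs) == 1 here: 1.5 is out of range, while the default of 0.5 passes.
    _ = errs
}
```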

func getBurnRateSchema(triggerType string) *schema.Schema {
burnRateThresholdConflict := fmt.Sprintf("trigger_conditions.0.slo_burn_rate_condition.0.%s.0.burn_rate_threshold", triggerType)
timeRangeConflict := fmt.Sprintf("trigger_conditions.0.slo_burn_rate_condition.0.%s.0.time_range", triggerType)
@@ -1142,6 +1179,9 @@ func triggerConditionsBlockToJson(block map[string]interface{}) []TriggerCondition {
if sc, ok := fromSingletonArray(block, logsAnomalyConditionFieldName); ok {
conditions = append(conditions, logsAnomalyConditionBlockToJson(sc)...)
}
if sc, ok := fromSingletonArray(block, metricsAnomalyConditionFieldName); ok {
conditions = append(conditions, metricsAnomalyConditionBlockToJson(sc)...)
}

return conditions
}
@@ -1266,6 +1306,21 @@ func logsAnomalyConditionBlockToJson(block map[string]interface{}) []TriggerCondition {
return base.cloneReadingFromNestedBlocks(block)
}

func metricsAnomalyConditionBlockToJson(block map[string]interface{}) []TriggerCondition {
    base := TriggerCondition{
        Direction:           block["direction"].(string),
        AnomalyDetectorType: block["anomaly_detector_type"].(string),
        DetectionMethod:     metricsAnomalyConditionDetectionMethod,
    }
    // The metrics anomaly condition has no 'alert' and 'resolution' objects, so we generate
    // empty blocks here so that reading the nested 'critical' block works.
    if subBlock, ok := fromSingletonArray(block, "critical"); ok {
        subBlock["alert"] = toSingletonArray(map[string]interface{}{})
        subBlock["resolution"] = toSingletonArray(map[string]interface{}{})
    }
    return base.cloneReadingFromNestedBlocks(block)
}
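For a concrete picture of the block-to-JSON direction, a hedged sketch in the style of the existing test file. The test name and the two-condition expectation are assumptions drawn from the comment above, not something this change asserts.

```go
// Hypothetical in-package test sketch (not part of this commit). It feeds the
// function a block shaped like a decoded `metrics_anomaly_condition` and checks
// the detection method on every resulting condition. The assumption is that
// cloneReadingFromNestedBlocks emits one TriggerCondition per trigger type
// read from "critical" ("Critical" and "ResolvedCritical").
func TestMetricsAnomalyConditionBlockToJsonSketch(t *testing.T) {
    block := map[string]interface{}{
        "direction":             "Both",
        "anomaly_detector_type": "Cluster",
        "critical": []interface{}{
            map[string]interface{}{
                "sensitivity":       0.4,
                "min_anomaly_count": 9,
                "time_range":        "-3h",
            },
        },
    }
    conditions := metricsAnomalyConditionBlockToJson(block)
    if len(conditions) != 2 {
        t.Fatalf("expected 2 conditions (Critical, ResolvedCritical), got %d", len(conditions))
    }
    for _, c := range conditions {
        if c.DetectionMethod != metricsAnomalyConditionDetectionMethod {
            t.Errorf("unexpected detection method %q", c.DetectionMethod)
        }
    }
}
```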

// TriggerCondition JSON model to 'trigger_conditions' block
func jsonToTriggerConditionsBlock(conditions []TriggerCondition) map[string]interface{} {
missingDataConditions := make([]TriggerCondition, 0)
@@ -1294,6 +1349,9 @@ func jsonToTriggerConditionsBlock(conditions []TriggerCondition) map[string]interface{} {
triggerConditionsBlock[sloBurnRateConditionFieldName] = toSingletonArray(jsonToSloBurnRateConditionBlock(dataConditions))
case logsAnomalyConditionDetectionMethod:
triggerConditionsBlock[logsAnomalyConditionFieldName] = toSingletonArray(jsonToLogsAnomalyConditionBlock(dataConditions))
case metricsAnomalyConditionDetectionMethod:
triggerConditionsBlock[metricsAnomalyConditionFieldName] = toSingletonArray(jsonToMetricsAnomalyConditionBlock(dataConditions))

}
}
if len(missingDataConditions) > 0 {
@@ -1580,6 +1638,36 @@ func jsonToLogsAnomalyConditionBlock(conditions []TriggerCondition) map[string]interface{} {
return block
}

func jsonToMetricsAnomalyConditionBlock(conditions []TriggerCondition) map[string]interface{} {
    block := map[string]interface{}{}

    block["direction"] = conditions[0].Direction
    block["anomaly_detector_type"] = conditions[0].AnomalyDetectorType

    var criticalDict = dict{}
    block["critical"] = toSingletonArray(criticalDict)

    var hasCritical = false
    for _, condition := range conditions {
        switch condition.TriggerType {
        case "Critical", "ResolvedCritical":
            hasCritical = true
            criticalDict["sensitivity"] = condition.Sensitivity
            criticalDict["min_anomaly_count"] = condition.MinAnomalyCount
            criticalDict["time_range"] = condition.PositiveTimeRange()
        }
    }
    if !hasCritical {
        delete(block, "critical")
    }
    return block
}
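The reverse direction can be sketched the same way. Only fields that appear elsewhere in this diff are set on `TriggerCondition`; the relative time range field is left out because its name is not visible here and is only surfaced through `PositiveTimeRange()`.

```go
// Hypothetical in-package sketch (not part of this commit): rebuild the
// Terraform block from API-side trigger conditions.
func sketchJsonToMetricsAnomalyBlock() map[string]interface{} {
    conditions := []TriggerCondition{
        {
            TriggerType:         "Critical",
            DetectionMethod:     metricsAnomalyConditionDetectionMethod,
            Direction:           "Both",
            AnomalyDetectorType: "Cluster",
            Sensitivity:         0.4,
            MinAnomalyCount:     9,
        },
    }
    block := jsonToMetricsAnomalyConditionBlock(conditions)
    // block holds "direction", "anomaly_detector_type" and a singleton "critical"
    // list carrying "sensitivity", "min_anomaly_count" and "time_range".
    return block
}
```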

func getAlertBlock(condition TriggerCondition) dict {
var alert = dict{}
burnRates := make([]interface{}, len(condition.BurnRates))
@@ -1622,6 +1710,7 @@ const metricsMissingDataConditionFieldName = "metrics_missing_data_condition"
const sloSLIConditionFieldName = "slo_sli_condition"
const sloBurnRateConditionFieldName = "slo_burn_rate_condition"
const logsAnomalyConditionFieldName = "logs_anomaly_condition"
const metricsAnomalyConditionFieldName = "metrics_anomaly_condition"

const logsStaticConditionDetectionMethod = "LogsStaticCondition"
const metricsStaticConditionDetectionMethod = "MetricsStaticCondition"
@@ -1632,6 +1721,7 @@ const metricsMissingDataConditionDetectionMethod = "MetricsMissingDataCondition"
const sloSLIConditionDetectionMethod = "SloSliCondition"
const sloBurnRateConditionDetectionMethod = "SloBurnRateCondition"
const logsAnomalyConditionDetectionMethod = "LogsAnomalyCondition"
const metricsAnomalyConditionDetectionMethod = "MetricsAnomalyCondition"

func getQueries(d *schema.ResourceData) []MonitorQuery {
rawQueries := d.Get("queries").([]interface{})
@@ -1817,7 +1907,8 @@ func (base TriggerCondition) cloneReadingFromNestedBlocks(block map[string]interface{}) []TriggerCondition {
resolvedCriticalCondition.OccurrenceType = ""
}

-if criticalCondition.DetectionMethod == logsAnomalyConditionDetectionMethod {
+if (criticalCondition.DetectionMethod == logsAnomalyConditionDetectionMethod) ||
+    (criticalCondition.DetectionMethod == metricsAnomalyConditionDetectionMethod) {
criticalCondition.MinAnomalyCount = critical["min_anomaly_count"].(int)
criticalCondition.Sensitivity = critical["sensitivity"].(float64)
resolvedCriticalCondition.MinAnomalyCount = criticalCondition.MinAnomalyCount
22 changes: 22 additions & 0 deletions sumologic/resource_sumologic_monitors_library_monitor_test.go
@@ -1434,6 +1434,16 @@ var exampleLogsAnomalyTriggerConditionBlock = `
}
}`

var exampleMetricsAnomalyTriggerConditionBlock = `
  metrics_anomaly_condition {
    anomaly_detector_type = "Cluster"
    critical {
      sensitivity       = 0.5
      min_anomaly_count = 5
      time_range        = "-1h"
    }
  }`

func exampleLogsStaticMonitor(testName string) string {
query := "error | timeslice 1m | count as field by _timeslice"
return exampleMonitorWithTriggerCondition(testName, "Logs", query,
@@ -1501,6 +1511,17 @@ func exampleLogsAnomalyMonitor(testName string) string {
)
}

func exampleMetricsAnomalyMonitor(testName string) string {
    query := "service=auth api=login metric=HTTP_5XX_Count | avg"
    return exampleMonitorWithTriggerCondition(
        testName,
        "Metrics",
        query,
        exampleMetricsAnomalyTriggerConditionBlock,
        []string{"Critical", "ResolvedCritical"},
    )
}

var allExampleMonitors = []func(testName string) string{
exampleLogsStaticMonitor,
exampleLogsStaticMonitorWithResolutionWindow,
@@ -1513,6 +1534,7 @@ var allExampleMonitors = []func(testName string) string{
exampleSloSliMonitor,
exampleSloBurnRateMonitor,
exampleLogsAnomalyMonitor,
exampleMetricsAnomalyMonitor,
}

func testAccSumologicMonitorsLibraryMonitorWithInvalidTriggerCondition(testName string, triggerCondition string) string {
52 changes: 50 additions & 2 deletions website/docs/r/monitor.html.markdown
@@ -375,6 +375,46 @@ resource "sumologic_monitor" "tf_example_anomaly_monitor" {
}
```

## Example Metrics Anomaly Monitor
```hcl
resource "sumologic_monitor" "tf_example_metrics_anomaly_monitor" {
name = "Example Metrics Anomaly Monitor"
description = "example metrics anomaly monitor"
type = "MonitorsLibraryMonitor"
monitor_type = "Metrics"
is_disabled = false
queries {
row_id = "A"
query = "service=auth api=login metric=HTTP_5XX_Count | avg"
}
trigger_conditions {
metrics_anomaly_condition {
anomaly_detector_type = "Cluster"
critical {
sensitivity = 0.4
min_anomaly_count = 9
time_range = "-3h"
}
}
}
notifications {
notification {
connection_type = "Email"
recipients = [
"[email protected]",
]
subject = "Monitor Alert: {{TriggerType}} on {{Name}}"
time_zone = "PST"
message_body = "Triggered {{TriggerType}} Alert on {{Name}}: {{QueryURL}}"
}
run_for_trigger_types = ["Critical", "ResolvedCritical"]
}
}
```

## Monitor Folders

NOTE: Monitor folders are considered a different resource from Library content folders. See [sumologic_monitor_folder][2] for more details.
@@ -480,7 +520,8 @@ A `trigger_conditions` block contains one or more subblocks of the following types:
- `metrics_missing_data_condition`
- `slo_sli_condition`
- `slo_burn_rate_condition`
-- `log_anomaly_condition`
+- `logs_anomaly_condition`
+- `metrics_anomaly_condition`

Subblocks should be limited to at most 1 missing data condition and at most 1 static / outlier condition.

@@ -571,14 +612,21 @@ Here is a summary of arguments for each condition type (fields which are not marked `Required` are optional):
- `burn_rate_threshold` (Required): The burn rate percentage threshold.
- `time_range` (Required): The relative time range for the burn rate percentage evaluation. Accepted format: Optional `-` sign followed by `<number>` followed by a `<time_unit>` character: `s` for seconds, `m` for minutes, `h` for hours, `d` for days. Examples: `30m`, `-12h`.

-#### log_anomaly_condition
+#### logs_anomaly_condition
- `field`: The name of the field that the trigger condition will alert on. The trigger compares the value of the specified field against the threshold. If no field is specified, the monitor defaults to the result count instead.
- `anomaly_detector_type`: The type of anomaly model that will be used for evaluating this monitor. Possible values are: `Cluster`.
- `critical`
    - `sensitivity`: The triggering sensitivity of the anomaly model used for this monitor.
    - `min_anomaly_count` (Required): The minimum number of anomalies required to exist in the current time range for the condition to trigger.
    - `time_range` (Required): The relative time range for anomaly evaluation. Accepted format: Optional `-` sign followed by `<number>` followed by a `<time_unit>` character: `s` for seconds, `m` for minutes, `h` for hours, `d` for days. Examples: `30m`, `-12h`.

#### metrics_anomaly_condition
- `anomaly_detector_type`: The type of anomaly model that will be used for evaluating this monitor. Possible values are: `Cluster`.
- `critical`
    - `sensitivity`: The triggering sensitivity of the anomaly model used for this monitor.
    - `min_anomaly_count` (Required): The minimum number of anomalies required to exist in the current time range for the condition to trigger.
    - `time_range` (Required): The relative time range for anomaly evaluation. Accepted format: Optional `-` sign followed by `<number>` followed by a `<time_unit>` character: `s` for seconds, `m` for minutes, `h` for hours, `d` for days. Examples: `30m`, `-12h`.

## The `triggers` block
The `triggers` block is deprecated. Please use `trigger_conditions` to specify notification conditions.

