From bc470be167330ba7adfb992c40c3a192b37a31c5 Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Wed, 23 Oct 2024 15:22:53 -0600 Subject: [PATCH] Adding terraform support for metric anomaly monitors --- ...urce_sumologic_monitors_library_monitor.go | 93 ++++++++++++++++++- ...sumologic_monitors_library_monitor_test.go | 22 +++++ website/docs/r/monitor.html.markdown | 52 ++++++++++- 3 files changed, 164 insertions(+), 3 deletions(-) diff --git a/sumologic/resource_sumologic_monitors_library_monitor.go b/sumologic/resource_sumologic_monitors_library_monitor.go index 15548e8e..580e00ff 100644 --- a/sumologic/resource_sumologic_monitors_library_monitor.go +++ b/sumologic/resource_sumologic_monitors_library_monitor.go @@ -172,6 +172,14 @@ func getMonitorBaseSchema() map[string]*schema.Schema { Schema: logsAnomalyTriggerConditionSchema, }, }, + metricsAnomalyConditionFieldName: { + Type: schema.TypeList, + MaxItems: 1, + Optional: true, + Elem: &schema.Resource{ + Schema: metricsAnomalyTriggerConditionSchema, + }, + }, }, }, }, @@ -446,6 +454,7 @@ var ( "trigger_conditions.0.slo_sli_condition", "trigger_conditions.0.slo_burn_rate_condition", fmt.Sprintf("trigger_conditions.0.%s", logsAnomalyConditionFieldName), + fmt.Sprintf("trigger_conditions.0.%s", metricsAnomalyConditionFieldName), } logStaticConditionCriticalOrWarningAtleastOneKeys = []string{ "trigger_conditions.0.logs_static_condition.0.warning", @@ -651,6 +660,34 @@ var logsAnomalyTriggerConditionSchema = map[string]*schema.Schema{ }), } +var metricsAnomalyTriggerConditionSchema = map[string]*schema.Schema{ + "direction": { + Type: schema.TypeString, + Optional: true, + Default: "Both", + ValidateFunc: validation.StringInSlice([]string{"Both", "Up", "Down"}, false), + }, + "anomaly_detector_type": { + Type: schema.TypeString, + Required: true, + ValidateFunc: validation.StringInSlice([]string{"Cluster"}, false), + }, + "critical": nested(false, schemaMap{ + "sensitivity": { + Type: schema.TypeFloat, + Optional: 
true, + Default: 0.5, + ValidateFunc: validation.FloatBetween(0.1, 1.0), + }, + "min_anomaly_count": { + Type: schema.TypeInt, + Optional: true, + Default: 1, + }, + "time_range": &timeRangeWithAllowedValuesSchema, + }), +} + func getBurnRateSchema(triggerType string) *schema.Schema { burnRateThresholdConflict := fmt.Sprintf("trigger_conditions.0.slo_burn_rate_condition.0.%s.0.burn_rate_threshold", triggerType) timeRangeConflict := fmt.Sprintf("trigger_conditions.0.slo_burn_rate_condition.0.%s.0.time_range", triggerType) @@ -1142,6 +1179,9 @@ func triggerConditionsBlockToJson(block map[string]interface{}) []TriggerConditi if sc, ok := fromSingletonArray(block, logsAnomalyConditionFieldName); ok { conditions = append(conditions, logsAnomalyConditionBlockToJson(sc)...) } + if sc, ok := fromSingletonArray(block, metricsAnomalyConditionFieldName); ok { + conditions = append(conditions, metricsAnomalyConditionBlockToJson(sc)...) + } return conditions } @@ -1266,6 +1306,21 @@ func logsAnomalyConditionBlockToJson(block map[string]interface{}) []TriggerCond return base.cloneReadingFromNestedBlocks(block) } +func metricsAnomalyConditionBlockToJson(block map[string]interface{}) []TriggerCondition { + base := TriggerCondition{ + Direction: block["direction"].(string), + AnomalyDetectorType: block["anomaly_detector_type"].(string), + DetectionMethod: metricsAnomalyConditionDetectionMethod, + } + // metric anomaly condition does not have 'alert' and 'resolution' objects. 
Here we generate empty blocks + // for reading to work + if subBlock, ok := fromSingletonArray(block, "critical"); ok { + subBlock["alert"] = toSingletonArray(map[string]interface{}{}) + subBlock["resolution"] = toSingletonArray(map[string]interface{}{}) + } + return base.cloneReadingFromNestedBlocks(block) +} + // TriggerCondition JSON model to 'trigger_conditions' block func jsonToTriggerConditionsBlock(conditions []TriggerCondition) map[string]interface{} { missingDataConditions := make([]TriggerCondition, 0) @@ -1294,6 +1349,9 @@ func jsonToTriggerConditionsBlock(conditions []TriggerCondition) map[string]inte triggerConditionsBlock[sloBurnRateConditionFieldName] = toSingletonArray(jsonToSloBurnRateConditionBlock(dataConditions)) case logsAnomalyConditionDetectionMethod: triggerConditionsBlock[logsAnomalyConditionFieldName] = toSingletonArray(jsonToLogsAnomalyConditionBlock(dataConditions)) + case metricsAnomalyConditionDetectionMethod: + triggerConditionsBlock[metricsAnomalyConditionFieldName] = toSingletonArray(jsonToMetricsAnomalyConditionBlock(dataConditions)) + } } if len(missingDataConditions) > 0 { @@ -1580,6 +1638,36 @@ func jsonToLogsAnomalyConditionBlock(conditions []TriggerCondition) map[string]i return block } +func jsonToMetricsAnomalyConditionBlock(conditions []TriggerCondition) map[string]interface{} { + block := map[string]interface{}{} + + block["direction"] = conditions[0].Direction + block["anomaly_detector_type"] = conditions[0].AnomalyDetectorType + + var criticalDict = dict{} + block["critical"] = toSingletonArray(criticalDict) + + var hasCritical = false + for _, condition := range conditions { + switch condition.TriggerType { + case "Critical": + hasCritical = true + criticalDict["sensitivity"] = condition.Sensitivity + criticalDict["min_anomaly_count"] = condition.MinAnomalyCount + criticalDict["time_range"] = condition.PositiveTimeRange() + case "ResolvedCritical": + hasCritical = true + criticalDict["sensitivity"] = 
condition.Sensitivity + criticalDict["min_anomaly_count"] = condition.MinAnomalyCount + criticalDict["time_range"] = condition.PositiveTimeRange() + } + } + if !hasCritical { + delete(block, "critical") + } + return block +} + func getAlertBlock(condition TriggerCondition) dict { var alert = dict{} burnRates := make([]interface{}, len(condition.BurnRates)) @@ -1622,6 +1710,7 @@ const metricsMissingDataConditionFieldName = "metrics_missing_data_condition" const sloSLIConditionFieldName = "slo_sli_condition" const sloBurnRateConditionFieldName = "slo_burn_rate_condition" const logsAnomalyConditionFieldName = "logs_anomaly_condition" +const metricsAnomalyConditionFieldName = "metrics_anomaly_condition" const logsStaticConditionDetectionMethod = "LogsStaticCondition" const metricsStaticConditionDetectionMethod = "MetricsStaticCondition" @@ -1632,6 +1721,7 @@ const metricsMissingDataConditionDetectionMethod = "MetricsMissingDataCondition" const sloSLIConditionDetectionMethod = "SloSliCondition" const sloBurnRateConditionDetectionMethod = "SloBurnRateCondition" const logsAnomalyConditionDetectionMethod = "LogsAnomalyCondition" +const metricsAnomalyConditionDetectionMethod = "MetricsAnomalyCondition" func getQueries(d *schema.ResourceData) []MonitorQuery { rawQueries := d.Get("queries").([]interface{}) @@ -1817,7 +1907,8 @@ func (base TriggerCondition) cloneReadingFromNestedBlocks(block map[string]inter resolvedCriticalCondition.OccurrenceType = "" } - if criticalCondition.DetectionMethod == logsAnomalyConditionDetectionMethod { + if (criticalCondition.DetectionMethod == logsAnomalyConditionDetectionMethod) || + (criticalCondition.DetectionMethod == metricsAnomalyConditionDetectionMethod) { criticalCondition.MinAnomalyCount = critical["min_anomaly_count"].(int) criticalCondition.Sensitivity = critical["sensitivity"].(float64) resolvedCriticalCondition.MinAnomalyCount = criticalCondition.MinAnomalyCount diff --git 
a/sumologic/resource_sumologic_monitors_library_monitor_test.go b/sumologic/resource_sumologic_monitors_library_monitor_test.go index 022f2d7e..0276bdf7 100644 --- a/sumologic/resource_sumologic_monitors_library_monitor_test.go +++ b/sumologic/resource_sumologic_monitors_library_monitor_test.go @@ -1434,6 +1434,16 @@ var exampleLogsAnomalyTriggerConditionBlock = ` } }` +var exampleMetricsAnomalyTriggerConditionBlock = ` + metrics_anomaly_condition { + anomaly_detector_type = "Cluster" + critical { + sensitivity = 0.5 + min_anomaly_count = 5 + time_range = "-1h" + } + }` + func exampleLogsStaticMonitor(testName string) string { query := "error | timeslice 1m | count as field by _timeslice" return exampleMonitorWithTriggerCondition(testName, "Logs", query, @@ -1501,6 +1511,17 @@ func exampleLogsAnomalyMonitor(testName string) string { ) } +func exampleMetricsAnomalyMonitor(testName string) string { + query := "service=auth api=login metric=HTTP_5XX_Count | avg" + return exampleMonitorWithTriggerCondition( + testName, + "Metrics", + query, + exampleMetricsAnomalyTriggerConditionBlock, + []string{"Critical", "ResolvedCritical"}, + ) +} + var allExampleMonitors = []func(testName string) string{ exampleLogsStaticMonitor, exampleLogsStaticMonitorWithResolutionWindow, @@ -1513,6 +1534,7 @@ var allExampleMonitors = []func(testName string) string{ exampleSloSliMonitor, exampleSloBurnRateMonitor, exampleLogsAnomalyMonitor, + exampleMetricsAnomalyMonitor, } func testAccSumologicMonitorsLibraryMonitorWithInvalidTriggerCondition(testName string, triggerCondition string) string { diff --git a/website/docs/r/monitor.html.markdown b/website/docs/r/monitor.html.markdown index d7b84deb..8a20e278 100644 --- a/website/docs/r/monitor.html.markdown +++ b/website/docs/r/monitor.html.markdown @@ -375,6 +375,46 @@ resource "sumologic_monitor" "tf_example_anomaly_monitor" { } ``` +## Example Metrics Anomaly Monitor +```hcl +resource "sumologic_monitor" "tf_example_metrics_anomaly_monitor" { 
+ name = "Example Metrics Anomaly Monitor" + description = "example metrics anomaly monitor" + type = "MonitorsLibraryMonitor" + monitor_type = "Metrics" + is_disabled = false + + queries { + row_id = "A" + query = "service=auth api=login metric=HTTP_5XX_Count | avg" + } + + trigger_conditions { + metrics_anomaly_condition { + anomaly_detector_type = "Cluster" + critical { + sensitivity = 0.4 + min_anomaly_count = 9 + time_range = "-3h" + } + } + } + + notifications { + notification { + connection_type = "Email" + recipients = [ + "anomaly@example.com", + ] + subject = "Monitor Alert: {{TriggerType}} on {{Name}}" + time_zone = "PST" + message_body = "Triggered {{TriggerType}} Alert on {{Name}}: {{QueryURL}}" + } + run_for_trigger_types = ["Critical", "ResolvedCritical"] + } +} +``` + ## Monitor Folders NOTE: Monitor folders are considered a different resource from Library content folders. See [sumologic_monitor_folder][2] for more details. @@ -480,7 +520,8 @@ A `trigger_conditions` block contains one or more subblocks of the following typ - `metrics_missing_data_condition` - `slo_sli_condition` - `slo_burn_rate_condition` -- `log_anomaly_condition` +- `logs_anomaly_condition` +- `metrics_anomaly_condition` Subblocks should be limited to at most 1 missing data condition and at most 1 static / outlier condition. @@ -571,7 +612,7 @@ Here is a summary of arguments for each condition type (fields which are not mar - `burn_rate_threshold` (Required): The burn rate percentage threshold. - `time_range` (Required): The relative time range for the burn rate percentage evaluation. Accepted format: Optional `-` sign followed by `<number>` followed by a `<time_unit>` character: `s` for seconds, `m` for minutes, `h` for hours, `d` for days. Examples: `30m`, `-12h`. -#### log_anomaly_condition +#### logs_anomaly_condition - `field`: The name of the field that the trigger condition will alert on. The trigger could compare the value of specified field with the threshold. 
If field is not specified, monitor would default to result count instead. - `anomaly_detector_type`: The type of anomaly model that will be used for evaluating this monitor. Possible values are: `Cluster`. - `critical` @@ -579,6 +620,13 @@ Here is a summary of arguments for each condition type (fields which are not mar - `min_anomaly_count` (Required) : The minimum number of anomalies required to exist in the current time range for the condition to trigger. - `time_range` (Required) : The relative time range for anomaly evaluation. Accepted format: Optional `-` sign followed by `<number>` followed by a `<time_unit>` character: `s` for seconds, `m` for minutes, `h` for hours, `d` for days. Examples: `30m`, `-12h`. +#### metrics_anomaly_condition +- `anomaly_detector_type` (Required): The type of anomaly model that will be used for evaluating this monitor. Possible values are: `Cluster`. +- `critical` + - `sensitivity`: The triggering sensitivity of the anomaly model used for this monitor. Valid values are between `0.1` and `1.0`; defaults to `0.5`. + - `min_anomaly_count`: The minimum number of anomalies required to exist in the current time range for the condition to trigger. Defaults to `1`. + - `time_range` (Required) : The relative time range for anomaly evaluation. Accepted format: Optional `-` sign followed by `<number>` followed by a `<time_unit>` character: `s` for seconds, `m` for minutes, `h` for hours, `d` for days. Examples: `30m`, `-12h`. + ## The `triggers` block The `triggers` block is deprecated. Please use `trigger_conditions` to specify notification conditions.