forked from keephq/keep
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add datadog provider (keephq#136)
- Loading branch information
Showing
6 changed files
with
300 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
123 changes: 123 additions & 0 deletions
123
keep/providers/datadog_provider/datadog_alert_format_description.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
from typing import Literal | ||
|
||
from pydantic import BaseModel, Field | ||
|
||
|
||
class Thresholds(BaseModel):
    """Per-state threshold values for a Datadog monitor ("thresholds" option)."""

    # Trigger value and recovery value for each monitor state.
    critical: float
    critical_recovery: float
    ok: float
    warning: float
    warning_recovery: float
    unknown: float
|
||
|
||
class EvaluationWindow(BaseModel):
    """Evaluation window for cumulative monitors (nested in scheduling options)."""

    day_starts: str  # time-of-day the daily window opens, e.g. "04:00"
    hour_starts: int  # minute of the hour the hourly window opens
    month_starts: int  # day of the month the monthly window opens
|
||
|
||
class SchedulingOptions(BaseModel):
    """Scheduling configuration wrapper holding the evaluation window."""

    evaluation_window: EvaluationWindow
|
||
|
||
class ThresholdWindows(BaseModel):
    """Alerting time windows used by anomaly-style monitors."""

    recovery_window: str  # how long the metric must be normal to recover
    trigger_window: str  # how long the metric must be anomalous to trigger
|
||
|
||
class DatadogOptions(BaseModel):
    """Options object accepted by the Datadog monitor-creation API."""

    enable_logs_sample: bool
    enable_samples: bool
    escalation_message: str
    evaluation_delay: int
    group_retention_duration: str
    # Fixed typo: was "grouby_simple_monitor", which does not match the
    # Datadog API field name "groupby_simple_monitor".
    groupby_simple_monitor: bool
    include_tags: bool
    locked: bool
    min_failure_duration: int
    min_location_failed: int
    new_group_delay: int
    new_host_delay: int
    no_data_timeframe: int
    notification_preset_name: Literal[
        "show_all", "hide_query", "hide_handles", "hide_all"
    ]
    notify_audit: bool
    notify_by: list[str]
    notify_no_data: bool
    on_missing_data: Literal[
        "default", "show_no_data", "show_and_notify_no_data", "resolve"
    ]
    renotify_interval: int
    renotify_occurrences: int
    renotify_statuses: list[str]
    require_full_window: bool
    # Fixed typo: was "cheduling_options", which does not match the Datadog
    # API field name "scheduling_options".
    scheduling_options: SchedulingOptions
    silenced: dict
    threshold_windows: ThresholdWindows
    # thresholds: Thresholds
    timeout_h: int
|
||
|
||
class DatadogAlertFormatDescription(BaseModel):
    """Schema describing the payload used to create a Datadog monitor."""

    message: str = Field(
        ..., description="A message to include with notifications for this monitor."
    )
    name: str = Field(..., description="The name of the monitor.")
    options: DatadogOptions
    # Fixed: pydantic numeric bounds are ge/le — the original passed min=/max=,
    # which pydantic does not recognize, so the 1..5 range was never enforced.
    priority: int = Field(..., description="The priority of the monitor.", ge=1, le=5)
    # Fixed: dropped the bogus required=True kwarg; `...` as the default
    # already marks the field as required.
    query: str = Field(..., description="The query to monitor.")
    tags: list[str]
    # Monitor types accepted by the Datadog "create monitor" endpoint.
    type: Literal[
        "composite",
        "event alert",
        "log alert",
        "metric alert",
        "process alert",
        "query alert",
        "rum alert",
        "service check",
        "synthetics alert",
        "trace-analytics alert",
        "slo alert",
        "event-v2 alert",
        "audit alert",
        "ci-pipelines alert",
        "ci-tests alert",
        "error-tracking alert",
    ]

    class Config:
        # Example payload surfaced in the generated JSON schema.
        schema_extra = {
            "example": {
                "name": "Example-Monitor",
                "type": "rum alert",
                "query": 'formula("query2 / query1 * 100").last("15m") >= 0.8',
                "message": "some message Notify: @hipchat-channel",
                "tags": ["test:examplemonitor", "env:ci"],
                "priority": 3,
                "options": {
                    "thresholds": {"critical": 0.8},
                    "variables": [
                        {
                            "data_source": "rum",
                            "name": "query2",
                            "search": {"query": ""},
                            "indexes": ["*"],
                            "compute": {"aggregation": "count"},
                            "group_by": [],
                        },
                        {
                            "data_source": "rum",
                            "name": "query1",
                            "search": {"query": "status:error"},
                            "indexes": ["*"],
                            "compute": {"aggregation": "count"},
                            "group_by": [],
                        },
                    ],
                },
            }
        }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
""" | ||
Datadog Provider is a class that allows to ingest/digest data from Datadog. | ||
""" | ||
import datetime | ||
import time | ||
|
||
import pydantic | ||
from datadog_api_client import ApiClient, Configuration | ||
from datadog_api_client.v1.api.logs_api import LogsApi | ||
from datadog_api_client.v1.api.metrics_api import MetricsApi | ||
from datadog_api_client.v1.api.monitors_api import MonitorsApi | ||
from datadog_api_client.v1.model.monitor import Monitor | ||
from datadog_api_client.v1.model.monitor_type import MonitorType | ||
|
||
from keep.providers.base.base_provider import BaseProvider | ||
from keep.providers.datadog_provider.datadog_alert_format_description import ( | ||
DatadogAlertFormatDescription, | ||
) | ||
from keep.providers.models.provider_config import ProviderConfig | ||
from keep.providers.providers_factory import ProvidersFactory | ||
|
||
|
||
@pydantic.dataclasses.dataclass
class DatadogAuthConfig:
    """
    Datadog authentication configuration.

    Carries the two credentials required by the Datadog API client.
    """

    api_key: str  # Datadog API key (used as "apiKeyAuth")
    app_key: str  # Datadog application key (used as "appKeyAuth")
|
||
|
||
class DatadogProvider(BaseProvider):
    """
    Datadog provider class.

    Queries logs/metrics from Datadog and lists/creates monitors.
    """

    @staticmethod
    def convert_to_seconds(s: str) -> int:
        """Convert a timeframe string such as "15m" or "4w" into seconds."""
        seconds_per_unit = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800}
        return int(s[:-1]) * seconds_per_unit[s[-1]]

    def __init__(self, provider_id: str, config: ProviderConfig):
        super().__init__(provider_id, config)
        self.configuration = Configuration()
        # validate_config (invoked by BaseProvider) populates
        # self.authentication_config before these lines run.
        self.configuration.api_key["apiKeyAuth"] = self.authentication_config.api_key
        self.configuration.api_key["appKeyAuth"] = self.authentication_config.app_key

    def dispose(self):
        """
        Dispose the provider. No persistent connection is held (an ApiClient
        context manager is opened per call), so this is a no-op.
        """
        pass

    def validate_config(self):
        """
        Validates required configuration for Datadog provider.

        Raises:
            pydantic.ValidationError: if api_key or app_key is missing.
        """
        self.authentication_config = DatadogAuthConfig(**self.config.authentication)

    def query(self, **kwargs: dict):
        """
        Query Datadog logs or metrics over a lookback window.

        Keyword Args:
            query (str): the Datadog query string.
            timeframe (str): lookback window, e.g. "15m", "4w".
            query_type (str): "logs" or "metrics".

        Returns:
            The raw Datadog API response for the selected query type.

        Raises:
            ValueError: on an unsupported query_type (previously this path
                crashed with UnboundLocalError on `results`).
        """
        query = kwargs.get("query")
        timeframe = kwargs.get("timeframe")
        timeframe_in_seconds = DatadogProvider.convert_to_seconds(timeframe)
        query_type = kwargs.get("query_type")
        if query_type == "logs":
            with ApiClient(self.configuration) as api_client:
                api = LogsApi(api_client)
                results = api.list_logs(
                    body={
                        "query": query,
                        "time": {
                            "_from": datetime.datetime.fromtimestamp(
                                time.time() - timeframe_in_seconds
                            ),
                            "to": datetime.datetime.fromtimestamp(time.time()),
                        },
                    }
                )
        elif query_type == "metrics":
            with ApiClient(self.configuration) as api_client:
                api = MetricsApi(api_client)
                # Fixed: _from/to are epoch *seconds*; the original subtracted
                # timeframe_in_seconds * 1000 (a milliseconds value), pushing
                # the window start ~1000x further back than requested.
                results = api.query_metrics(
                    query=query,
                    _from=time.time() - timeframe_in_seconds,
                    to=time.time(),
                )
        else:
            raise ValueError(f"Unsupported query_type: {query_type}")
        return results

    def get_alerts(self, alert_id: str | None = None):
        """
        List Datadog monitors as dicts, optionally filtered to one monitor id.
        """
        with ApiClient(self.configuration) as api_client:
            api = MonitorsApi(api_client)
            monitors = api.list_monitors()
            monitors = [monitor.to_dict() for monitor in monitors]
            if alert_id:
                monitors = list(
                    filter(lambda monitor: monitor["id"] == alert_id, monitors)
                )
        return monitors

    def deploy_alert(self, alert: dict, alert_id: str | None = None):
        """
        Create a Datadog monitor from the given alert dict.

        Raises:
            Exception: wrapping the first Datadog API error message.
        """
        body = Monitor(**alert)
        with ApiClient(self.configuration) as api_client:
            api_instance = MonitorsApi(api_client)
            try:
                response = api_instance.create_monitor(body=body)
            except Exception as e:
                # NOTE(review): assumes a datadog ApiException with a .body
                # containing "errors"; chain the cause so other exception
                # types are still diagnosable.
                raise Exception({"message": e.body["errors"][0]}) from e
        return response

    @staticmethod
    def get_alert_format_description():
        """Return the JSON schema describing the monitor-creation payload."""
        return DatadogAlertFormatDescription.schema()
|
||
|
||
if __name__ == "__main__":
    # Output debug messages
    import logging

    logging.basicConfig(level=logging.DEBUG, handlers=[logging.StreamHandler()])

    # Load environment variables
    import os

    api_key = os.environ.get("DATADOG_API_KEY")
    app_key = os.environ.get("DATADOG_APP_KEY")

    config = {
        "authentication": {"api_key": api_key, "app_key": app_key},
    }
    provider = ProvidersFactory.get_provider(
        provider_id="datadog-keephq", provider_type="datadog", provider_config=config
    )
    results = provider.query(
        query="service:keep-github-app status:error", timeframe="4w", query_type="logs"
    )
    """
    alerts = provider.deploy_alert(
        {
            "name": "Error Rate Alert",
            "type": "metric alert",
            "query": "sum:myapp.server.errors{service:talboren/simple-crud-service}.as_count().rollup(sum, 600) > 5",
            "message": "The error rate for talboren/simple-crud-service has exceeded 5% in the last 10 minutes. Please investigate immediately",
            "tags": ["service:talboren/simple-crud-service", "severity:critical"],
            "options": {
                "thresholds": {"critical": 5},
                "notify_audit": False,
                "notify_no_data": False,
                "require_full_window": True,
                "timeout_h": 1,
                "silenced": {},
            },
            "restricted_roles": [],
            "priority": 2,
        }
    )
    """
    # Fixed: the original printed `alerts`, which is only assigned inside the
    # commented-out (string-literal) example above — a guaranteed NameError.
    print(results)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters