diff --git a/frontend/__snapshots__/exporter-exporter--trends-line-insight-detailed--dark.png b/frontend/__snapshots__/exporter-exporter--trends-line-insight-detailed--dark.png index b54d4facb0bfe..e3e93b3dd9678 100644 Binary files a/frontend/__snapshots__/exporter-exporter--trends-line-insight-detailed--dark.png and b/frontend/__snapshots__/exporter-exporter--trends-line-insight-detailed--dark.png differ diff --git a/frontend/__snapshots__/exporter-exporter--trends-line-insight-detailed--light.png b/frontend/__snapshots__/exporter-exporter--trends-line-insight-detailed--light.png index 332ea24ce3084..d4377362d9270 100644 Binary files a/frontend/__snapshots__/exporter-exporter--trends-line-insight-detailed--light.png and b/frontend/__snapshots__/exporter-exporter--trends-line-insight-detailed--light.png differ diff --git a/frontend/src/lib/components/Alerts/insightAlertsLogic.ts b/frontend/src/lib/components/Alerts/insightAlertsLogic.ts index 6bca4dc317fa1..5ef92b84b8de2 100644 --- a/frontend/src/lib/components/Alerts/insightAlertsLogic.ts +++ b/frontend/src/lib/components/Alerts/insightAlertsLogic.ts @@ -3,7 +3,7 @@ import { loaders } from 'kea-loaders' import api from 'lib/api' import { insightVizDataLogic } from 'scenes/insights/insightVizDataLogic' -import { GoalLine, InsightThresholdType } from '~/queries/schema' +import { AlertConditionType, GoalLine, InsightThresholdType } from '~/queries/schema' import { getBreakdown, isInsightVizNode, isTrendsQuery } from '~/queries/utils' import { InsightLogicProps } from '~/types' @@ -67,6 +67,7 @@ export const insightAlertsLogic = kea([ alerts.flatMap((alert) => { if ( alert.threshold.configuration.type !== InsightThresholdType.ABSOLUTE || + alert.condition.type !== AlertConditionType.ABSOLUTE_VALUE || !alert.threshold.configuration.bounds ) { return [] diff --git a/posthog/api/alert.py b/posthog/api/alert.py index 707db62140c4a..a177c61b0322d 100644 --- a/posthog/api/alert.py +++ b/posthog/api/alert.py @@ -168,7 +168,9 @@ def update(self, instance, validated_data): else: # always store snoozed_until as UTC time # as we look at current UTC time to check when to run alerts - snoozed_until = relative_date_parse(snoozed_until_param, ZoneInfo("UTC"), increase=True) + snoozed_until = relative_date_parse( + snoozed_until_param, ZoneInfo("UTC"), increase=True, always_truncate=True + ) instance.state = AlertState.SNOOZED instance.snoozed_until = snoozed_until diff --git a/posthog/tasks/alerts/checks.py b/posthog/tasks/alerts/checks.py index 4986899047faa..d6f8c020f1d7e 100644 --- a/posthog/tasks/alerts/checks.py +++ b/posthog/tasks/alerts/checks.py @@ -1,10 +1,14 @@ +import math +import time +import traceback + from datetime import datetime, timedelta, UTC from typing import cast from dateutil.relativedelta import relativedelta -import traceback from celery import shared_task from celery.canvas import chain +from django.conf import settings from django.db import transaction import structlog from sentry_sdk import capture_exception @@ -39,7 +43,15 @@ logger = structlog.get_logger(__name__) -class AlertCheckException(Exception): ... +class AlertCheckException(Exception): + """ + Required for custom exceptions to pass stack trace to sentry. + Subclassing through other ways doesn't transfer the traceback. 
+ https://stackoverflow.com/a/69963663/5540417 + """ + + def __init__(self, err: Exception): + self.__traceback__ = err.__traceback__ HOURLY_ALERTS_BACKLOG_GAUGE = Gauge( @@ -102,6 +114,9 @@ def alerts_backlog_task() -> None: DAILY_ALERTS_BACKLOG_GAUGE.set(daily_alerts_breaching_sla) + # sleeping 30s for prometheus to pick up the metrics sent during task + time.sleep(30) + @shared_task( ignore_result=True, @@ -158,6 +173,8 @@ def check_alert_task(alert_id: str) -> None: def check_alert(alert_id: str) -> None: + task_start_time = time.time() + try: alert = AlertConfiguration.objects.get(id=alert_id, enabled=True) except AlertConfiguration.DoesNotExist: @@ -199,8 +216,15 @@ def check_alert(alert_id: str) -> None: check_alert_and_notify_atomically(alert) except Exception as err: ALERT_CHECK_ERROR_COUNTER.inc() + logger.exception(AlertCheckException(err)) - capture_exception(AlertCheckException(err)) + capture_exception( + AlertCheckException(err), + tags={ + "alert_configuration_id": alert_id, + }, + ) + # raise again so alert check is retried depending on error type raise finally: @@ -209,6 +233,15 @@ def check_alert(alert_id: str) -> None: alert.is_calculating = False alert.save() + # only in PROD + if not settings.DEBUG and not settings.TEST: + task_duration = time.time() - task_start_time + + # Ensure task runs at least 40s + # for prometheus to pick up the metrics sent during task + time_left_to_run = 40 - math.floor(task_duration) + time.sleep(time_left_to_run) + @transaction.atomic def check_alert_and_notify_atomically(alert: AlertConfiguration) -> None: diff --git a/posthog/tasks/alerts/test/test_trends_absolute_alerts.py b/posthog/tasks/alerts/test/test_trends_absolute_alerts.py index 9402117e79fe0..48e4228ad0079 100644 --- a/posthog/tasks/alerts/test/test_trends_absolute_alerts.py +++ b/posthog/tasks/alerts/test/test_trends_absolute_alerts.py @@ -1,8 +1,7 @@ from typing import Optional, Any -from unittest.mock import MagicMock, patch +from unittest.mock import ANY, MagicMock, patch import dateutil - from freezegun import freeze_time from posthog.models.alert import AlertCheck @@ -29,7 +28,7 @@ FROZEN_TIME = dateutil.parser.parse("2024-06-02T08:55:00.000Z") -@freeze_time("2024-06-02T08:55:00.000Z") +@freeze_time(FROZEN_TIME) @patch("posthog.tasks.alerts.checks.send_notifications_for_errors") @patch("posthog.tasks.alerts.checks.send_notifications_for_breaches") class TestTimeSeriesTrendsAbsoluteAlerts(APIBaseTest, ClickhouseDestroyTablesMixin): @@ -97,7 +96,7 @@ def create_time_series_trend_insight(self, breakdown: Optional[BreakdownFilter] return insight - def test_alert_properties(self, mock_send_breaches: MagicMock, mock_send_errors: MagicMock) -> None: + def test_alert_lower_threshold_breached(self, mock_send_breaches: MagicMock, mock_send_errors: MagicMock) -> None: insight = self.create_time_series_trend_insight() alert = self.create_alert(insight, series_index=0, lower=1) @@ -119,11 +118,15 @@ def test_alert_properties(self, mock_send_breaches: MagicMock, mock_send_errors: assert alert_check.state == AlertState.FIRING assert alert_check.error is None + mock_send_breaches.assert_called_once_with( + ANY, ["The insight value for previous week is (0) less than lower threshold (1.0)"] + ) + def test_trend_high_threshold_breached(self, mock_send_breaches: MagicMock, mock_send_errors: MagicMock) -> None: insight = self.create_time_series_trend_insight() alert = self.create_alert(insight, series_index=0, upper=1) - with freeze_time("2024-06-02T07:55:00.000Z"): + with 
freeze_time(FROZEN_TIME - dateutil.relativedelta.relativedelta(days=1)): _create_event( team=self.team, event="signed_up", @@ -149,11 +152,15 @@ def test_trend_high_threshold_breached(self, mock_send_breaches: MagicMock, mock assert alert_check.state == AlertState.FIRING assert alert_check.error is None + mock_send_breaches.assert_called_once_with( + ANY, ["The insight value for previous week is (2) more than upper threshold (1.0)"] + ) + def test_trend_no_threshold_breached(self, mock_send_breaches: MagicMock, mock_send_errors: MagicMock) -> None: insight = self.create_time_series_trend_insight() alert = self.create_alert(insight, series_index=0, lower=0, upper=2) - with freeze_time("2024-06-02T07:55:00.000Z"): + with freeze_time(FROZEN_TIME - dateutil.relativedelta.relativedelta(days=1)): _create_event( team=self.team, event="signed_up", @@ -172,45 +179,3 @@ def test_trend_no_threshold_breached(self, mock_send_breaches: MagicMock, mock_s assert alert_check.calculated_value == 1 assert alert_check.state == AlertState.NOT_FIRING assert alert_check.error is None - - # TODO: support breakdowns - def test_trend_with_single_breakdown_threshold_breached( - self, mock_send_breaches: MagicMock, mock_send_errors: MagicMock - ) -> None: - insight = self.create_time_series_trend_insight( - breakdown=BreakdownFilter(breakdown_type="event", breakdown="$browser") - ) - alert = self.create_alert(insight, series_index=0, lower=0, upper=1) - - with freeze_time("2024-06-02T07:55:00.000Z"): - _create_event( - team=self.team, - event="signed_up", - distinct_id="1", - properties={"$browser": "Chrome"}, - ) - _create_event( - team=self.team, - event="signed_up", - distinct_id="2", - properties={"$browser": "Chrome"}, - ) - _create_event( - team=self.team, - event="signed_up", - distinct_id="1", - properties={"$browser": "Firefox"}, - ) - flush_persons_and_events() - - check_alert(alert["id"]) - - updated_alert = AlertConfiguration.objects.get(pk=alert["id"]) - assert updated_alert.state == AlertState.FIRING - assert updated_alert.next_check_at == FROZEN_TIME + dateutil.relativedelta.relativedelta(days=1) - - alert_check = AlertCheck.objects.filter(alert_configuration=alert["id"]).latest("created_at") - # calculated value should only be from browser = Chrome - assert alert_check.calculated_value == 2 - assert alert_check.state == AlertState.FIRING - assert alert_check.error is None diff --git a/posthog/tasks/alerts/test/test_trends_relative_alerts.py b/posthog/tasks/alerts/test/test_trends_relative_alerts.py index 6e5b17b633894..e6007848e3c92 100644 --- a/posthog/tasks/alerts/test/test_trends_relative_alerts.py +++ b/posthog/tasks/alerts/test/test_trends_relative_alerts.py @@ -1,5 +1,5 @@ from typing import Optional, Any -from unittest.mock import MagicMock, patch +from unittest.mock import ANY, MagicMock, patch import dateutil @@ -184,6 +184,10 @@ def test_relative_increase_absolute_upper_threshold_breached( assert alert_check.state == AlertState.FIRING assert alert_check.error is None + mock_send_breaches.assert_called_once_with( + ANY, ["The insight value for previous week increased (2) more than upper threshold (1.0)"] + ) + def test_relative_increase_upper_threshold_breached( self, mock_send_breaches: MagicMock, mock_send_errors: MagicMock ) -> None: @@ -341,6 +345,10 @@ def test_relative_increase_lower_threshold_breached_1( assert alert_check.state == AlertState.FIRING assert alert_check.error is None + mock_send_breaches.assert_called_once_with( + ANY, ["The insight value for previous week increased (-1) 
less than lower threshold (2.0)"] + ) + # check percentage alert check_alert(percentage_alert["id"]) @@ -354,6 +362,10 @@ def test_relative_increase_lower_threshold_breached_1( assert alert_check.state == AlertState.FIRING assert alert_check.error is None + mock_send_breaches.assert_called_with( + ANY, ["The insight value for previous week increased (-50.00%) less than lower threshold (50.00%)"] + ) + def test_relative_increase_lower_threshold_breached_2( self, mock_send_breaches: MagicMock, mock_send_errors: MagicMock ) -> None: @@ -511,6 +523,10 @@ def test_relative_decrease_upper_threshold_breached( assert alert_check.state == AlertState.FIRING assert alert_check.error is None + mock_send_breaches.assert_called_once_with( + ANY, ["The insight value for previous week decreased (2) more than upper threshold (1.0)"] + ) + check_alert(percentage_alert["id"]) updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) @@ -523,6 +539,10 @@ def test_relative_decrease_upper_threshold_breached( assert alert_check.state == AlertState.FIRING assert alert_check.error is None + mock_send_breaches.assert_called_with( + ANY, ["The insight value for previous week decreased (66.67%) more than upper threshold (20.00%)"] + ) + def test_relative_decrease_lower_threshold_breached( self, mock_send_breaches: MagicMock, mock_send_errors: MagicMock ) -> None: @@ -592,6 +612,10 @@ def test_relative_decrease_lower_threshold_breached( assert alert_check.state == AlertState.FIRING assert alert_check.error is None + mock_send_breaches.assert_called_once_with( + ANY, ["The insight value for previous week decreased (1) less than lower threshold (2.0)"] + ) + check_alert(percentage_alert["id"]) updated_alert = AlertConfiguration.objects.get(pk=percentage_alert["id"]) @@ -604,6 +628,10 @@ def test_relative_decrease_lower_threshold_breached( assert alert_check.state == AlertState.FIRING assert alert_check.error is None + mock_send_breaches.assert_called_with( + ANY, ["The insight value for previous week decreased (50.00%) less than lower threshold (80.00%)"] + ) + def test_relative_increase_no_threshold_breached( self, mock_send_breaches: MagicMock, mock_send_errors: MagicMock ) -> None: diff --git a/posthog/tasks/alerts/trends.py b/posthog/tasks/alerts/trends.py index 3f7fdebae2644..ca357e021470d 100644 --- a/posthog/tasks/alerts/trends.py +++ b/posthog/tasks/alerts/trends.py @@ -62,7 +62,7 @@ def check_trends_alert(alert: AlertConfiguration, insight: Insight, query: Trend # want value for current interval (last hour, last day, last week, last month) # depending on the alert calculation interval if _is_non_time_series_trend(query): - filters_override = _date_range_override_for_intervals(query) + filters_override = _date_range_override_for_intervals(query, last_x_intervals=2) else: # for non time series, it's an aggregated value for full interval # so we need to compute full insight @@ -79,10 +79,12 @@ def check_trends_alert(alert: AlertConfiguration, insight: Insight, query: Trend if not calculation_result.result: raise RuntimeError(f"No results found for insight with alert id = {alert.id}") - current_interval_value = _pick_interval_value_from_trend_result(config, query, calculation_result) - breaches = _validate_bounds(threshold.bounds, current_interval_value) + prev_interval_value = _pick_interval_value_from_trend_result(config, query, calculation_result, -1) + breaches = _validate_bounds( + threshold.bounds, prev_interval_value, threshold.type, condition.type, query.interval + ) - return 
AlertEvaluationResult(value=current_interval_value, breaches=breaches) + return AlertEvaluationResult(value=prev_interval_value, breaches=breaches) case AlertConditionType.RELATIVE_INCREASE: if _is_non_time_series_trend(query): @@ -107,10 +109,10 @@ def check_trends_alert(alert: AlertConfiguration, insight: Insight, query: Trend if threshold.type == InsightThresholdType.ABSOLUTE: increase = prev_interval_value - prev_prev_interval_value - breaches = _validate_bounds(threshold.bounds, increase) + breaches = _validate_bounds(threshold.bounds, increase, threshold.type, condition.type, query.interval) elif threshold.type == InsightThresholdType.PERCENTAGE: increase = (prev_interval_value - prev_prev_interval_value) / prev_prev_interval_value - breaches = _validate_bounds(threshold.bounds, increase, is_percentage=True) + breaches = _validate_bounds(threshold.bounds, increase, threshold.type, condition.type, query.interval) else: raise ValueError( f"Neither relative nor absolute threshold configured for alert condition RELATIVE_INCREASE" @@ -141,10 +143,10 @@ def check_trends_alert(alert: AlertConfiguration, insight: Insight, query: Trend if threshold.type == InsightThresholdType.ABSOLUTE: decrease = prev_prev_interval_value - prev_interval_value - breaches = _validate_bounds(threshold.bounds, decrease) + breaches = _validate_bounds(threshold.bounds, decrease, threshold.type, condition.type, query.interval) elif threshold.type == InsightThresholdType.PERCENTAGE: decrease = (prev_prev_interval_value - prev_interval_value) / prev_prev_interval_value - breaches = _validate_bounds(threshold.bounds, decrease, is_percentage=True) + breaches = _validate_bounds(threshold.bounds, decrease, threshold.type, condition.type, query.interval) else: raise ValueError( f"Neither relative nor absolute threshold configured for alert condition RELATIVE_INCREASE" @@ -202,18 +204,36 @@ def _pick_interval_value_from_trend_result( def _validate_bounds( - bounds: InsightsThresholdBounds | None, calculated_value: float, is_percentage: bool = False + bounds: InsightsThresholdBounds | None, + calculated_value: float, + threshold_type: InsightThresholdType, + condition_type: AlertConditionType, + interval_type: IntervalType, ) -> list[str]: if not bounds: return [] + is_percentage = threshold_type == InsightThresholdType.PERCENTAGE + formatted_value = f"{calculated_value:.2%}" if is_percentage else calculated_value + match condition_type: + case AlertConditionType.ABSOLUTE_VALUE: + condition_text = "is" + case AlertConditionType.RELATIVE_INCREASE: + condition_text = "increased" + case AlertConditionType.RELATIVE_DECREASE: + condition_text = "decreased" + if bounds.lower is not None and calculated_value < bounds.lower: lower_value = f"{bounds.lower:.2%}" if is_percentage else bounds.lower - return [f"The trend value ({formatted_value}) is below the lower threshold ({lower_value})"] + return [ + f"The insight value for previous {interval_type} {condition_text} ({formatted_value}) less than lower threshold ({lower_value})" + ] if bounds.upper is not None and calculated_value > bounds.upper: upper_value = f"{bounds.upper:.2%}" if is_percentage else bounds.upper - return [f"The trend value ({formatted_value}) is above the upper threshold ({upper_value})"] + return [ + f"The insight value for previous {interval_type} {condition_text} ({formatted_value}) more than upper threshold ({upper_value})" + ] return []
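The AlertCheckException wrapper added in posthog/tasks/alerts/checks.py copies the wrapped error's traceback so that capture_exception reports the original failure site rather than the wrapping frame. A minimal sketch of that behaviour, assuming the class is importable from the module shown in the diff; risky() is a hypothetical stand-in for the failing alert check:

from posthog.tasks.alerts.checks import AlertCheckException


def risky() -> None:
    # hypothetical stand-in for check_alert_and_notify_atomically()
    raise ValueError("insight calculation failed")


try:
    risky()
except Exception as err:
    wrapped = AlertCheckException(err)
    # __init__ reuses the original traceback, so Sentry attributes the event
    # to the frame inside risky() instead of this except block
    assert wrapped.__traceback__ is err.__traceback__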
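check_alert() now records task_start_time and, outside DEBUG/TEST, pads the task to a minimum runtime of 40 seconds so Prometheus can scrape the counters emitted during the check (alerts_backlog_task does the same with a flat 30-second sleep). A sketch of that padding with the remaining time clamped at zero, since time.sleep() raises ValueError for a negative argument and the plain subtraction in the diff goes negative whenever a check takes longer than 40 seconds; the helper name and constant are illustrative, not part of the PR:

import math
import time

MIN_TASK_RUNTIME_SECONDS = 40  # scrape-interval padding used by check_alert()


def pad_task_runtime(task_start_time: float) -> None:
    """Keep the worker busy long enough for Prometheus to scrape task metrics."""
    task_duration = time.time() - task_start_time
    # clamp at zero: time.sleep() raises ValueError for negative values
    time_left_to_run = max(0, MIN_TASK_RUNTIME_SECONDS - math.floor(task_duration))
    time.sleep(time_left_to_run)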
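The updated tests assert on the exact breach messages passed to the patched send_notifications_for_breaches, using unittest.mock.ANY for the first positional argument (presumably the alert configuration) so that only the message list is pinned down. A self-contained illustration of that assertion pattern, with a toy mock standing in for the patched function:

from unittest.mock import ANY, MagicMock

send_notifications_for_breaches = MagicMock()
send_notifications_for_breaches(
    object(),  # placeholder for the alert configuration passed by check_alert
    ["The insight value for previous week is (0) less than lower threshold (1.0)"],
)

# ANY compares equal to anything, so the assertion ignores the first argument
# and only verifies the breach messages
send_notifications_for_breaches.assert_called_once_with(
    ANY, ["The insight value for previous week is (0) less than lower threshold (1.0)"]
)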
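For the new _validate_bounds signature in posthog/tasks/alerts/trends.py, a usage sketch of how a breach message is assembled for a relative-increase alert on a weekly insight. The keyword arguments mirror the parameter names in the diff and the expected message matches test_relative_increase_lower_threshold_breached_1; the posthog.schema import path, the InsightsThresholdBounds(lower=...) construction, and the IntervalType.WEEK member name are assumptions, not taken from the diff:

# assumed import path for the schema types used by trends.py
from posthog.schema import (
    AlertConditionType,
    InsightsThresholdBounds,
    InsightThresholdType,
    IntervalType,
)

breaches = _validate_bounds(
    bounds=InsightsThresholdBounds(lower=2.0),
    calculated_value=-1,  # previous week changed by -1 versus the week before
    threshold_type=InsightThresholdType.ABSOLUTE,
    condition_type=AlertConditionType.RELATIVE_INCREASE,
    interval_type=IntervalType.WEEK,  # assumed member name for a weekly interval
)
# expected, per the updated test:
# ["The insight value for previous week increased (-1) less than lower threshold (2.0)"]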