-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor: Remove early row count and update batch export metrics #22810
Changes from 7 commits
b671c33
7490b91
3c9700b
f07d3bd
1fb4d16
94712b6
caab583
1bfe5b4
1e75757
0c31647
72f7d4a
7023c6f
c97a80e
9690202
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,7 @@ | |
import uuid | ||
from typing import Any | ||
|
||
from django.db.models import Q, Sum | ||
from django.db.models.functions import Coalesce, TruncDay | ||
from django.db.models.functions import TruncDay | ||
from rest_framework import mixins, request, response, viewsets | ||
from rest_framework.decorators import action | ||
|
||
|
@@ -24,6 +23,7 @@ | |
AppMetricsRequestSerializer, | ||
) | ||
from posthog.utils import relative_date_parse | ||
from posthog.batch_exports.models import fetch_batch_export_run_count | ||
|
||
|
||
class AppMetricsViewSet(TeamAndOrgViewSetMixin, mixins.RetrieveModelMixin, viewsets.GenericViewSet): | ||
|
@@ -32,11 +32,8 @@ class AppMetricsViewSet(TeamAndOrgViewSetMixin, mixins.RetrieveModelMixin, views | |
|
||
def retrieve(self, request: request.Request, *args: Any, **kwargs: Any) -> response.Response: | ||
try: | ||
rows = self.get_batch_export_runs_app_metrics_queryset(batch_export_id=kwargs["pk"]) | ||
dates, successes, failures = self.get_batch_export_runs_app_metrics_queryset(batch_export_id=kwargs["pk"]) | ||
|
||
dates = [row["dates"].strftime("%Y-%m-%d") for row in rows] | ||
successes = [row["successes"] for row in rows] | ||
failures = [row["failures"] for row in rows] | ||
return response.Response( | ||
{ | ||
"metrics": { | ||
|
@@ -83,30 +80,7 @@ def error_details(self, request: request.Request, *args: Any, **kwargs: Any) -> | |
return response.Response({"result": error_details}) | ||
|
||
def get_batch_export_runs_app_metrics_queryset(self, batch_export_id: str): | ||
"""Use the Django ORM to fetch app metrics for batch export runs. | ||
|
||
Attempts to (roughly) match the following (much more readable) query: | ||
``` | ||
select | ||
date_trunc('day', last_updated_at) as dates, | ||
sum(case when status = 'Completed' then coalesce(records_total_count, 0) else 0) as successes, | ||
sum(case when status != 'Completed' then coalesce(records_total_count, 0) else 0) as failures | ||
from | ||
posthog_batchexportrun | ||
where | ||
batch_export_id = :batch_export_id | ||
and last_updated_at between :date_from and :date_to | ||
and status != 'Running' | ||
group by | ||
date_trunc('day', last_updated_at) | ||
order by | ||
dates | ||
``` | ||
|
||
A truncated 'last_updated_at' is used as the grouping date as it reflects when a particular run | ||
was last updated. It feels easier to explain to users that if they see metrics for today, those | ||
correspond to runs that happened today, even if the runs themselves exported data from a year ago | ||
(because it was a backfill). | ||
"""Use the Django ORM and ClickHouse to fetch app metrics for batch export runs. | ||
|
||
Raises: | ||
ValueError: If provided 'batch_export_id' is not a valid UUID. | ||
|
@@ -120,22 +94,52 @@ def get_batch_export_runs_app_metrics_queryset(self, batch_export_id: str): | |
relative_date_parse(before, self.team.timezone_info) if before else dt.datetime.now(dt.timezone.utc) | ||
) | ||
date_range = (after_datetime, before_datetime) | ||
return ( | ||
BatchExportRun.objects.filter(batch_export_id=batch_export_uuid, last_updated_at__range=date_range) | ||
.annotate(dates=TruncDay("last_updated_at")) | ||
.values("dates") | ||
.annotate( | ||
successes=Sum( | ||
Coalesce("records_total_count", 0), filter=Q(status=BatchExportRun.Status.COMPLETED), default=0 | ||
), | ||
failures=Sum( | ||
Coalesce("records_total_count", 0), filter=~Q(status=BatchExportRun.Status.COMPLETED), default=0 | ||
runs = ( | ||
BatchExportRun.objects.filter( | ||
batch_export_id=batch_export_uuid, | ||
last_updated_at__range=date_range, | ||
status__in=( | ||
BatchExportRun.Status.COMPLETED, | ||
BatchExportRun.Status.FAILED, | ||
BatchExportRun.Status.FAILED_RETRYABLE, | ||
), | ||
) | ||
.order_by("dates") | ||
.annotate(day=TruncDay("last_updated_at")) | ||
.order_by("day") | ||
.all() | ||
) | ||
|
||
dates = [] | ||
successes = [] | ||
failures = [] | ||
current_day: dt.datetime | None = None | ||
for run in runs: | ||
if current_day is None: | ||
current_day = run.day | ||
dates.append(run.day.strftime("%Y-%m-%d")) | ||
successes.append(0) | ||
failures.append(0) | ||
|
||
elif current_day < run.day: | ||
current_day = run.day | ||
dates.append(run.day.strftime("%Y-%m-%d")) | ||
successes.append(0) | ||
failures.append(0) | ||
|
||
count = fetch_batch_export_run_count( | ||
team_id=run.batch_export.team_id, | ||
data_interval_start=run.data_interval_start, | ||
data_interval_end=run.data_interval_end, | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
ok, let's instead look at time ranges exported, which is easy to do over runs. We can make all the graphs say "exports succeeded" / "exports failed" (because it's the latest run only, not all runs) instead of "events sent" / "events failed" in the UI, and we can remove this. |
||
|
||
if run.status == BatchExportRun.Status.COMPLETED: | ||
successes[-1] += count | ||
|
||
elif run.status in (BatchExportRun.Status.FAILED, BatchExportRun.Status.FAILED_RETRYABLE): | ||
failures[-1] += count | ||
|
||
return dates, successes, failures | ||
|
||
|
||
class HistoricalExportsAppMetricsViewSet( | ||
TeamAndOrgViewSetMixin, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
wouldn't this get all the runs, not just the latest for that date range? We only want the latest: if something failed but was retried, then we'd only want to show success in the spark graph and metrics (because a user ultimately doesn't care if it was retried, as long as it was exported — same as webhooks).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if nothing failed but it was retried (manually, by the user) anyway? I think it makes sense to show duplicates if the user requests duplicates. So, we need all runs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moreover, no new runs are automatically created in the event of failure, unless manually requested by us or users. Retries are part of a run, not a separate run.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍 would you mind changing the labels in the UI too with this PR 'events' -> 'runs' in that case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, the latest commit changed this to runs. I'll make the UI change next.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done in latest commit, looks like this: