From e50be8620332eb40cb78b1b48a57c1c4fd87021a Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Sat, 19 Oct 2024 12:00:19 +0100 Subject: [PATCH 01/11] fix: smaller cohort resource usage --- .../session_recording_list_from_filters.py | 75 +++++++++++++++- ...t_session_recording_list_from_filters.ambr | 87 ++++++++++++------- 2 files changed, 126 insertions(+), 36 deletions(-) diff --git a/posthog/session_recordings/queries/session_recording_list_from_filters.py b/posthog/session_recordings/queries/session_recording_list_from_filters.py index 354e4232cee84..1fa9573a1c561 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/session_recording_list_from_filters.py @@ -33,6 +33,14 @@ def is_group_property(p: Property) -> bool: return p.type == "group" +def is_dynamic_cohort_property(p: Property) -> bool: + return p.type == "cohort" + + +def is_static_cohort_property(p: Property) -> bool: + return p.type == "precalculated-cohort" + + class SessionRecordingQueryResult(NamedTuple): results: list has_more_recording: bool @@ -224,7 +232,17 @@ def _where_predicates(self) -> Union[ast.And, ast.Or]: ) ) - remaining_properties = self._strip_person_and_event_properties(self._filter.property_groups) + cohort_subquery = CohortPropertyGroupsSubQuery(self._team, self._filter, self.ttl_days).get_queries() + if cohort_subquery: + optional_exprs.append( + ast.CompareOperation( + op=ast.CompareOperationOp.In, + left=ast.Field(chain=["s", "distinct_id"]), + right=cohort_subquery, + ) + ) + + remaining_properties = self._strip_person_and_event_and_cohort_properties(self._filter.property_groups) if remaining_properties: logger.info( "session_replay_query_builder has unhandled properties", unhandled_properties=remaining_properties @@ -267,11 +285,15 @@ def _where_predicates(self) -> Union[ast.And, ast.Or]: def _having_predicates(self) -> ast.Expr: return property_to_expr(self._filter.having_predicates, team=self._team, scope="replay") - def _strip_person_and_event_properties(self, property_group: PropertyGroup) -> PropertyGroup | None: + def _strip_person_and_event_and_cohort_properties(self, property_group: PropertyGroup) -> PropertyGroup | None: property_groups_to_keep = [ g for g in property_group.flat - if not is_event_property(g) and not is_person_property(g) and not is_group_property(g) + if not is_event_property(g) + and not is_person_property(g) + and not is_group_property(g) + and not is_dynamic_cohort_property(g) + and not is_static_cohort_property(g) ] return ( @@ -334,6 +356,53 @@ def _where_predicates(self) -> ast.Expr: ) +class CohortPropertyGroupsSubQuery: + _team: Team + _filter: SessionRecordingsFilter + _ttl_days: int + + raw_cohort_to_distinct_id = """ + select distinct_id + from person_distinct_ids + where person_id IN ( + SELECT person_id + FROM cohort_people + WHERE + {where_predicates} + ) + """ + + def __init__(self, team: Team, filter: SessionRecordingsFilter, ttl_days: int): + self._team = team + self._filter = filter + self._ttl_days = ttl_days + + def get_queries(self) -> ast.SelectQuery | ast.SelectUnionQuery | None: + if self.cohort_properties: + return parse_select( + self.raw_cohort_to_distinct_id, + {"where_predicates": property_to_expr(self.cohort_properties, team=self._team, scope="replay")}, + ) + + return None + + @cached_property + def cohort_properties(self) -> PropertyGroup | None: + cohort_property_groups = [ + g + for g in self._filter.property_groups.flat + if is_dynamic_cohort_property(g) or is_static_cohort_property(g) + ] + return ( + PropertyGroup( + type=self._filter.property_operand, + values=cohort_property_groups, + ) + if cohort_property_groups + else None + ) + + class PersonsIdCompareOperation: _team: Team _filter: SessionRecordingsFilter diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr index 72161f92f2223..86ac433b2ca83 100644 --- a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr @@ -3161,17 +3161,24 @@ sum(s.console_error_count) AS console_error_count, ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing FROM session_replay_events AS s - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) - WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), ifNull(in(s__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)) + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE ifNull(in(person_distinct_ids.person_id, + (SELECT cohort_people.person_id AS person_id + FROM + (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(1, 0)]), 0))) AS cohort_people + WHERE ifNull(in(cohort_people.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))), 0)))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -3224,22 +3231,29 @@ sum(s.console_error_count) AS console_error_count, ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing FROM session_replay_events AS s - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), and(in(s.session_id, (SELECT events.`$session_id` AS session_id FROM events WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-13 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), and(equals(events.event, '$pageview'), 1)) GROUP BY events.`$session_id` - HAVING hasAll(groupUniqArray(events.event), ['$pageview']))), ifNull(in(s__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))) + HAVING hasAll(groupUniqArray(events.event), ['$pageview']))), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE ifNull(in(person_distinct_ids.person_id, + (SELECT cohort_people.person_id AS person_id + FROM + (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(3, 0)]), 0))) AS cohort_people + WHERE ifNull(in(cohort_people.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -3272,22 +3286,29 @@ sum(s.console_error_count) AS console_error_count, ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing FROM session_replay_events AS s - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), and(in(s.session_id, (SELECT events.`$session_id` AS session_id FROM events WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-13 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), and(equals(events.event, 'custom_event'), 1)) GROUP BY events.`$session_id` - HAVING hasAll(groupUniqArray(events.event), ['custom_event']))), ifNull(in(s__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))) + HAVING hasAll(groupUniqArray(events.event), ['custom_event']))), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE ifNull(in(person_distinct_ids.person_id, + (SELECT cohort_people.person_id AS person_id + FROM + (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(3, 0)]), 0))) AS cohort_people + WHERE ifNull(in(cohort_people.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC From afa55689273b2371ab22a3733b55868b6a6979df Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Sat, 19 Oct 2024 12:08:27 +0100 Subject: [PATCH 02/11] better way of warning when remaining properties are being queries --- .../queries/session_recording_list_from_filters.py | 10 +++++++--- .../test_session_recording_list_from_filters.ambr | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/posthog/session_recordings/queries/session_recording_list_from_filters.py b/posthog/session_recordings/queries/session_recording_list_from_filters.py index 1fa9573a1c561..8f2f475fad372 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/session_recording_list_from_filters.py @@ -2,6 +2,8 @@ from typing import Any, NamedTuple, cast, Optional, Union from datetime import datetime, timedelta +import posthoganalytics + from posthog.hogql import ast from posthog.hogql.ast import CompareOperation from posthog.hogql.parser import parse_select @@ -47,6 +49,10 @@ class SessionRecordingQueryResult(NamedTuple): timings: list[QueryTiming] | None = None +class UnexpectedQueryProperties(Exception): + pass + + class SessionRecordingListFromFilters: SESSION_RECORDINGS_DEFAULT_LIMIT = 50 @@ -244,9 +250,7 @@ def _where_predicates(self) -> Union[ast.And, ast.Or]: remaining_properties = self._strip_person_and_event_and_cohort_properties(self._filter.property_groups) if remaining_properties: - logger.info( - "session_replay_query_builder has unhandled properties", unhandled_properties=remaining_properties - ) + posthoganalytics.capture_exception(UnexpectedQueryProperties(remaining_properties)) optional_exprs.append(property_to_expr(remaining_properties, team=self._team, scope="replay")) if self._filter.console_log_filters.values: diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr index 86ac433b2ca83..7666d8bcac445 100644 --- a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr @@ -3174,7 +3174,7 @@ FROM (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(1, 0)]), 0))) AS cohort_people + WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(13, 0)]), 0))) AS cohort_people WHERE ifNull(in(cohort_people.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople @@ -3249,7 +3249,7 @@ FROM (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(3, 0)]), 0))) AS cohort_people + WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(15, 0)]), 0))) AS cohort_people WHERE ifNull(in(cohort_people.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople @@ -3304,7 +3304,7 @@ FROM (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(3, 0)]), 0))) AS cohort_people + WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(15, 0)]), 0))) AS cohort_people WHERE ifNull(in(cohort_people.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople From ffe67b6c6db5f6536dfc7c19c27789e86233938b Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 11:42:39 +0000 Subject: [PATCH 03/11] Update query snapshots --- .../test_session_recording_list_from_filters.ambr | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr index 7666d8bcac445..39da8f56c4092 100644 --- a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr @@ -3174,7 +3174,7 @@ FROM (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(13, 0)]), 0))) AS cohort_people + WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(35, 0)]), 0))) AS cohort_people WHERE ifNull(in(cohort_people.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople @@ -3249,7 +3249,7 @@ FROM (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(15, 0)]), 0))) AS cohort_people + WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(37, 0)]), 0))) AS cohort_people WHERE ifNull(in(cohort_people.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople @@ -3304,7 +3304,7 @@ FROM (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(15, 0)]), 0))) AS cohort_people + WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(37, 0)]), 0))) AS cohort_people WHERE ifNull(in(cohort_people.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople From d8c26edcfe9001841466d80ac31d68c2a7f1a187 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Sat, 19 Oct 2024 13:17:41 +0100 Subject: [PATCH 04/11] Failing test to drive correction --- ...est_session_recording_list_from_filters.py | 118 ++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py index eec4a40f74b75..0af85dfa1cb0c 100644 --- a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py @@ -2171,6 +2171,124 @@ def test_filter_with_cohort_properties(self): assert len(session_recordings) == 1 assert session_recordings[0]["session_id"] == session_id_two + @snapshot_clickhouse_queries + @also_test_with_materialized_columns(person_properties=["$some_prop"]) + def test_filter_with_static_and_dynamic_cohort_properties(self): + with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): + with freeze_time("2021-08-21T20:00:00.000Z"): + user_one = "test_filter_with_cohort_properties-user-in-static-cohort" + user_two = "test_filter_with_cohort_properties-user2-in-dynamic-cohort" + user_three = "test_filter_with_cohort_properties-user3-in-both-cohort" + + session_id_one = f"test_filter_with_static_and_dynamic_cohort_properties-1-{str(uuid4())}" + session_id_two = f"test_filter_with_static_and_dynamic_cohort_properties-2-{str(uuid4())}" + session_id_three = f"test_filter_with_static_and_dynamic_cohort_properties-3-{str(uuid4())}" + + Person.objects.create(team=self.team, distinct_ids=[user_one], properties={"email": "in@static.cohort"}) + Person.objects.create( + team=self.team, + distinct_ids=[user_two], + properties={"email": "in@dynamic.cohort", "$some_prop": "some_val"}, + ) + Person.objects.create( + team=self.team, + distinct_ids=[user_three], + properties={"email": "in@both.cohorts", "$some_prop": "some_val"}, + ) + + dynamic_cohort = Cohort.objects.create( + team=self.team, + name="cohort1", + groups=[ + { + "properties": [ + { + "key": "$some_prop", + "value": "some_val", + "type": "person", + } + ] + } + ], + ) + dynamic_cohort.calculate_people_ch(pending_version=0) + + static_cohort = Cohort.objects.create(team=self.team, name="a static cohort", groups=[], is_static=True) + static_cohort.insert_users_by_list([user_one, user_three]) + + replay_summaries = [ + (user_one, session_id_one), + (user_two, session_id_two), + (user_three, session_id_three), + ] + for distinct_id, session_id in replay_summaries: + produce_replay_summary( + distinct_id=distinct_id, + session_id=session_id, + first_timestamp=self.an_hour_ago, + team_id=self.team.id, + ) + produce_replay_summary( + distinct_id=distinct_id, + session_id=session_id, + first_timestamp=self.an_hour_ago + relativedelta(seconds=30), + team_id=self.team.id, + ) + + (session_recordings, _, _) = self._filter_recordings_by( + { + "properties": [ + { + "key": "id", + "value": static_cohort.pk, + "operator": None, + "type": "precalculated-cohort", + }, + ] + } + ) + + assert len(session_recordings) == 2 + assert [x["session_id"] for x in session_recordings] == [session_id_one, session_id_three] + + (session_recordings, _, _) = self._filter_recordings_by( + { + "properties": [ + { + "key": "id", + "value": dynamic_cohort.pk, + "operator": None, + "type": "cohort", + }, + ] + } + ) + + assert len(session_recordings) == 1 + assert session_recordings[0]["session_id"] == session_id_two + + (session_recordings, _, _) = self._filter_recordings_by( + { + "properties": [ + { + "key": "id", + "value": dynamic_cohort.pk, + "operator": None, + "type": "cohort", + }, + { + "key": "id", + "value": static_cohort.pk, + "operator": None, + "type": "precalculated-cohort", + }, + ] + } + ) + + assert len(session_recordings) == 1 + assert session_recordings[0]["session_id"] == session_id_three + @snapshot_clickhouse_queries @also_test_with_materialized_columns(person_properties=["$some_prop"]) def test_filter_with_events_and_cohorts(self): From e25fb91544a619f90a1ebe9507e12c6d5f734de2 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Sat, 19 Oct 2024 13:22:46 +0100 Subject: [PATCH 05/11] flapping snapshots --- posthog/test/base.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/posthog/test/base.py b/posthog/test/base.py index ba2f5ea2f460f..385da83ba11ab 100644 --- a/posthog/test/base.py +++ b/posthog/test/base.py @@ -576,6 +576,15 @@ def assertQueryMatchesSnapshot(self, query, params=None, replace_all_numbers=Fal r"_condition_X_level", query, ) + + # replace cohort tuples + # like (tuple(cohortpeople.cohort_id, cohortpeople.version), [(35, 0)]) + query = re.sub( + r"\(tuple\((.*)\.cohort_id, (.*)\.version\), \[\(\d+, \d+\)\]\)", + r"(tuple(\1.cohort_id, \2.version), [(2, 0)])", + query, + ) + #### Cohort replacements end # Replace organization_id and notebook_id lookups, for postgres From 9a8f42012d8fad4c58b539d64839a72f9aef01c2 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Sat, 19 Oct 2024 15:29:32 +0100 Subject: [PATCH 06/11] wat --- .../session_recording_list_from_filters.py | 71 ++++++++++++++++--- ...est_session_recording_list_from_filters.py | 29 +++++--- 2 files changed, 81 insertions(+), 19 deletions(-) diff --git a/posthog/session_recordings/queries/session_recording_list_from_filters.py b/posthog/session_recordings/queries/session_recording_list_from_filters.py index 8f2f475fad372..107b5eecf8f1a 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/session_recording_list_from_filters.py @@ -238,13 +238,27 @@ def _where_predicates(self) -> Union[ast.And, ast.Or]: ) ) - cohort_subquery = CohortPropertyGroupsSubQuery(self._team, self._filter, self.ttl_days).get_queries() - if cohort_subquery: + dynamic_cohort_subquery = DynamicCohortPropertyGroupsSubQuery( + self._team, self._filter, self.ttl_days + ).get_queries() + if dynamic_cohort_subquery: optional_exprs.append( ast.CompareOperation( op=ast.CompareOperationOp.In, left=ast.Field(chain=["s", "distinct_id"]), - right=cohort_subquery, + right=dynamic_cohort_subquery, + ) + ) + + static_cohort_subquery = StaticCohortPropertyGroupsSubQuery( + self._team, self._filter, self.ttl_days + ).get_queries() + if static_cohort_subquery: + optional_exprs.append( + ast.CompareOperation( + op=ast.CompareOperationOp.In, + left=ast.Field(chain=["s", "distinct_id"]), + right=static_cohort_subquery, ) ) @@ -360,7 +374,50 @@ def _where_predicates(self) -> ast.Expr: ) -class CohortPropertyGroupsSubQuery: +class StaticCohortPropertyGroupsSubQuery: + _team: Team + _filter: SessionRecordingsFilter + _ttl_days: int + + raw_cohort_to_distinct_id = """ + select distinct_id + from person_distinct_ids + where person_id IN ( + SELECT person_id + FROM static_cohort_people + WHERE + {where_predicates} + ) + """ + + def __init__(self, team: Team, filter: SessionRecordingsFilter, ttl_days: int): + self._team = team + self._filter = filter + self._ttl_days = ttl_days + + def get_queries(self) -> ast.SelectQuery | ast.SelectUnionQuery | None: + if self.cohort_properties: + return parse_select( + self.raw_cohort_to_distinct_id, + {"where_predicates": property_to_expr(self.cohort_properties, team=self._team, scope="replay")}, + ) + + return None + + @cached_property + def cohort_properties(self) -> PropertyGroup | None: + cohort_property_groups = [g for g in self._filter.property_groups.flat if is_static_cohort_property(g)] + return ( + PropertyGroup( + type=self._filter.property_operand, + values=cohort_property_groups, + ) + if cohort_property_groups + else None + ) + + +class DynamicCohortPropertyGroupsSubQuery: _team: Team _filter: SessionRecordingsFilter _ttl_days: int @@ -392,11 +449,7 @@ def get_queries(self) -> ast.SelectQuery | ast.SelectUnionQuery | None: @cached_property def cohort_properties(self) -> PropertyGroup | None: - cohort_property_groups = [ - g - for g in self._filter.property_groups.flat - if is_dynamic_cohort_property(g) or is_static_cohort_property(g) - ] + cohort_property_groups = [g for g in self._filter.property_groups.flat if is_dynamic_cohort_property(g)] return ( PropertyGroup( type=self._filter.property_operand, diff --git a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py index 0af85dfa1cb0c..792ab6cd5c5dd 100644 --- a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py @@ -2180,9 +2180,15 @@ def test_filter_with_static_and_dynamic_cohort_properties(self): user_two = "test_filter_with_cohort_properties-user2-in-dynamic-cohort" user_three = "test_filter_with_cohort_properties-user3-in-both-cohort" - session_id_one = f"test_filter_with_static_and_dynamic_cohort_properties-1-{str(uuid4())}" - session_id_two = f"test_filter_with_static_and_dynamic_cohort_properties-2-{str(uuid4())}" - session_id_three = f"test_filter_with_static_and_dynamic_cohort_properties-3-{str(uuid4())}" + session_id_one = ( + f"in-static-cohort-test_filter_with_static_and_dynamic_cohort_properties-1-{str(uuid4())}" + ) + session_id_two = ( + f"in-dynamic-cohort-test_filter_with_static_and_dynamic_cohort_properties-2-{str(uuid4())}" + ) + session_id_three = ( + f"in-both-cohort-test_filter_with_static_and_dynamic_cohort_properties-3-{str(uuid4())}" + ) Person.objects.create(team=self.team, distinct_ids=[user_one], properties={"email": "in@static.cohort"}) Person.objects.create( @@ -2211,11 +2217,13 @@ def test_filter_with_static_and_dynamic_cohort_properties(self): } ], ) - dynamic_cohort.calculate_people_ch(pending_version=0) static_cohort = Cohort.objects.create(team=self.team, name="a static cohort", groups=[], is_static=True) static_cohort.insert_users_by_list([user_one, user_three]) + dynamic_cohort.calculate_people_ch(pending_version=0) + static_cohort.calculate_people_ch(pending_version=0) + replay_summaries = [ (user_one, session_id_one), (user_two, session_id_two), @@ -2248,8 +2256,9 @@ def test_filter_with_static_and_dynamic_cohort_properties(self): } ) - assert len(session_recordings) == 2 - assert [x["session_id"] for x in session_recordings] == [session_id_one, session_id_three] + assert sorted([x["session_id"] for x in session_recordings]) == sorted( + [session_id_one, session_id_three] + ) (session_recordings, _, _) = self._filter_recordings_by( { @@ -2264,8 +2273,9 @@ def test_filter_with_static_and_dynamic_cohort_properties(self): } ) - assert len(session_recordings) == 1 - assert session_recordings[0]["session_id"] == session_id_two + assert sorted([x["session_id"] for x in session_recordings]) == sorted( + [session_id_two, session_id_three] + ) (session_recordings, _, _) = self._filter_recordings_by( { @@ -2286,8 +2296,7 @@ def test_filter_with_static_and_dynamic_cohort_properties(self): } ) - assert len(session_recordings) == 1 - assert session_recordings[0]["session_id"] == session_id_three + assert sorted([x["session_id"] for x in session_recordings]) == [session_id_three] @snapshot_clickhouse_queries @also_test_with_materialized_columns(person_properties=["$some_prop"]) From accaf4e98b45b93e8fcc0550704ddc3ec462f008 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Sun, 20 Oct 2024 06:35:39 +0100 Subject: [PATCH 07/11] wat --- ...t_session_recording_list_from_filters.ambr | 208 ++++++++++++++++++ ...est_session_recording_list_from_filters.py | 207 +++++++++-------- 2 files changed, 306 insertions(+), 109 deletions(-) diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr index 39da8f56c4092..e67b7ad37f1ee 100644 --- a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr @@ -3428,6 +3428,214 @@ max_bytes_before_external_group_by=0 ''' # --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties + ''' + + SELECT count(DISTINCT person_id) + FROM person_static_cohort + WHERE team_id = 2 + AND cohort_id = 2 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.1 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.2 + ''' + /* cohort_calculation: */ + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.3 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.4 + ''' + /* cohort_calculation: */ + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.5 + ''' + SELECT s.session_id AS session_id, + any(s.team_id), + any(s.distinct_id), + min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, + max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, + dateDiff('SECOND', start_time, end_time) AS duration, + argMinMerge(s.first_url) AS first_url, + sum(s.click_count) AS click_count, + sum(s.keypress_count) AS keypress_count, + sum(s.mouse_activity_count) AS mouse_activity_count, + divide(sum(s.active_milliseconds), 1000) AS active_seconds, + minus(duration, active_seconds) AS inactive_seconds, + sum(s.console_log_count) AS console_log_count, + sum(s.console_warn_count) AS console_warn_count, + sum(s.console_error_count) AS console_error_count, + ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing + FROM session_replay_events AS s + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), ifNull(in(s__pdi.person_id, + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 2)))), 0)) + GROUP BY s.session_id + HAVING 1 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.6 + ''' + SELECT s.session_id AS session_id, + any(s.team_id), + any(s.distinct_id), + min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, + max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, + dateDiff('SECOND', start_time, end_time) AS duration, + argMinMerge(s.first_url) AS first_url, + sum(s.click_count) AS click_count, + sum(s.keypress_count) AS keypress_count, + sum(s.mouse_activity_count) AS mouse_activity_count, + divide(sum(s.active_milliseconds), 1000) AS active_seconds, + minus(duration, active_seconds) AS inactive_seconds, + sum(s.console_log_count) AS console_log_count, + sum(s.console_warn_count) AS console_warn_count, + sum(s.console_error_count) AS console_error_count, + ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing + FROM session_replay_events AS s + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, + (SELECT events.`$session_id` AS session_id + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) + WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-13 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), ifNull(equals(events__person.`properties___$some_prop`, 'some_val'), 0)) + GROUP BY events.`$session_id` + HAVING 1))) + GROUP BY s.session_id + HAVING 1 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestSessionRecordingsListFromFilters.test_filter_with_static_and_dynamic_cohort_properties.7 + ''' + SELECT s.session_id AS session_id, + any(s.team_id), + any(s.distinct_id), + min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, + max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, + dateDiff('SECOND', start_time, end_time) AS duration, + argMinMerge(s.first_url) AS first_url, + sum(s.click_count) AS click_count, + sum(s.keypress_count) AS keypress_count, + sum(s.mouse_activity_count) AS mouse_activity_count, + divide(sum(s.active_milliseconds), 1000) AS active_seconds, + minus(duration, active_seconds) AS inactive_seconds, + sum(s.console_log_count) AS console_log_count, + sum(s.console_warn_count) AS console_warn_count, + sum(s.console_error_count) AS console_error_count, + ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing + FROM session_replay_events AS s + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), and(in(s.session_id, + (SELECT events.`$session_id` AS session_id + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + LEFT JOIN + (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) + WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-13 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), ifNull(equals(events__person.`properties___$some_prop`, 'some_val'), 0)) + GROUP BY events.`$session_id` + HAVING 1)), ifNull(in(s__pdi.person_id, + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 2)))), 0))) + GROUP BY s.session_id + HAVING 1 + ORDER BY start_time DESC + LIMIT 51 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- # name: TestSessionRecordingsListFromFilters.test_multiple_event_filters ''' SELECT s.session_id AS session_id, diff --git a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py index 792ab6cd5c5dd..ae56e53c662b4 100644 --- a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py @@ -2174,129 +2174,118 @@ def test_filter_with_cohort_properties(self): @snapshot_clickhouse_queries @also_test_with_materialized_columns(person_properties=["$some_prop"]) def test_filter_with_static_and_dynamic_cohort_properties(self): - with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): - with freeze_time("2021-08-21T20:00:00.000Z"): - user_one = "test_filter_with_cohort_properties-user-in-static-cohort" - user_two = "test_filter_with_cohort_properties-user2-in-dynamic-cohort" - user_three = "test_filter_with_cohort_properties-user3-in-both-cohort" + with freeze_time("2021-08-21T20:00:00.000Z"): + user_one = "test_filter_with_cohort_properties-user-in-static-cohort" + user_two = "test_filter_with_cohort_properties-user2-in-dynamic-cohort" + user_three = "test_filter_with_cohort_properties-user3-in-both-cohort" - session_id_one = ( - f"in-static-cohort-test_filter_with_static_and_dynamic_cohort_properties-1-{str(uuid4())}" - ) - session_id_two = ( - f"in-dynamic-cohort-test_filter_with_static_and_dynamic_cohort_properties-2-{str(uuid4())}" - ) - session_id_three = ( - f"in-both-cohort-test_filter_with_static_and_dynamic_cohort_properties-3-{str(uuid4())}" - ) - - Person.objects.create(team=self.team, distinct_ids=[user_one], properties={"email": "in@static.cohort"}) - Person.objects.create( - team=self.team, - distinct_ids=[user_two], - properties={"email": "in@dynamic.cohort", "$some_prop": "some_val"}, - ) - Person.objects.create( - team=self.team, - distinct_ids=[user_three], - properties={"email": "in@both.cohorts", "$some_prop": "some_val"}, - ) + session_id_one = f"in-static-cohort-test_filter_with_static_and_dynamic_cohort_properties-1-{str(uuid4())}" + session_id_two = f"in-dynamic-cohort-test_filter_with_static_and_dynamic_cohort_properties-2-{str(uuid4())}" + session_id_three = f"in-both-cohort-test_filter_with_static_and_dynamic_cohort_properties-3-{str(uuid4())}" - dynamic_cohort = Cohort.objects.create( - team=self.team, - name="cohort1", - groups=[ - { - "properties": [ - { - "key": "$some_prop", - "value": "some_val", - "type": "person", - } - ] - } - ], - ) - - static_cohort = Cohort.objects.create(team=self.team, name="a static cohort", groups=[], is_static=True) - static_cohort.insert_users_by_list([user_one, user_three]) - - dynamic_cohort.calculate_people_ch(pending_version=0) - static_cohort.calculate_people_ch(pending_version=0) - - replay_summaries = [ - (user_one, session_id_one), - (user_two, session_id_two), - (user_three, session_id_three), - ] - for distinct_id, session_id in replay_summaries: - produce_replay_summary( - distinct_id=distinct_id, - session_id=session_id, - first_timestamp=self.an_hour_ago, - team_id=self.team.id, - ) - produce_replay_summary( - distinct_id=distinct_id, - session_id=session_id, - first_timestamp=self.an_hour_ago + relativedelta(seconds=30), - team_id=self.team.id, - ) + Person.objects.create(team=self.team, distinct_ids=[user_one], properties={"email": "in@static.cohort"}) + Person.objects.create( + team=self.team, + distinct_ids=[user_two], + properties={"email": "in@dynamic.cohort", "$some_prop": "some_val"}, + ) + Person.objects.create( + team=self.team, + distinct_ids=[user_three], + properties={"email": "in@both.cohorts", "$some_prop": "some_val"}, + ) - (session_recordings, _, _) = self._filter_recordings_by( + dynamic_cohort = Cohort.objects.create( + team=self.team, + name="cohort1", + groups=[ { "properties": [ { - "key": "id", - "value": static_cohort.pk, - "operator": None, - "type": "precalculated-cohort", - }, + "key": "$some_prop", + "value": "some_val", + "type": "person", + } ] } - ) + ], + ) - assert sorted([x["session_id"] for x in session_recordings]) == sorted( - [session_id_one, session_id_three] - ) + static_cohort = Cohort.objects.create(team=self.team, name="a static cohort", groups=[], is_static=True) + static_cohort.insert_users_by_list([user_one, user_three]) - (session_recordings, _, _) = self._filter_recordings_by( - { - "properties": [ - { - "key": "id", - "value": dynamic_cohort.pk, - "operator": None, - "type": "cohort", - }, - ] - } - ) + dynamic_cohort.calculate_people_ch(pending_version=0) + static_cohort.calculate_people_ch(pending_version=0) - assert sorted([x["session_id"] for x in session_recordings]) == sorted( - [session_id_two, session_id_three] + replay_summaries = [ + (user_one, session_id_one), + (user_two, session_id_two), + (user_three, session_id_three), + ] + for distinct_id, session_id in replay_summaries: + produce_replay_summary( + distinct_id=distinct_id, + session_id=session_id, + first_timestamp=self.an_hour_ago, + team_id=self.team.id, ) - - (session_recordings, _, _) = self._filter_recordings_by( - { - "properties": [ - { - "key": "id", - "value": dynamic_cohort.pk, - "operator": None, - "type": "cohort", - }, - { - "key": "id", - "value": static_cohort.pk, - "operator": None, - "type": "precalculated-cohort", - }, - ] - } + produce_replay_summary( + distinct_id=distinct_id, + session_id=session_id, + first_timestamp=self.an_hour_ago + relativedelta(seconds=30), + team_id=self.team.id, ) - assert sorted([x["session_id"] for x in session_recordings]) == [session_id_three] + (session_recordings, _, _) = self._filter_recordings_by( + { + "properties": [ + { + "key": "id", + "value": static_cohort.pk, + "operator": "in", + "type": "cohort", + }, + ] + } + ) + + assert sorted([x["session_id"] for x in session_recordings]) == sorted([session_id_one, session_id_three]) + + (session_recordings, _, _) = self._filter_recordings_by( + { + "properties": [ + { + "key": "id", + "value": dynamic_cohort.pk, + "operator": "in", + "type": "cohort", + }, + ] + } + ) + + assert sorted([x["session_id"] for x in session_recordings]) == sorted([session_id_two, session_id_three]) + + (session_recordings, _, _) = self._filter_recordings_by( + { + "properties": [ + { + "key": "id", + "value": dynamic_cohort.pk, + "operator": "in", + "type": "cohort", + }, + { + "key": "id", + "value": static_cohort.pk, + "operator": "in", + "type": "cohort", + }, + ] + } + ) + + assert sorted([x["session_id"] for x in session_recordings]) == [session_id_three] @snapshot_clickhouse_queries @also_test_with_materialized_columns(person_properties=["$some_prop"]) From d92924ad82c3ecb1e02c7b52d27735bbc8f88964 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Sun, 20 Oct 2024 07:21:27 +0100 Subject: [PATCH 08/11] hogql has special syntax for cohorts --- .../session_recording_list_from_filters.py | 80 +++---------------- ...t_session_recording_list_from_filters.ambr | 36 +++------ ...est_session_recording_list_from_filters.py | 12 ++- 3 files changed, 25 insertions(+), 103 deletions(-) diff --git a/posthog/session_recordings/queries/session_recording_list_from_filters.py b/posthog/session_recordings/queries/session_recording_list_from_filters.py index 107b5eecf8f1a..75bb036d5872f 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/session_recording_list_from_filters.py @@ -43,6 +43,10 @@ def is_static_cohort_property(p: Property) -> bool: return p.type == "precalculated-cohort" +def is_cohort_property(p: Property) -> bool: + return is_dynamic_cohort_property(p) or is_static_cohort_property(p) + + class SessionRecordingQueryResult(NamedTuple): results: list has_more_recording: bool @@ -238,27 +242,13 @@ def _where_predicates(self) -> Union[ast.And, ast.Or]: ) ) - dynamic_cohort_subquery = DynamicCohortPropertyGroupsSubQuery( - self._team, self._filter, self.ttl_days - ).get_queries() - if dynamic_cohort_subquery: + cohort_subquery = CohortPropertyGroupsSubQuery(self._team, self._filter, self.ttl_days).get_queries() + if cohort_subquery: optional_exprs.append( ast.CompareOperation( op=ast.CompareOperationOp.In, left=ast.Field(chain=["s", "distinct_id"]), - right=dynamic_cohort_subquery, - ) - ) - - static_cohort_subquery = StaticCohortPropertyGroupsSubQuery( - self._team, self._filter, self.ttl_days - ).get_queries() - if static_cohort_subquery: - optional_exprs.append( - ast.CompareOperation( - op=ast.CompareOperationOp.In, - left=ast.Field(chain=["s", "distinct_id"]), - right=static_cohort_subquery, + right=cohort_subquery, ) ) @@ -374,50 +364,7 @@ def _where_predicates(self) -> ast.Expr: ) -class StaticCohortPropertyGroupsSubQuery: - _team: Team - _filter: SessionRecordingsFilter - _ttl_days: int - - raw_cohort_to_distinct_id = """ - select distinct_id - from person_distinct_ids - where person_id IN ( - SELECT person_id - FROM static_cohort_people - WHERE - {where_predicates} - ) - """ - - def __init__(self, team: Team, filter: SessionRecordingsFilter, ttl_days: int): - self._team = team - self._filter = filter - self._ttl_days = ttl_days - - def get_queries(self) -> ast.SelectQuery | ast.SelectUnionQuery | None: - if self.cohort_properties: - return parse_select( - self.raw_cohort_to_distinct_id, - {"where_predicates": property_to_expr(self.cohort_properties, team=self._team, scope="replay")}, - ) - - return None - - @cached_property - def cohort_properties(self) -> PropertyGroup | None: - cohort_property_groups = [g for g in self._filter.property_groups.flat if is_static_cohort_property(g)] - return ( - PropertyGroup( - type=self._filter.property_operand, - values=cohort_property_groups, - ) - if cohort_property_groups - else None - ) - - -class DynamicCohortPropertyGroupsSubQuery: +class CohortPropertyGroupsSubQuery: _team: Team _filter: SessionRecordingsFilter _ttl_days: int @@ -425,12 +372,7 @@ class DynamicCohortPropertyGroupsSubQuery: raw_cohort_to_distinct_id = """ select distinct_id from person_distinct_ids - where person_id IN ( - SELECT person_id - FROM cohort_people - WHERE - {where_predicates} - ) + where {cohort_predicate} """ def __init__(self, team: Team, filter: SessionRecordingsFilter, ttl_days: int): @@ -442,14 +384,14 @@ def get_queries(self) -> ast.SelectQuery | ast.SelectUnionQuery | None: if self.cohort_properties: return parse_select( self.raw_cohort_to_distinct_id, - {"where_predicates": property_to_expr(self.cohort_properties, team=self._team, scope="replay")}, + {"cohort_predicate": property_to_expr(self.cohort_properties, team=self._team, scope="replay")}, ) return None @cached_property def cohort_properties(self) -> PropertyGroup | None: - cohort_property_groups = [g for g in self._filter.property_groups.flat if is_dynamic_cohort_property(g)] + cohort_property_groups = [g for g in self._filter.property_groups.flat if is_cohort_property(g)] return ( PropertyGroup( type=self._filter.property_operand, diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr index e67b7ad37f1ee..a888184e60dfc 100644 --- a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr @@ -3170,15 +3170,9 @@ GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids WHERE ifNull(in(person_distinct_ids.person_id, - (SELECT cohort_people.person_id AS person_id - FROM - (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(35, 0)]), 0))) AS cohort_people - WHERE ifNull(in(cohort_people.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))), 0)))) + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -3245,15 +3239,9 @@ GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids WHERE ifNull(in(person_distinct_ids.person_id, - (SELECT cohort_people.person_id AS person_id - FROM - (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(37, 0)]), 0))) AS cohort_people - WHERE ifNull(in(cohort_people.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))), 0))))) + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -3300,15 +3288,9 @@ GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids WHERE ifNull(in(person_distinct_ids.person_id, - (SELECT cohort_people.person_id AS person_id - FROM - (SELECT DISTINCT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), ifNull(in(tuple(cohortpeople.cohort_id, cohortpeople.version), [(37, 0)]), 0))) AS cohort_people - WHERE ifNull(in(cohort_people.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))), 0))))) + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC diff --git a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py index ae56e53c662b4..178f6684ac352 100644 --- a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py @@ -2161,15 +2161,14 @@ def test_filter_with_cohort_properties(self): { "key": "id", "value": cohort.pk, - "operator": None, + "operator": "in", "type": "cohort", } ] } ) - assert len(session_recordings) == 1 - assert session_recordings[0]["session_id"] == session_id_two + assert [x["session_id"] for x in session_recordings] == [session_id_two] @snapshot_clickhouse_queries @also_test_with_materialized_columns(person_properties=["$some_prop"]) @@ -2367,7 +2366,7 @@ def test_filter_with_events_and_cohorts(self): { "key": "id", "value": cohort.pk, - "operator": None, + "operator": "in", "type": "cohort", } ], @@ -2390,7 +2389,7 @@ def test_filter_with_events_and_cohorts(self): { "key": "id", "value": cohort.pk, - "operator": None, + "operator": "in", "type": "cohort", } ], @@ -2405,8 +2404,7 @@ def test_filter_with_events_and_cohorts(self): } ) - assert len(session_recordings) == 1 - assert session_recordings[0]["session_id"] == session_id_two + assert [x["session_id"] for x in session_recordings] == [session_id_two] @snapshot_clickhouse_queries @also_test_with_materialized_columns(["$current_url"]) From a1ab553947dc2842f50df1d96e4dfec8be1c9355 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Sun, 20 Oct 2024 07:51:36 +0100 Subject: [PATCH 09/11] fiddling --- .../session_recording_list_from_filters.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/posthog/session_recordings/queries/session_recording_list_from_filters.py b/posthog/session_recordings/queries/session_recording_list_from_filters.py index 75bb036d5872f..d5a5ed94b849b 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/session_recording_list_from_filters.py @@ -35,16 +35,8 @@ def is_group_property(p: Property) -> bool: return p.type == "group" -def is_dynamic_cohort_property(p: Property) -> bool: - return p.type == "cohort" - - -def is_static_cohort_property(p: Property) -> bool: - return p.type == "precalculated-cohort" - - def is_cohort_property(p: Property) -> bool: - return is_dynamic_cohort_property(p) or is_static_cohort_property(p) + return "cohort" in p.type class SessionRecordingQueryResult(NamedTuple): @@ -54,7 +46,9 @@ class SessionRecordingQueryResult(NamedTuple): class UnexpectedQueryProperties(Exception): - pass + def __init__(self, remaining_properties: PropertyGroup | None): + self.remaining_properties = remaining_properties + super().__init__(f"Unexpected properties in query: {remaining_properties}") class SessionRecordingListFromFilters: @@ -300,8 +294,7 @@ def _strip_person_and_event_and_cohort_properties(self, property_group: Property if not is_event_property(g) and not is_person_property(g) and not is_group_property(g) - and not is_dynamic_cohort_property(g) - and not is_static_cohort_property(g) + and not is_cohort_property(g) ] return ( From f1232e9095c87e8cf48aa767d81f3579fc352b17 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Sun, 20 Oct 2024 07:53:14 +0100 Subject: [PATCH 10/11] Rename --- .../queries/session_recording_list_from_filters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/posthog/session_recordings/queries/session_recording_list_from_filters.py b/posthog/session_recordings/queries/session_recording_list_from_filters.py index d5a5ed94b849b..38e06c4e4837d 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/session_recording_list_from_filters.py @@ -236,7 +236,7 @@ def _where_predicates(self) -> Union[ast.And, ast.Or]: ) ) - cohort_subquery = CohortPropertyGroupsSubQuery(self._team, self._filter, self.ttl_days).get_queries() + cohort_subquery = CohortPropertyGroupsSubQuery(self._team, self._filter, self.ttl_days).get_query() if cohort_subquery: optional_exprs.append( ast.CompareOperation( @@ -373,7 +373,7 @@ def __init__(self, team: Team, filter: SessionRecordingsFilter, ttl_days: int): self._filter = filter self._ttl_days = ttl_days - def get_queries(self) -> ast.SelectQuery | ast.SelectUnionQuery | None: + def get_query(self) -> ast.SelectQuery | ast.SelectUnionQuery | None: if self.cohort_properties: return parse_select( self.raw_cohort_to_distinct_id, From 4d8e366a82e347a09bc4873ce27d7f49562ee443 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Sun, 20 Oct 2024 08:02:46 +0100 Subject: [PATCH 11/11] fiddling --- ...t_session_recording_list_from_filters.ambr | 102 ++++----- ...est_session_recording_list_from_filters.py | 207 +++++++++--------- 2 files changed, 148 insertions(+), 161 deletions(-) diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr index a888184e60dfc..a0fc5699f9ee7 100644 --- a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr @@ -3478,17 +3478,18 @@ sum(s.console_error_count) AS console_error_count, ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing FROM session_replay_events AS s - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) - WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), ifNull(in(s__pdi.person_id, - (SELECT person_static_cohort.person_id AS person_id - FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 2)))), 0)) + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE ifNull(in(person_distinct_ids.person_id, + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 2)))), 0)))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -3521,27 +3522,18 @@ sum(s.console_error_count) AS console_error_count, ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing FROM session_replay_events AS s - WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) - WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-13 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), ifNull(equals(events__person.`properties___$some_prop`, 'some_val'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE ifNull(in(person_distinct_ids.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0)))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -3574,37 +3566,21 @@ sum(s.console_error_count) AS console_error_count, ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-08-21 19:55:00.000000', 6, 'UTC')), 0) AS ongoing FROM session_replay_events AS s - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS s__pdi ON equals(s.distinct_id, s__pdi.distinct_id) - WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), and(in(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) - WHERE and(equals(events.team_id, 2), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-13 23:58:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), ifNull(equals(events__person.`properties___$some_prop`, 'some_val'), 0)) - GROUP BY events.`$session_id` - HAVING 1)), ifNull(in(s__pdi.person_id, - (SELECT person_static_cohort.person_id AS person_id - FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 2)))), 0))) + WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-07-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-08-21 20:00:00.000000', 6, 'UTC')), 0), in(s.distinct_id, + (SELECT person_distinct_ids.distinct_id AS distinct_id + FROM + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids + WHERE and(ifNull(in(person_distinct_ids.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0)))), 0), ifNull(in(person_distinct_ids.person_id, + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 2)))), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC diff --git a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py index 178f6684ac352..e18cb0941d27a 100644 --- a/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/test/test_session_recording_list_from_filters.py @@ -2173,118 +2173,129 @@ def test_filter_with_cohort_properties(self): @snapshot_clickhouse_queries @also_test_with_materialized_columns(person_properties=["$some_prop"]) def test_filter_with_static_and_dynamic_cohort_properties(self): - with freeze_time("2021-08-21T20:00:00.000Z"): - user_one = "test_filter_with_cohort_properties-user-in-static-cohort" - user_two = "test_filter_with_cohort_properties-user2-in-dynamic-cohort" - user_three = "test_filter_with_cohort_properties-user3-in-both-cohort" + with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): + with freeze_time("2021-08-21T20:00:00.000Z"): + user_one = "test_filter_with_cohort_properties-user-in-static-cohort" + user_two = "test_filter_with_cohort_properties-user2-in-dynamic-cohort" + user_three = "test_filter_with_cohort_properties-user3-in-both-cohort" - session_id_one = f"in-static-cohort-test_filter_with_static_and_dynamic_cohort_properties-1-{str(uuid4())}" - session_id_two = f"in-dynamic-cohort-test_filter_with_static_and_dynamic_cohort_properties-2-{str(uuid4())}" - session_id_three = f"in-both-cohort-test_filter_with_static_and_dynamic_cohort_properties-3-{str(uuid4())}" + session_id_one = ( + f"in-static-cohort-test_filter_with_static_and_dynamic_cohort_properties-1-{str(uuid4())}" + ) + session_id_two = ( + f"in-dynamic-cohort-test_filter_with_static_and_dynamic_cohort_properties-2-{str(uuid4())}" + ) + session_id_three = ( + f"in-both-cohort-test_filter_with_static_and_dynamic_cohort_properties-3-{str(uuid4())}" + ) - Person.objects.create(team=self.team, distinct_ids=[user_one], properties={"email": "in@static.cohort"}) - Person.objects.create( - team=self.team, - distinct_ids=[user_two], - properties={"email": "in@dynamic.cohort", "$some_prop": "some_val"}, - ) - Person.objects.create( - team=self.team, - distinct_ids=[user_three], - properties={"email": "in@both.cohorts", "$some_prop": "some_val"}, - ) + Person.objects.create(team=self.team, distinct_ids=[user_one], properties={"email": "in@static.cohort"}) + Person.objects.create( + team=self.team, + distinct_ids=[user_two], + properties={"email": "in@dynamic.cohort", "$some_prop": "some_val"}, + ) + Person.objects.create( + team=self.team, + distinct_ids=[user_three], + properties={"email": "in@both.cohorts", "$some_prop": "some_val"}, + ) - dynamic_cohort = Cohort.objects.create( - team=self.team, - name="cohort1", - groups=[ + dynamic_cohort = Cohort.objects.create( + team=self.team, + name="cohort1", + groups=[ + { + "properties": [ + { + "key": "$some_prop", + "value": "some_val", + "type": "person", + } + ] + } + ], + ) + + static_cohort = Cohort.objects.create(team=self.team, name="a static cohort", groups=[], is_static=True) + static_cohort.insert_users_by_list([user_one, user_three]) + + dynamic_cohort.calculate_people_ch(pending_version=0) + static_cohort.calculate_people_ch(pending_version=0) + + replay_summaries = [ + (user_one, session_id_one), + (user_two, session_id_two), + (user_three, session_id_three), + ] + for distinct_id, session_id in replay_summaries: + produce_replay_summary( + distinct_id=distinct_id, + session_id=session_id, + first_timestamp=self.an_hour_ago, + team_id=self.team.id, + ) + produce_replay_summary( + distinct_id=distinct_id, + session_id=session_id, + first_timestamp=self.an_hour_ago + relativedelta(seconds=30), + team_id=self.team.id, + ) + + (session_recordings, _, _) = self._filter_recordings_by( { "properties": [ { - "key": "$some_prop", - "value": "some_val", - "type": "person", - } + "key": "id", + "value": static_cohort.pk, + "operator": "in", + "type": "cohort", + }, ] } - ], - ) - - static_cohort = Cohort.objects.create(team=self.team, name="a static cohort", groups=[], is_static=True) - static_cohort.insert_users_by_list([user_one, user_three]) - - dynamic_cohort.calculate_people_ch(pending_version=0) - static_cohort.calculate_people_ch(pending_version=0) - - replay_summaries = [ - (user_one, session_id_one), - (user_two, session_id_two), - (user_three, session_id_three), - ] - for distinct_id, session_id in replay_summaries: - produce_replay_summary( - distinct_id=distinct_id, - session_id=session_id, - first_timestamp=self.an_hour_ago, - team_id=self.team.id, - ) - produce_replay_summary( - distinct_id=distinct_id, - session_id=session_id, - first_timestamp=self.an_hour_ago + relativedelta(seconds=30), - team_id=self.team.id, ) - (session_recordings, _, _) = self._filter_recordings_by( - { - "properties": [ - { - "key": "id", - "value": static_cohort.pk, - "operator": "in", - "type": "cohort", - }, - ] - } - ) - - assert sorted([x["session_id"] for x in session_recordings]) == sorted([session_id_one, session_id_three]) + assert sorted([x["session_id"] for x in session_recordings]) == sorted( + [session_id_one, session_id_three] + ) - (session_recordings, _, _) = self._filter_recordings_by( - { - "properties": [ - { - "key": "id", - "value": dynamic_cohort.pk, - "operator": "in", - "type": "cohort", - }, - ] - } - ) + (session_recordings, _, _) = self._filter_recordings_by( + { + "properties": [ + { + "key": "id", + "value": dynamic_cohort.pk, + "operator": "in", + "type": "cohort", + }, + ] + } + ) - assert sorted([x["session_id"] for x in session_recordings]) == sorted([session_id_two, session_id_three]) + assert sorted([x["session_id"] for x in session_recordings]) == sorted( + [session_id_two, session_id_three] + ) - (session_recordings, _, _) = self._filter_recordings_by( - { - "properties": [ - { - "key": "id", - "value": dynamic_cohort.pk, - "operator": "in", - "type": "cohort", - }, - { - "key": "id", - "value": static_cohort.pk, - "operator": "in", - "type": "cohort", - }, - ] - } - ) + (session_recordings, _, _) = self._filter_recordings_by( + { + "properties": [ + { + "key": "id", + "value": dynamic_cohort.pk, + "operator": "in", + "type": "cohort", + }, + { + "key": "id", + "value": static_cohort.pk, + "operator": "in", + "type": "cohort", + }, + ] + } + ) - assert sorted([x["session_id"] for x in session_recordings]) == [session_id_three] + assert sorted([x["session_id"] for x in session_recordings]) == [session_id_three] @snapshot_clickhouse_queries @also_test_with_materialized_columns(person_properties=["$some_prop"])