diff --git a/posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py b/posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py index 1e76ab63fd102..8712559090f21 100644 --- a/posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py +++ b/posthog/hogql/database/schema/util/test/test_session_where_clause_extractor.py @@ -465,16 +465,6 @@ def test_session_breakdown(self): multiIf(and(ifNull(greaterOrEquals(e__session.`$session_duration`, 2.0), 0), ifNull(less(e__session.`$session_duration`, 4.5), 0)), %(hogql_val_10)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 4.5), 0), ifNull(less(e__session.`$session_duration`, 27.0), 0)), %(hogql_val_11)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 27.0), 0), ifNull(less(e__session.`$session_duration`, 44.0), 0)), %(hogql_val_12)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 44.0), 0), ifNull(less(e__session.`$session_duration`, 48.0), 0)), %(hogql_val_13)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 48.0), 0), ifNull(less(e__session.`$session_duration`, 57.5), 0)), %(hogql_val_14)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 57.5), 0), ifNull(less(e__session.`$session_duration`, 61.0), 0)), %(hogql_val_15)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 61.0), 0), ifNull(less(e__session.`$session_duration`, 74.0), 0)), %(hogql_val_16)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 74.0), 0), ifNull(less(e__session.`$session_duration`, 90.0), 0)), %(hogql_val_17)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 90.0), 0), ifNull(less(e__session.`$session_duration`, 98.5), 0)), %(hogql_val_18)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 98.5), 0), ifNull(less(e__session.`$session_duration`, 167.01), 0)), %(hogql_val_19)s, %(hogql_val_20)s) AS breakdown_value FROM events AS e SAMPLE 1 - LEFT JOIN (SELECT - dateDiff(%(hogql_val_0)s, min(toTimeZone(sessions.min_timestamp, %(hogql_val_1)s)), max(toTimeZone(sessions.max_timestamp, %(hogql_val_2)s))) AS `$session_duration`, - sessions.session_id AS session_id - FROM - sessions - WHERE - and(equals(sessions.team_id, {self.team.id}), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, %(hogql_val_3)s), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_4)s, 6, %(hogql_val_5)s)))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, %(hogql_val_6)s), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_7)s, 6, %(hogql_val_8)s))), 0)) - GROUP BY - sessions.session_id, - sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id @@ -487,6 +477,16 @@ def test_session_breakdown(self): HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN (SELECT + dateDiff(%(hogql_val_0)s, min(toTimeZone(sessions.min_timestamp, %(hogql_val_1)s)), max(toTimeZone(sessions.max_timestamp, %(hogql_val_2)s))) AS `$session_duration`, + sessions.session_id AS session_id + FROM + sessions + WHERE + and(equals(sessions.team_id, {self.team.id}), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, %(hogql_val_3)s), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_4)s, 6, %(hogql_val_5)s)))), 0), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, %(hogql_val_6)s), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_7)s, 6, %(hogql_val_8)s))), 0)) + GROUP BY + sessions.session_id, + sessions.session_id) AS e__session ON equals(e.`$session_id`, e__session.session_id) WHERE and(equals(e.team_id, {self.team.id}), and(greaterOrEquals(toTimeZone(e.timestamp, %(hogql_val_21)s), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_22)s, 6, %(hogql_val_23)s)))), lessOrEquals(toTimeZone(e.timestamp, %(hogql_val_24)s), assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_25)s, 6, %(hogql_val_26)s))), equals(e.event, %(hogql_val_27)s), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id diff --git a/posthog/hogql/transforms/lazy_tables.py b/posthog/hogql/transforms/lazy_tables.py index 78aed48862ad2..affb991d37316 100644 --- a/posthog/hogql/transforms/lazy_tables.py +++ b/posthog/hogql/transforms/lazy_tables.py @@ -41,17 +41,6 @@ def visit_field(self, node: ast.Field): node.chain = [constraint.table_name, constraint.alias] -class FieldFinder(TraversingVisitor): - field_chains: list[list[str | int]] = [] - - def __init__(self) -> None: - super().__init__() - self.field_chains = [] - - def visit_field(self, node: ast.Field): - self.field_chains.append(node.chain) - - class LazyFinder(TraversingVisitor): found_lazy: bool = False max_type_visits: int = 1 @@ -348,9 +337,6 @@ def create_override(table_name: str, field_chain: list[str | int]) -> None: break join_ptr = join_ptr.next_join - # Store all the constraint fields we've seen for each join to decide where in the order of joins the next join should be added - field_chain_finder = FieldFinder() - # For all the collected joins, create the join subqueries, and add them to the table. for to_table, join_scope in joins_to_add.items(): join_to_add: ast.JoinExpr = join_scope.lazy_join.join_function( @@ -380,31 +366,14 @@ def create_override(table_name: str, field_chain: list[str | int]) -> None: if join_to_add.type is not None: select_type.tables[to_table] = join_to_add.type - field_chain_finder.visit(join_to_add.constraint) - - select_from_alias: str | int | None = None - if node.select_from and node.select_from.alias: - select_from_alias = node.select_from.alias - else: - if node.select_from and node.select_from.table and isinstance(node.select_from.table, ast.Field): - select_from_alias = node.select_from.table.chain[0] - - constraint_tables = [x[0] for x in field_chain_finder.field_chains if x[0] != select_from_alias] - join_ptr = node.select_from added = False while join_ptr: if join_scope.from_table == join_ptr.alias or ( isinstance(join_ptr.table, ast.Field) and join_scope.from_table == join_ptr.table.chain[0] ): - # If the `join_to_add` is reliant on the existing `next_join`, then just append after instead of before - if join_ptr.next_join and join_ptr.next_join.alias in constraint_tables: - if join_ptr.next_join.next_join: - join_to_add.next_join = join_ptr.next_join.next_join - join_ptr.next_join.next_join = join_to_add - else: - join_to_add.next_join = join_ptr.next_join - join_ptr.next_join = join_to_add + join_to_add.next_join = join_ptr.next_join + join_ptr.next_join = join_to_add added = True break if join_ptr.next_join: diff --git a/posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr b/posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr index e7635b7c529f1..f21100141939e 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr @@ -93,19 +93,19 @@ ''' SELECT if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id) AS person_id, events__person.properties AS properties - FROM events LEFT OUTER JOIN ( - SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 420) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) - SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) LEFT JOIN ( + FROM events LEFT JOIN ( SELECT argMax(person.properties, person.version) AS properties, person.id AS id FROM person WHERE equals(person.team_id, 420) GROUP BY person.id HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, %(hogql_val_0)s), person.version), plus(now64(6, %(hogql_val_1)s), toIntervalDay(1))), 0)) - SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) + SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) LEFT OUTER JOIN ( + SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 420) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) WHERE equals(events.team_id, 420) LIMIT 50000 ''' diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index 6f72f195eedac..1dbe7a3fd269f 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -1687,6 +1687,13 @@ prop_basic AS prop, argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) LEFT JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, groups.group_type_index AS index, @@ -1695,13 +1702,6 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr index 829311d65ddb5..7e7d0e0586b32 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr @@ -612,6 +612,13 @@ prop_basic AS prop, argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) LEFT JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, groups.group_type_index AS index, @@ -620,13 +627,6 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr index b6b1455c1b42e..47d77c0817e46 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr @@ -1043,6 +1043,13 @@ prop_basic AS prop, argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) LEFT JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, groups.group_type_index AS index, @@ -1051,13 +1058,6 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))) WHERE ifNull(equals(step_0, 1), 0) UNION ALL SELECT aggregation_target AS aggregation_target, @@ -1114,6 +1114,13 @@ prop_basic AS prop, argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) LEFT JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, groups.group_type_index AS index, @@ -1122,13 +1129,6 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))) WHERE ifNull(equals(step_0, 1), 0) UNION ALL SELECT aggregation_target AS aggregation_target, @@ -1185,6 +1185,13 @@ prop_basic AS prop, argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) LEFT JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, groups.group_type_index AS index, @@ -1193,13 +1200,6 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_paths_query_runner_ee.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_paths_query_runner_ee.ambr index d876863ee6417..d4975a5c677b7 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_paths_query_runner_ee.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_paths_query_runner_ee.ambr @@ -958,6 +958,13 @@ multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) LEFT JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, groups.group_type_index AS index, @@ -966,13 +973,6 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS events__group_0 ON equals(events.`$group_0`, events__group_0.key) - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) WHERE and(equals(events.team_id, 2), ifNull(equals(events__group_0.properties___industry, 'finance'), 0), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-02-01 23:59:59', 6, 'UTC')))), or(equals(events.event, '$pageview'), equals(events.event, '$screen'), not(startsWith(events.event, '$')))) ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) GROUP BY person_id) ARRAY diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 83065a0d6f1a3..69d6e856f5540 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -804,6 +804,13 @@ toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 2) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) LEFT JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, groups.group_type_index AS index, @@ -812,13 +819,6 @@ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)) GROUP BY day_start, breakdown_value) @@ -1190,21 +1190,21 @@ toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'name'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___name, groups.group_type_index AS index, groups.group_key AS key FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 2), 0)) GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + groups.group_key) AS e__group_2 ON equals(e.`$group_2`, e__group_2.key) LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'name'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___name, + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, groups.group_type_index AS index, groups.group_key AS key FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 2), 0)) + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, - groups.group_key) AS e__group_2 ON equals(e.`$group_2`, e__group_2.key) + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__group_0.properties___industry, 'finance'), 0), ifNull(equals(e__group_2.properties___name, 'six'), 0))) GROUP BY day_start) GROUP BY day_start @@ -1237,21 +1237,21 @@ e.`$window_id` AS `$window_id` FROM events AS e LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'name'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___name, groups.group_type_index AS index, groups.group_key AS key FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 2), 0)) GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + groups.group_key) AS e__group_2 ON equals(e.`$group_2`, e__group_2.key) LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'name'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___name, + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, groups.group_type_index AS index, groups.group_key AS key FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 2), 0)) + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) GROUP BY groups.group_type_index, - groups.group_key) AS e__group_2 ON equals(e.`$group_2`, e__group_2.key) + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) WHERE and(equals(e.team_id, 2), and(ifNull(equals(e__group_0.properties___industry, 'finance'), 0), ifNull(equals(e__group_2.properties___name, 'six'), 0)), ifNull(greaterOrEquals(timestamp, toDateTime64('2020-01-02 00:00:00.000000', 6, 'UTC')), 0), ifNull(less(timestamp, toDateTime64('2020-01-03 00:00:00.000000', 6, 'UTC')), 0), equals(e.event, 'sign up'))) GROUP BY actor_id) AS source INNER JOIN @@ -1265,16 +1265,16 @@ FROM (SELECT e.person_id AS actor_id, toTimeZone(e.timestamp, 'UTC') AS timestamp, e.uuid AS uuid, e.`$session_id` AS `$session_id`, e.`$window_id` AS `$window_id` FROM events AS e - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, groups.group_type_index AS index, groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) - GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) LEFT JOIN (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'name'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___name, groups.group_type_index AS index, groups.group_key AS key FROM groups WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 2), 0)) GROUP BY groups.group_type_index, groups.group_key) AS e__group_2 ON equals(e.`$group_2`, e__group_2.key) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, groups.group_type_index AS index, groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) WHERE and(equals(e.team_id, 2), and(ifNull(equals(e__group_0.properties___industry, 'finance'), 0), ifNull(equals(e__group_2.properties___name, 'six'), 0)), ifNull(greaterOrEquals(timestamp, toDateTime64('2020-01-02 00:00:00.000000', 6, 'UTC')), 0), ifNull(less(timestamp, toDateTime64('2020-01-03 00:00:00.000000', 6, 'UTC')), 0), equals(e.event, 'sign up'))) GROUP BY actor_id) AS source))) GROUP BY person.id @@ -4296,6 +4296,13 @@ (SELECT any(e__session.`$session_duration`) AS session_duration, ifNull(nullIf(toString(e__pdi__person.`properties___$some_prop`), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(toTimeZone(raw_sessions.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions.max_timestamp, 'UTC'))) AS `$session_duration`, + raw_sessions.session_id_v7 AS session_id_v7 + FROM raw_sessions + WHERE and(equals(raw_sessions.team_id, 2), ifNull(greaterOrEquals(plus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY raw_sessions.session_id_v7, + raw_sessions.session_id_v7) AS e__session ON equals(toUInt128(accurateCastOrNull(e.`$session_id`, 'UUID')), e__session.session_id_v7) INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4304,13 +4311,6 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT dateDiff('second', min(toTimeZone(raw_sessions.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions.max_timestamp, 'UTC'))) AS `$session_duration`, - raw_sessions.session_id_v7 AS session_id_v7 - FROM raw_sessions - WHERE and(equals(raw_sessions.team_id, 2), ifNull(greaterOrEquals(plus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY raw_sessions.session_id_v7, - raw_sessions.session_id_v7) AS e__session ON equals(toUInt128(accurateCastOrNull(e.`$session_id`, 'UUID')), e__session.session_id_v7) LEFT JOIN (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` @@ -4360,6 +4360,13 @@ (SELECT any(e__session.`$session_duration`) AS session_duration, ifNull(nullIf(toString(e__pdi__person.`properties___$some_prop`), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value_1 FROM events AS e SAMPLE 1 + LEFT JOIN + (SELECT dateDiff('second', min(toTimeZone(raw_sessions.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions.max_timestamp, 'UTC'))) AS `$session_duration`, + raw_sessions.session_id_v7 AS session_id_v7 + FROM raw_sessions + WHERE and(equals(raw_sessions.team_id, 2), ifNull(greaterOrEquals(plus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) + GROUP BY raw_sessions.session_id_v7, + raw_sessions.session_id_v7) AS e__session ON equals(toUInt128(accurateCastOrNull(e.`$session_id`, 'UUID')), e__session.session_id_v7) INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, @@ -4368,13 +4375,6 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT dateDiff('second', min(toTimeZone(raw_sessions.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions.max_timestamp, 'UTC'))) AS `$session_duration`, - raw_sessions.session_id_v7 AS session_id_v7 - FROM raw_sessions - WHERE and(equals(raw_sessions.team_id, 2), ifNull(greaterOrEquals(plus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(minus(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), toIntervalDay(3)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) - GROUP BY raw_sessions.session_id_v7, - raw_sessions.session_id_v7) AS e__session ON equals(toUInt128(accurateCastOrNull(e.`$session_id`, 'UUID')), e__session.session_id_v7) LEFT JOIN (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` diff --git a/posthog/temporal/tests/data_imports/test_end_to_end.py b/posthog/temporal/tests/data_imports/test_end_to_end.py index 0292fe2d83f52..6ce2ef6e83e02 100644 --- a/posthog/temporal/tests/data_imports/test_end_to_end.py +++ b/posthog/temporal/tests/data_imports/test_end_to_end.py @@ -9,19 +9,8 @@ import pytest import pytest_asyncio import psycopg -from posthog.hogql.modifiers import create_default_modifiers_for_team from posthog.hogql.query import execute_hogql_query -from posthog.hogql_queries.insights.funnels.funnel import Funnel -from posthog.hogql_queries.insights.funnels.funnel_query_context import FunnelQueryContext from posthog.models.team.team import Team -from posthog.schema import ( - BreakdownFilter, - BreakdownType, - EventsNode, - FunnelsQuery, - HogQLQueryModifiers, - PersonsOnEventsMode, -) from posthog.temporal.data_imports import ACTIVITIES from posthog.temporal.data_imports.external_data_job import ExternalDataJobWorkflow from posthog.temporal.utils import ExternalDataWorkflowInputs @@ -39,8 +28,6 @@ from dlt.sources.helpers.rest_client.client import RESTClient from dlt.common.configuration.specs.aws_credentials import AwsCredentials -from posthog.warehouse.models.join import DataWarehouseJoin - BUCKET_NAME = "test-pipeline" SESSION = aioboto3.Session() @@ -584,47 +571,6 @@ def get_jobs(): assert len(s3_objects["Contents"]) != 0 -@pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_funnels_lazy_joins_ordering(team, stripe_customer): - # Tests that funnels work in PERSON_ID_OVERRIDE_PROPERTIES_JOINED PoE mode when using extended person properties - await _run( - team=team, - schema_name="Customer", - table_name="stripe_customer", - source_type="Stripe", - job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"}, - mock_data_response=stripe_customer["data"], - ) - - await sync_to_async(DataWarehouseJoin.objects.create)( - team=team, - source_table_name="persons", - source_table_key="properties.email", - joining_table_name="stripe_customer", - joining_table_key="email", - field_name="stripe_customer", - ) - - query = FunnelsQuery( - series=[EventsNode(), EventsNode()], - breakdownFilter=BreakdownFilter( - breakdown_type=BreakdownType.DATA_WAREHOUSE_PERSON_PROPERTY, breakdown="stripe_customer.email" - ), - ) - funnel_class = Funnel(context=FunnelQueryContext(query=query, team=team)) - - query_ast = funnel_class.get_query() - await sync_to_async(execute_hogql_query)( - query_type="FunnelsQuery", - query=query_ast, - team=team, - modifiers=create_default_modifiers_for_team( - team, HogQLQueryModifiers(personsOnEventsMode=PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_JOINED) - ), - ) - - @pytest.mark.django_db(transaction=True) @pytest.mark.asyncio async def test_postgres_schema_evolution(team, postgres_config, postgres_connection):