-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix "Filter internal and test users" when personsOnEventsMode = PersonsOnEventsMode.PERSON_ID_NO_OVERRIDE_PROPERTIES_ON_EVENTS #27132
Comments
Query failure comes from posthog/posthog/hogql/printer.py Line 1315 in 89522b1
|
@tkaemming and I spent some time debugging this today. In #27067, ifNull(
notILike(e__events__person.properties___email, %(hogql_val_26)s),
1
)' not found in 'SELECT
groupArray(1)(date)[1] AS date,
arrayFold(
(acc, x) -> arrayMap(
i -> plus(acc[i], x[i]),
range(1, plus(length(date), 1))
),
groupArray(ifNull(total, 0)),
arrayWithConstant(length(date), reinterpretAsFloat64(0))
) AS total,
if(
ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0),
%(hogql_val_41)s,
breakdown_value
) AS breakdown_value
FROM (
SELECT
arrayMap(
number -> plus(
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_32)s, 6, %(hogql_val_33)s))),
toIntervalDay(number)
),
range(0, plus(
coalesce(
dateDiff(
%(hogql_val_34)s,
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_35)s, 6, %(hogql_val_36)s))),
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_37)s, 6, %(hogql_val_38)s)))
)
),
1
))
) AS date,
arrayFill(
x -> ifNull(greater(x, 0), 0),
arrayMap(
_match_date -> arraySum(
arraySlice(
groupArray(ifNull(count, 0)),
indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index,
plus(minus(
arrayLastIndex(
x -> ifNull(equals(x, _match_date), isNull(x) and isNull(_match_date)),
_days_for_count
),
_index
), 1)
)
),
date
)
) AS total,
breakdown_value AS breakdown_value,
rowNumberInAllBlocks() AS row_number
FROM (
SELECT
day_start AS day_start,
sum(count) OVER (
PARTITION BY breakdown_value
ORDER BY day_start ASC
) AS count,
breakdown_value AS breakdown_value
FROM (
SELECT
sum(total) AS count,
day_start AS day_start,
breakdown_value AS breakdown_value
FROM (
SELECT
ifNull(avg(accurateCastOrNull(e.usage, %(hogql_val_17)s)), 0) AS total,
toStartOfDay(toDateTime(e.ds, %(hogql_val_18)s)) AS day_start,
ifNull(
nullIf(toString(e__events.`properties___$feature/test-experiment`), %(hogql_val_19)s),
%(hogql_val_20)s
) AS breakdown_value
FROM (
SELECT *
FROM s3(%(hogql_val_0_sensitive)s, %(hogql_val_3_sensitive)s, %(hogql_val_4_sensitive)s, %(hogql_val_1)s, %(hogql_val_2)s)
) AS e
ASOF LEFT JOIN (
SELECT
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_5)s), ''), 'null'), '^"|"$', '') AS `properties___$feature/test-experiment`,
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_6)s), ''), 'null'), '^"|"$', '') AS `properties___$host`,
events.event AS event,
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_7)s), ''), 'null'), '^"|"$', '') AS `e__events___properties___$user_id`,
if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id) AS e__events___person_id,
toTimeZone(events.timestamp, %(hogql_val_8)s) AS timestamp,
events.distinct_id AS distinct_id,
events.properties AS properties
FROM events
LEFT OUTER JOIN (
SELECT
argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
person_distinct_id_overrides.distinct_id AS distinct_id
FROM person_distinct_id_overrides
WHERE equals(person_distinct_id_overrides.team_id, 2686)
GROUP BY person_distinct_id_overrides.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)
SETTINGS optimize_aggregation_in_order=1
) AS events__override ON equals(events.distinct_id, events__override.distinct_id)
WHERE and(
equals(events.team_id, 2686),
equals(event, %(hogql_val_9)s),
greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_10)s, 6, %(hogql_val_11)s))),
lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_12)s, 6, %(hogql_val_13)s)))
)
) AS e__events ON and(
equals(e__events.event, %(hogql_val_14)s),
equals(e.userid, e__events.`e__events___properties___$user_id`),
greaterOrEquals(e.ds, e__events.timestamp)
)
LEFT JOIN (
SELECT
person.id AS id,
nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email
FROM person
WHERE and(
equals(person.team_id, 2686),
ifNull(
in(
tuple(person.id, person.version),
(
SELECT
person.id AS id,
max(person.version) AS version
FROM person
WHERE equals(person.team_id, 2686)
GROUP BY person.id
HAVING and(
ifNull(equals(argMax(person.is_deleted, person.version), 0), 0),
ifNull(
less(
argMax(toTimeZone(person.created_at, %(hogql_val_15)s), person.version),
plus(now64(6, %(hogql_val_16)s), toIntervalDay(1))
),
0
)
)
)
),
0
)
)
SETTINGS optimize_aggregation_in_order=1
) AS e__events__person ON equals(e__events.e__events___person_id, e__events__person.id)
WHERE and(
ifNull(
greaterOrEquals(
toDateTime(e.ds, %(hogql_val_21)s),
assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_22)s, 6, %(hogql_val_23)s))
),
0
),
ifNull(
lessOrEquals(
toDateTime(e.ds, %(hogql_val_24)s),
assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_25)s, 6, %(hogql_val_26)s))
),
0
),
ifNull(notILike(e__events__person.properties___email, %(hogql_val_27)s), 1),
ifNull(not(match(toString(e__events.`properties___$host`), %(hogql_val_28)s)), 1),
and(
ifNull(equals(e__events.event, %(hogql_val_29)s), 0),
or(
ifNull(equals(e__events.`properties___$feature/test-experiment`, %(hogql_val_30)s), 0),
ifNull(equals(e__events.`properties___$feature/test-experiment`, %(hogql_val_31)s), 0)
)
)
)
GROUP BY
day_start,
breakdown_value
)
GROUP BY
day_start,
breakdown_value
ORDER BY
day_start ASC,
breakdown_value ASC
)
ORDER BY day_start ASC
)
GROUP BY breakdown_value
ORDER BY
if(
ifNull(equals(breakdown_value, %(hogql_val_39)s), 0),
2,
if(ifNull(equals(breakdown_value, %(hogql_val_40)s), 0), 1, 0)
) ASC,
arraySum(total) DESC,
breakdown_value ASC
)
WHERE isNotNull(breakdown_value)
GROUP BY breakdown_value
ORDER BY
if(
ifNull(equals(breakdown_value, %(hogql_val_42)s), 0),
2,
if(ifNull(equals(breakdown_value, %(hogql_val_43)s), 0), 1, 0)
) ASC,
arraySum(total) DESC,
breakdown_value ASC
LIMIT 50000
SETTINGS
readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0 However, SELECT
groupArray(1)(date)[1] AS date,
arrayFold(
(acc, x) -> arrayMap(
i -> plus(acc[i], x[i]),
range(1, plus(length(date), 1))
),
groupArray(ifNull(total, 0)),
arrayWithConstant(length(date), reinterpretAsFloat64(0))
) AS total,
if(
ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0),
%(hogql_val_39)s,
breakdown_value
) AS breakdown_value
FROM (
SELECT
arrayMap(
number -> plus(
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_30)s, 6, %(hogql_val_31)s))),
toIntervalDay(number)
),
range(0, plus(
coalesce(
dateDiff(
%(hogql_val_32)s,
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_33)s, 6, %(hogql_val_34)s))),
toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_35)s, 6, %(hogql_val_36)s)))
)
),
1
))
) AS date,
arrayFill(
x -> ifNull(greater(x, 0), 0),
arrayMap(
_match_date -> arraySum(
arraySlice(
groupArray(ifNull(count, 0)),
indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index,
plus(minus(
arrayLastIndex(
x -> ifNull(equals(x, _match_date), isNull(x) and isNull(_match_date)),
_days_for_count
),
_index
), 1)
)
),
date
)
) AS total,
breakdown_value AS breakdown_value,
rowNumberInAllBlocks() AS row_number
FROM (
SELECT
day_start AS day_start,
sum(count) OVER (
PARTITION BY breakdown_value
ORDER BY day_start ASC
) AS count,
breakdown_value AS breakdown_value
FROM (
SELECT
sum(total) AS count,
day_start AS day_start,
breakdown_value AS breakdown_value
FROM (
SELECT
ifNull(avg(accurateCastOrNull(e.usage, %(hogql_val_15)s)), 0) AS total,
toStartOfDay(toDateTime(e.ds, %(hogql_val_16)s)) AS day_start,
ifNull(
nullIf(toString(e__events.`properties___$feature/test-experiment`), %(hogql_val_17)s),
%(hogql_val_18)s
) AS breakdown_value
FROM (
SELECT *
FROM s3(
%(hogql_val_0_sensitive)s,
%(hogql_val_3_sensitive)s,
%(hogql_val_4_sensitive)s,
%(hogql_val_1)s,
%(hogql_val_2)s
)
) AS e
ASOF LEFT JOIN (
SELECT
replaceRegexpAll(
nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_5)s), ''), 'null'),
'^"|"$',
''
) AS `properties___$feature/test-experiment`,
replaceRegexpAll(
nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_6)s), ''), 'null'),
'^"|"$',
''
) AS `properties___$host`,
events.event AS event,
replaceRegexpAll(
nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_7)s), ''), 'null'),
'^"|"$',
''
) AS `e__events___properties___$user_id`,
toTimeZone(events.timestamp, %(hogql_val_8)s) AS timestamp,
events.distinct_id AS distinct_id,
events.properties AS properties
FROM events
WHERE and(
equals(events.team_id, 2683),
equals(event, %(hogql_val_9)s),
greaterOrEquals(
timestamp,
assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_10)s, 6, %(hogql_val_11)s))
),
lessOrEquals(
timestamp,
assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_12)s, 6, %(hogql_val_13)s))
)
)
) AS e__events ON and(
equals(e__events.event, %(hogql_val_14)s),
equals(e.userid, e__events.`e__events___properties___$user_id`),
greaterOrEquals(e.ds, e__events.timestamp)
)
WHERE and(
ifNull(
greaterOrEquals(
toDateTime(e.ds, %(hogql_val_19)s),
assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_20)s, 6, %(hogql_val_21)s))
),
0
),
ifNull(
lessOrEquals(
toDateTime(e.ds, %(hogql_val_22)s),
assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_23)s, 6, %(hogql_val_24)s))
),
0
),
ifNull(notILike(nullIf(nullIf(mat_pp_email, ''), 'null')`, %(hogql_val_25)s), 1),
ifNull(not(match(toString(e__events.`properties___$host`), %(hogql_val_26)s)), 1),
and(
ifNull(equals(e__events.event, %(hogql_val_27)s), 0),
or(
ifNull(equals(e__events.`properties___$feature/test-experiment`, %(hogql_val_28)s), 0),
ifNull(equals(e__events.`properties___$feature/test-experiment`, %(hogql_val_29)s), 0)
)
)
)
GROUP BY
day_start,
breakdown_value
)
GROUP BY
day_start,
breakdown_value
ORDER BY
day_start ASC,
breakdown_value ASC
)
ORDER BY
day_start ASC
)
GROUP BY
breakdown_value
ORDER BY
if(
ifNull(equals(breakdown_value, %(hogql_val_37)s), 0),
2,
if(ifNull(equals(breakdown_value, %(hogql_val_38)s), 0), 1, 0)
) ASC,
arraySum(total) DESC,
breakdown_value ASC
)
WHERE
isNotNull(breakdown_value)
GROUP BY
breakdown_value
ORDER BY
if(
ifNull(equals(breakdown_value, %(hogql_val_40)s), 0),
2,
if(ifNull(equals(breakdown_value, %(hogql_val_41)s), 0), 1, 0)
) ASC,
arraySum(total) DESC,
breakdown_value ASC
LIMIT 50000
SETTINGS
readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
max_ast_elements=4000000,
max_expanded_ast_elements=4000000,
max_bytes_before_external_group_by=0 The relevant diff between the two is the transformation of diff --git a/query1.sql b/query2.sql
--- a/query1.sql
+++ b/query2.sql
@@ -1,11 +1,11 @@
-ifNull(notILike(e__events__person.properties___email, %(hogql_val)s), 1),
+ifNull(notILike(nullIf(nullIf(mat_pp_email, ''), 'null')`, %(hogql_val)s), 1),
ifNull(not(match(toString(e__events.`properties___$host`), %(hogql_val)s)), 1),
and(
ifNull(equals(e__events.event, %(hogql_val)s), 0),
or(
ifNull(equals(e__events.`properties___$feature/test-experiment`, %(hogql_val)s), 0),
ifNull(equals(e__events.`properties___$feature/test-experiment`, %(hogql_val)s), 0)
)
) It's caused by this use of person on events mode: posthog/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py Lines 1051 to 1055 in 537dd4a
The posthog/posthog/hogql/printer.py Lines 1355 to 1370 in 537dd4a
When person on events mode is used, |
Fixed 🔨 |
When using a data warehouse table with experiments, "Filter internal and test users" fails with this SQL error when personsOnEventsMode = PersonsOnEventsMode.PERSON_ID_NO_OVERRIDE_PROPERTIES_ON_EVENTS:
Here's an example of the full failing query from #27067:
The underlying problem is that
e__events__person.properties___email
is resolving tomat_pp_email
, but there's nomat_pp_email
materialized column on the data warehouse table. The query should resolve toe__events.mat_pp_email
.The text was updated successfully, but these errors were encountered: