Skip to content

Commit

Permalink
feat(web-analytics): Add more tests for the raw_sessions table (#24706)
Browse files Browse the repository at this point in the history
  • Loading branch information
robbie-c authored Aug 30, 2024
1 parent db40489 commit 496d4c0
Show file tree
Hide file tree
Showing 2 changed files with 292 additions and 3 deletions.
3 changes: 2 additions & 1 deletion posthog/clickhouse/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
DISTRIBUTED_RAW_SESSIONS_TABLE_SQL,
WRITABLE_RAW_SESSIONS_TABLE_SQL,
RAW_SESSIONS_TABLE_MV_SQL,
RAW_SESSIONS_VIEW_SQL,
)
from posthog.models.sessions.sql import (
SESSIONS_TABLE_SQL,
Expand Down Expand Up @@ -211,7 +212,7 @@

# Data-seeding queries. CHANNEL_DEFINITION_DATA_SQL is called here, so the
# tuple holds its return value rather than the callable itself.
CREATE_DATA_QUERIES = (CHANNEL_DEFINITION_DATA_SQL(),)

# View-creation queries.
# NOTE(review): CREATE_VIEW_QUERIES is assigned twice in a row, so only the
# second tuple (which also contains RAW_SESSIONS_VIEW_SQL) is ever observed.
# The first line looks like the pre-change side of a diff - confirm.
CREATE_VIEW_QUERIES = (SESSIONS_VIEW_SQL,)
CREATE_VIEW_QUERIES = (SESSIONS_VIEW_SQL, RAW_SESSIONS_VIEW_SQL)

def build_query(query):
    """Return the SQL text for ``query``.

    A ``str`` is returned unchanged; anything else is called with no
    arguments and the result of that call is returned.
    """
    if isinstance(query, str):
        return query
    return query()


def get_table_name(query):
    """Return the identifier that directly precedes ``ON CLUSTER`` in ``query``.

    ``query`` is first resolved through ``build_query``. The identifier must be
    made of lowercase letters, digits and underscores, may be wrapped in
    backticks, and must follow a dot or whitespace. Only the first match is
    returned.

    Raises:
        IndexError: if the SQL contains no such ``<name> ON CLUSTER`` sequence.
    """
    sql = build_query(query)
    matches = re.findall(r"[\.\s]`?([a-z0-9_]+)`?\s+ON CLUSTER", sql)
    return matches[0]
292 changes: 290 additions & 2 deletions posthog/clickhouse/test/test_raw_sessions_model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from posthog.clickhouse.client import query_with_columns
from posthog.clickhouse.client import sync_execute
from posthog.models.raw_sessions.sql import RAW_SESSION_TABLE_BACKFILL_SELECT_SQL
from posthog.models.utils import uuid7
Expand All @@ -7,11 +8,298 @@
BaseTest,
)

# Module-level counters, both starting at zero. create_distinct_id() and
# create_session_id() increment them via `global` on every call.
distinct_id_counter = 0
session_id_counter = 0


def create_distinct_id():
    """Return a new distinct id of the form ``d<N>``.

    Each call increments the module-level ``distinct_id_counter`` and uses the
    new value as ``N``.
    """
    global distinct_id_counter
    distinct_id_counter = distinct_id_counter + 1
    return "d" + str(distinct_id_counter)


def create_session_id():
    """Return a new session id as the string form of a ``uuid7`` value.

    Each call increments the module-level ``session_id_counter`` and passes the
    new value to ``uuid7`` as its ``random`` argument.
    NOTE(review): presumably this keeps generated ids reproducible between
    runs - confirm against ``uuid7``'s implementation.
    """
    global session_id_counter
    session_id_counter = session_id_counter + 1
    session_uuid = uuid7(random=session_id_counter)
    return str(session_uuid)


class TestRawSessionsModel(ClickhouseTestMixin, BaseTest):
def select_by_session_id(self, session_id):
    """Select all columns of ``raw_sessions_v`` for one session of this team.

    The query is filtered by ``session_id`` (converted with
    ``toUInt128(toUUID(...))``) and by ``self.team.id``. The result of
    ``query_with_columns`` is returned as-is; the tests in this class index
    each returned row by column name.
    """
    params = {
        "session_id": session_id,
        "team_id": self.team.id,
    }
    # SQL text is kept exactly as it appears in the source.
    sql = """
select
*
from raw_sessions_v
where
session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND
team_id = %(team_id)s
"""
    return query_with_columns(sql, params)

def test_it_creates_session_when_creating_event(self):
    """Creating one ``$pageview`` event yields exactly one ``raw_sessions_v`` row."""
    distinct_id = create_distinct_id()
    session_id = create_session_id()
    event_properties = {"$current_url": "/", "$session_id": session_id}
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id=distinct_id,
        properties=event_properties,
        timestamp="2024-03-08",
    )

    query_params = {
        "session_id": session_id,
        "team_id": self.team.id,
    }
    # SQL text is kept exactly as it appears in the source.
    rows = sync_execute(
        """
select
session_id_v7,
team_id
from raw_sessions_v
where
session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND
team_id = %(team_id)s
""",
        query_params,
    )

    self.assertEqual(len(rows), 1)

def test_handles_different_distinct_id_across_same_session(self):
    """Two pageviews with different distinct ids but one session id make one row.

    The row's ``distinct_id`` may be either of the two ids, and its
    ``pageview_count`` must be 2.
    """
    first_distinct_id = create_distinct_id()
    second_distinct_id = create_distinct_id()
    session_id = create_session_id()

    # One pageview per distinct id, both attached to the same session.
    for distinct_id in (first_distinct_id, second_distinct_id):
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id=distinct_id,
            properties={"$session_id": session_id},
            timestamp="2024-03-08",
        )

    rows = self.select_by_session_id(session_id)
    self.assertEqual(len(rows), 1)
    session_row = rows[0]
    self.assertIn(session_row["distinct_id"], {first_distinct_id, second_distinct_id})
    self.assertEqual(session_row["pageview_count"], 2)

def test_handles_entry_and_exit_urls(self):
    """Entry URL, end URL and the set of visited URLs are derived per session.

    Four pageviews are created in timestamp order: ``/entry``, ``/middle``
    twice, then ``/exit``. The session row must report ``/entry`` as
    ``entry_url``, ``/exit`` as ``end_url``, and three distinct ``urls``.
    """
    distinct_id = create_distinct_id()
    session_id = create_session_id()

    # (current url, timestamp) pairs, in the order the events are created.
    visits = (
        ("/entry", "2024-03-08:01"),
        ("/middle", "2024-03-08:02"),
        ("/middle", "2024-03-08:03"),
        ("/exit", "2024-03-08:04"),
    )
    for current_url, timestamp in visits:
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id=distinct_id,
            properties={"$current_url": current_url, "$session_id": session_id},
            timestamp=timestamp,
        )

    rows = self.select_by_session_id(session_id)
    self.assertEqual(len(rows), 1)
    session_row = rows[0]
    self.assertEqual(session_row["entry_url"], "/entry")
    self.assertEqual(session_row["end_url"], "/exit")
    self.assertEqual(len(session_row["urls"]), 3)
    # order is not guaranteed, so compare as a set
    self.assertEqual(set(session_row["urls"]), {"/entry", "/middle", "/exit"})

def test_handles_initial_utm_properties(self):
    """The session's ``initial_utm_source`` comes from its first event.

    Two pageviews in the same session carry different ``utm_source`` values;
    the session row must report the value of the earlier one.
    """
    distinct_id = create_distinct_id()
    session_id = create_session_id()

    # The two events get distinct, increasing timestamps so that "first event"
    # is unambiguous. With identical timestamps the asserted value would
    # depend on how ties between the events are resolved.
    # NOTE(review): presumably the initial_* columns are chosen by minimum
    # timestamp - confirm against the raw_sessions SQL.
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id=distinct_id,
        properties={"$session_id": session_id, "utm_source": "source"},
        timestamp="2024-03-08:01",
    )
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id=distinct_id,
        properties={"$session_id": session_id, "utm_source": "other_source"},
        timestamp="2024-03-08:02",
    )

    responses = self.select_by_session_id(session_id)
    self.assertEqual(len(responses), 1)
    self.assertEqual(responses[0]["initial_utm_source"], "source")

def test_counts_pageviews_autocaptures_and_events(self):
    """Pageviews and autocaptures are counted separately within a session.

    Five events are created in one session: one ``$pageview``, two
    ``$autocapture``, one ``other event`` and one ``$pageleave``. The session
    row must report ``pageview_count`` 1 and ``autocapture_count`` 2.
    """
    distinct_id = create_distinct_id()
    session_id = create_session_id()

    # Event names, in creation order; all share the same session and timestamp.
    event_names = (
        "$pageview",
        "$autocapture",
        "$autocapture",
        "other event",
        "$pageleave",
    )
    for event_name in event_names:
        _create_event(
            team=self.team,
            event=event_name,
            distinct_id=distinct_id,
            properties={"$session_id": session_id},
            timestamp="2024-03-08",
        )

    rows = self.select_by_session_id(session_id)
    self.assertEqual(len(rows), 1)
    session_row = rows[0]
    self.assertEqual(session_row["pageview_count"], 1)
    self.assertEqual(session_row["autocapture_count"], 2)

def test_separates_sessions_across_same_user(self):
    """One user with two session ids gets two separate session rows.

    A third session id that is generated but never attached to an event must
    return no rows.
    """
    distinct_id = create_distinct_id()
    session_id1 = create_session_id()
    session_id2 = create_session_id()
    session_id3 = create_session_id()

    # Only the first two session ids receive an event.
    for used_session_id in (session_id1, session_id2):
        _create_event(
            team=self.team,
            event="$pageview",
            distinct_id=distinct_id,
            properties={"$session_id": used_session_id},
            timestamp="2024-03-08",
        )

    self.assertEqual(len(self.select_by_session_id(session_id1)), 1)
    self.assertEqual(len(self.select_by_session_id(session_id2)), 1)
    self.assertEqual(len(self.select_by_session_id(session_id3)), 0)

def test_select_from_sessions(self):
    """Smoke test: selecting from the ``raw_sessions`` table works without error.

    After one ``$pageview`` event, the query must return exactly one row.
    """
    distinct_id = create_distinct_id()
    session_id = create_session_id()
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id=distinct_id,
        properties={"$session_id": session_id},
        timestamp="2024-03-08",
    )

    query_params = {
        "session_id": session_id,
        "team_id": self.team.id,
    }
    # we can't include all the columns as this clickhouse driver doesn't support selecting states
    # SQL text is kept exactly as it appears in the source.
    rows = sync_execute(
        """
SELECT
session_id_v7,
team_id,
min_timestamp,
max_timestamp,
urls,
pageview_count,
autocapture_count
FROM raw_sessions
WHERE session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND team_id = %(team_id)s
""",
        query_params,
    )
    self.assertEqual(len(rows), 1)

def test_select_from_sessions_mv(self):
    """Smoke test: selecting from ``raw_sessions_mv`` works without error.

    After one ``$pageview`` event, the query must return exactly one row.
    """
    distinct_id = create_distinct_id()
    session_id = create_session_id()
    _create_event(
        team=self.team,
        event="$pageview",
        distinct_id=distinct_id,
        properties={"$session_id": session_id},
        timestamp="2024-03-08",
    )

    query_params = {
        "session_id": session_id,
        "team_id": self.team.id,
    }
    # we can't include all the columns as this clickhouse driver doesn't support selecting states
    # SQL text is kept exactly as it appears in the source.
    rows = sync_execute(
        """
SELECT
session_id_v7,
team_id,
min_timestamp,
max_timestamp,
urls,
pageview_count,
autocapture_count
FROM raw_sessions_mv
WHERE session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND team_id = %(team_id)s
""",
        query_params,
    )
    self.assertEqual(len(rows), 1)

def test_backfill_sql(self):
distinct_id = str(uuid7())
session_id = str(uuid7())
distinct_id = create_distinct_id()
session_id = create_session_id()
_create_event(
team=self.team,
event="$pageview",
Expand Down

0 comments on commit 496d4c0

Please sign in to comment.