From d3773c159191cf12667a09c3ee63ddc8f98009a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szczur?= Date: Thu, 12 Dec 2024 14:45:02 +0000 Subject: [PATCH] feat(query_log): add query_log table to hogql (#26822) --- posthog/hogql/database/database.py | 4 + posthog/hogql/database/schema/query_log.py | 131 ++++++++ .../schema/test/test_table_query_log.py | 53 ++++ .../test/__snapshots__/test_database.ambr | 294 ++++++++++++++++++ posthog/hogql/printer.py | 7 +- 5 files changed, 487 insertions(+), 2 deletions(-) create mode 100644 posthog/hogql/database/schema/query_log.py create mode 100644 posthog/hogql/database/schema/test/test_table_query_log.py diff --git a/posthog/hogql/database/database.py b/posthog/hogql/database/database.py index 309f0751e5fa7..dd8ffc8a377a9 100644 --- a/posthog/hogql/database/database.py +++ b/posthog/hogql/database/database.py @@ -59,6 +59,7 @@ RawPersonsTable, join_with_persons_table, ) +from posthog.hogql.database.schema.query_log import QueryLogTable, RawQueryLogTable from posthog.hogql.database.schema.session_replay_events import ( RawSessionReplayEventsTable, SessionReplayEventsTable, @@ -117,6 +118,7 @@ class Database(BaseModel): cohort_people: CohortPeople = CohortPeople() static_cohort_people: StaticCohortPeople = StaticCohortPeople() log_entries: LogEntriesTable = LogEntriesTable() + query_log: QueryLogTable = QueryLogTable() app_metrics: AppMetrics2Table = AppMetrics2Table() console_logs_log_entries: ReplayConsoleLogsLogEntriesTable = ReplayConsoleLogsLogEntriesTable() batch_export_log_entries: BatchExportLogEntriesTable = BatchExportLogEntriesTable() @@ -133,6 +135,7 @@ class Database(BaseModel): RawErrorTrackingIssueFingerprintOverridesTable() ) raw_sessions: Union[RawSessionsTableV1, RawSessionsTableV2] = RawSessionsTableV1() + raw_query_log: RawQueryLogTable = RawQueryLogTable() # system tables numbers: NumbersTable = NumbersTable() @@ -150,6 +153,7 @@ class Database(BaseModel): "app_metrics", "sessions", "heatmaps", + "query_log", ] _warehouse_table_names: list[str] = [] diff --git a/posthog/hogql/database/schema/query_log.py b/posthog/hogql/database/schema/query_log.py new file mode 100644 index 0000000000000..873ebfa50a239 --- /dev/null +++ b/posthog/hogql/database/schema/query_log.py @@ -0,0 +1,131 @@ +from typing import Any + +from posthog.hogql import ast +from posthog.hogql.database.models import ( + IntegerDatabaseField, + StringDatabaseField, + DateTimeDatabaseField, + LazyTable, + FieldOrTable, + LazyTableToAdd, + FloatDatabaseField, + FunctionCallTable, + BooleanDatabaseField, +) + +QUERY_LOG_FIELDS: dict[str, FieldOrTable] = { + "query_id": StringDatabaseField(name="query_id"), + "query": StringDatabaseField(name="query"), # + "query_start_time": DateTimeDatabaseField(name="event_time"), # + "query_duration_ms": FloatDatabaseField(name="query_duration_ms"), # + "created_by": IntegerDatabaseField(name="created_by"), + "read_rows": IntegerDatabaseField(name="read_rows"), + "read_bytes": IntegerDatabaseField(name="read_bytes"), + "result_rows": IntegerDatabaseField(name="result_rows"), + "result_bytes": IntegerDatabaseField(name="result_bytes"), + "memory_usage": IntegerDatabaseField(name="memory_usage"), + "status": StringDatabaseField(name="type"), + "kind": StringDatabaseField(name="kind"), + "query_type": StringDatabaseField(name="query_type"), + "is_personal_api_key_request": BooleanDatabaseField(name="is_personal_api_key_request"), +} + +RAW_QUERY_LOG_FIELDS: dict[str, FieldOrTable] = QUERY_LOG_FIELDS | { + # below fields are necessary to compute some of the resulting fields + "type": StringDatabaseField(name="type"), + "is_initial_query": BooleanDatabaseField(name="is_initial_query"), + "log_comment": StringDatabaseField(name="log_comment"), +} + +STRING_FIELDS = { + "query_type": ["query_type"], + "query_id": ["client_query_id"], + "query": ["query", "query"], + "kind": ["query", "kind"], +} +INT_FIELDS = {"created_by": ["user_id"]} + + +class QueryLogTable(LazyTable): + fields: dict[str, FieldOrTable] = QUERY_LOG_FIELDS + + def to_printed_clickhouse(self, context) -> str: + return "query_log" + + def to_printed_hogql(self) -> str: + return "query_log" + + def lazy_select(self, table_to_add: LazyTableToAdd, context, node) -> Any: + requested_fields = table_to_add.fields_accessed + + raw_table_name = "raw_query_log" + + def get_alias(name, chain): + if name in STRING_FIELDS: + keys = STRING_FIELDS[name] + expr = ast.Call( + name="JSONExtractString", + args=[ast.Field(chain=[raw_table_name, "log_comment"])] + [ast.Constant(value=v) for v in keys], + ) + return ast.Alias(alias=name, expr=expr) + if name in INT_FIELDS: + keys = INT_FIELDS[name] + expr = ast.Call( + name="JSONExtractInt", + args=[ast.Field(chain=[raw_table_name, "log_comment"])] + [ast.Constant(value=v) for v in keys], + ) + return ast.Alias(alias=name, expr=expr) + if name == "is_personal_api_key_request": + cmp_expr = ast.CompareOperation( + op=ast.CompareOperationOp.Eq, + left=ast.Constant(value="personal_api_key"), + right=ast.Call( + name="JSONExtractString", + args=[ast.Field(chain=["log_comment"]), ast.Constant(value="access_method")], + ), + ) + return ast.Alias(alias=name, expr=cmp_expr) + return ast.Alias(alias=name, expr=ast.Field(chain=[raw_table_name, *chain])) + + fields: list[ast.Expr] = [get_alias(name, chain) for name, chain in requested_fields.items()] + + return ast.SelectQuery( + select=fields, + select_from=ast.JoinExpr(table=ast.Field(chain=[raw_table_name])), + where=ast.And( + exprs=[ + ast.CompareOperation( + op=ast.CompareOperationOp.Eq, + left=ast.Constant(value=context.project_id), + right=ast.Call( + name="JSONExtractInt", + args=[ast.Field(chain=["log_comment"]), ast.Constant(value="user_id")], + ), + ), + ast.CompareOperation( + op=ast.CompareOperationOp.In, + left=ast.Field(chain=["type"]), + right=ast.Array( + exprs=[ + ast.Constant(value="QueryFinish"), + ast.Constant(value="ExceptionBeforeStart"), + ast.Constant(value="ExceptionWhileProcessing"), + ] + ), + ), + ast.Field(chain=["is_initial_query"]), + ] + ), + ) + + +class RawQueryLogTable(FunctionCallTable): + fields: dict[str, FieldOrTable] = RAW_QUERY_LOG_FIELDS + + name: str = "raw_query_log" + + def to_printed_clickhouse(self, context) -> str: + return "clusterAllReplicas(posthog, system.query_log)" + + def to_printed_hogql(self) -> str: + return "query_log" diff --git a/posthog/hogql/database/schema/test/test_table_query_log.py b/posthog/hogql/database/schema/test/test_table_query_log.py new file mode 100644 index 0000000000000..d892520f31ca0 --- /dev/null +++ b/posthog/hogql/database/schema/test/test_table_query_log.py @@ -0,0 +1,53 @@ +from unittest.mock import MagicMock, patch +from posthog.clickhouse.client import sync_execute +from posthog.hogql.context import HogQLContext +from posthog.hogql.database.database import create_hogql_database +from posthog.hogql.query import execute_hogql_query +from posthog.test.base import ( + APIBaseTest, + ClickhouseTestMixin, +) + + +class TestPersonOptimization(ClickhouseTestMixin, APIBaseTest): + """ + Mostly tests for the optimization of pre-filtering before aggregating. See https://github.com/PostHog/posthog/pull/25604 + """ + + def setUp(self): + super().setUp() + self.database = create_hogql_database(self.team.pk) + self.context = HogQLContext(database=self.database, team_id=self.team.pk, enable_select_queries=True) + + @patch("posthog.hogql.query.sync_execute", wraps=sync_execute) + def test_dumb_query(self, mock_sync_execute: MagicMock): + response = execute_hogql_query("select query_start_time from query_log limit 10", self.team) + + ch_query = f"""SELECT + query_log.query_start_time AS query_start_time +FROM + (SELECT + toTimeZone(raw_query_log.event_time, %(hogql_val_0)s) AS query_start_time + FROM + clusterAllReplicas(posthog, system.query_log) AS raw_query_log + WHERE + and(ifNull(equals({self.team.pk}, JSONExtractInt(raw_query_log.log_comment, %(hogql_val_1)s)), 0), in(raw_query_log.type, [%(hogql_val_2)s, %(hogql_val_3)s, %(hogql_val_4)s]), raw_query_log.is_initial_query)) AS query_log +LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, max_ast_elements=4000000, max_expanded_ast_elements=4000000, max_bytes_before_external_group_by=0""" + + from unittest.mock import ANY + + mock_sync_execute.assert_called_once_with( + ch_query, + { + "hogql_val_0": "UTC", + "hogql_val_1": "user_id", + "hogql_val_2": "QueryFinish", + "hogql_val_3": "ExceptionBeforeStart", + "hogql_val_4": "ExceptionWhileProcessing", + }, + with_column_types=True, + workload=ANY, + team_id=self.team.pk, + readonly=True, + ) + assert response.results is not None diff --git a/posthog/hogql/database/test/__snapshots__/test_database.ambr b/posthog/hogql/database/test/__snapshots__/test_database.ambr index 86f71a6b3d0e1..d5d1d67d7c8b5 100644 --- a/posthog/hogql/database/test/__snapshots__/test_database.ambr +++ b/posthog/hogql/database/test/__snapshots__/test_database.ambr @@ -1704,6 +1704,153 @@ "id": "heatmaps", "name": "heatmaps", "type": "posthog" + }, + "query_log": { + "fields": { + "query_id": { + "chain": null, + "fields": null, + "hogql_value": "query_id", + "id": null, + "name": "query_id", + "schema_valid": true, + "table": null, + "type": "string" + }, + "query": { + "chain": null, + "fields": null, + "hogql_value": "query", + "id": null, + "name": "query", + "schema_valid": true, + "table": null, + "type": "string" + }, + "query_start_time": { + "chain": null, + "fields": null, + "hogql_value": "query_start_time", + "id": null, + "name": "query_start_time", + "schema_valid": true, + "table": null, + "type": "datetime" + }, + "query_duration_ms": { + "chain": null, + "fields": null, + "hogql_value": "query_duration_ms", + "id": null, + "name": "query_duration_ms", + "schema_valid": true, + "table": null, + "type": "float" + }, + "created_by": { + "chain": null, + "fields": null, + "hogql_value": "created_by", + "id": null, + "name": "created_by", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "read_rows": { + "chain": null, + "fields": null, + "hogql_value": "read_rows", + "id": null, + "name": "read_rows", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "read_bytes": { + "chain": null, + "fields": null, + "hogql_value": "read_bytes", + "id": null, + "name": "read_bytes", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "result_rows": { + "chain": null, + "fields": null, + "hogql_value": "result_rows", + "id": null, + "name": "result_rows", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "result_bytes": { + "chain": null, + "fields": null, + "hogql_value": "result_bytes", + "id": null, + "name": "result_bytes", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "memory_usage": { + "chain": null, + "fields": null, + "hogql_value": "memory_usage", + "id": null, + "name": "memory_usage", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "status": { + "chain": null, + "fields": null, + "hogql_value": "status", + "id": null, + "name": "status", + "schema_valid": true, + "table": null, + "type": "string" + }, + "kind": { + "chain": null, + "fields": null, + "hogql_value": "kind", + "id": null, + "name": "kind", + "schema_valid": true, + "table": null, + "type": "string" + }, + "query_type": { + "chain": null, + "fields": null, + "hogql_value": "query_type", + "id": null, + "name": "query_type", + "schema_valid": true, + "table": null, + "type": "string" + }, + "is_personal_api_key_request": { + "chain": null, + "fields": null, + "hogql_value": "is_personal_api_key_request", + "id": null, + "name": "is_personal_api_key_request", + "schema_valid": true, + "table": null, + "type": "boolean" + } + }, + "id": "query_log", + "name": "query_log", + "type": "posthog" } } ''' @@ -3366,6 +3513,153 @@ "id": "heatmaps", "name": "heatmaps", "type": "posthog" + }, + "query_log": { + "fields": { + "query_id": { + "chain": null, + "fields": null, + "hogql_value": "query_id", + "id": null, + "name": "query_id", + "schema_valid": true, + "table": null, + "type": "string" + }, + "query": { + "chain": null, + "fields": null, + "hogql_value": "query", + "id": null, + "name": "query", + "schema_valid": true, + "table": null, + "type": "string" + }, + "query_start_time": { + "chain": null, + "fields": null, + "hogql_value": "query_start_time", + "id": null, + "name": "query_start_time", + "schema_valid": true, + "table": null, + "type": "datetime" + }, + "query_duration_ms": { + "chain": null, + "fields": null, + "hogql_value": "query_duration_ms", + "id": null, + "name": "query_duration_ms", + "schema_valid": true, + "table": null, + "type": "float" + }, + "created_by": { + "chain": null, + "fields": null, + "hogql_value": "created_by", + "id": null, + "name": "created_by", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "read_rows": { + "chain": null, + "fields": null, + "hogql_value": "read_rows", + "id": null, + "name": "read_rows", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "read_bytes": { + "chain": null, + "fields": null, + "hogql_value": "read_bytes", + "id": null, + "name": "read_bytes", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "result_rows": { + "chain": null, + "fields": null, + "hogql_value": "result_rows", + "id": null, + "name": "result_rows", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "result_bytes": { + "chain": null, + "fields": null, + "hogql_value": "result_bytes", + "id": null, + "name": "result_bytes", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "memory_usage": { + "chain": null, + "fields": null, + "hogql_value": "memory_usage", + "id": null, + "name": "memory_usage", + "schema_valid": true, + "table": null, + "type": "integer" + }, + "status": { + "chain": null, + "fields": null, + "hogql_value": "status", + "id": null, + "name": "status", + "schema_valid": true, + "table": null, + "type": "string" + }, + "kind": { + "chain": null, + "fields": null, + "hogql_value": "kind", + "id": null, + "name": "kind", + "schema_valid": true, + "table": null, + "type": "string" + }, + "query_type": { + "chain": null, + "fields": null, + "hogql_value": "query_type", + "id": null, + "name": "query_type", + "schema_valid": true, + "table": null, + "type": "string" + }, + "is_personal_api_key_request": { + "chain": null, + "fields": null, + "hogql_value": "is_personal_api_key_request", + "id": null, + "name": "is_personal_api_key_request", + "schema_valid": true, + "table": null, + "type": "boolean" + } + }, + "id": "query_log", + "name": "query_log", + "type": "posthog" } } ''' diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index 418e2f6354807..27a4d85a84c66 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -18,6 +18,7 @@ MAX_SELECT_RETURNED_ROWS, HogQLGlobalSettings, ) +from posthog.hogql.database.schema.query_log import RawQueryLogTable from posthog.hogql.functions import ( ADD_OR_NULL_DATETIME_FUNCTIONS, FIRST_ARG_DATETIME_FUNCTIONS, @@ -494,7 +495,9 @@ def visit_join_expr(self, node: ast.JoinExpr) -> JoinExprResponse: else: sql = table_type.table.to_printed_hogql() - if isinstance(table_type.table, FunctionCallTable) and not isinstance(table_type.table, S3Table): + if isinstance(table_type.table, FunctionCallTable) and not ( + isinstance(table_type.table, S3Table) or isinstance(table_type.table, RawQueryLogTable) + ): if node.table_args is None: raise QueryError(f"Table function '{table_type.table.name}' requires arguments") @@ -1157,7 +1160,7 @@ def visit_call(self, node: ast.Call): args_part = f"({', '.join(args)})" return f"{relevant_clickhouse_name}{params_part}{args_part}" else: - return f"{node.name}({', '.join([self.visit(arg) for arg in node.args ])})" + return f"{node.name}({', '.join([self.visit(arg) for arg in node.args])})" elif func_meta := find_hogql_posthog_function(node.name): validate_function_args(node.args, func_meta.min_args, func_meta.max_args, node.name) args = [self.visit(arg) for arg in node.args]