Skip to content

Commit

Permalink
SNOW-1060150: Add session.cte_optimization_enabled parameter (#1402)
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-jdu authored Apr 19, 2024
1 parent 30a77e7 commit 7c3afe1
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 1 deletion.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,13 @@
- snowflake.snowpark.DataFrameWriter:
- save_as_table
- Added support for snow:// URLs to `snowflake.snowpark.Session.file.get` and `snowflake.snowpark.Session.file.get_stream`
- Added support support to register stored procedures and UDxFs with a `comment`.
- Added support to register stored procedures and UDxFs with a `comment`.
- UDAF client support is ready for public preview. Please stay tuned for the Snowflake announcement of UDAF public preview.
- Added support for dynamic pivot. This feature is currently in private preview.

### Improvements
- Improved the generated query performance for both compilation and execution by converting duplicate subqueries to Common Table Expressions (CTEs). It is still an experimental feature, and can be enabled by setting `session.cte_optimization_enabled` to `True`.

### Bug Fixes

- Fixed a bug in local testing that null filled columns for constant functions.
Expand Down
14 changes: 14 additions & 0 deletions src/snowflake/snowpark/_internal/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class TelemetryField(Enum):
TYPE_FUNCTION_USAGE = "snowpark_function_usage"
TYPE_SESSION_CREATED = "snowpark_session_created"
TYPE_SQL_SIMPLIFIER_ENABLED = "snowpark_sql_simplifier_enabled"
TYPE_CTE_OPTIMIZATION_ENABLED = "snowpark_cte_optimization_enabled"
TYPE_ERROR = "snowpark_error"
# Message keys for telemetry
KEY_START_TIME = "start_time"
Expand Down Expand Up @@ -62,6 +63,7 @@ class TelemetryField(Enum):
# sql simplifier
SESSION_ID = "session_id"
SQL_SIMPLIFIER_ENABLED = "sql_simplifier_enabled"
CTE_OPTIMIZATION_ENABLED = "cte_optimization_enabled"
# dataframe query stats
QUERY_PLAN_HEIGHT = "query_plan_height"
QUERY_PLAN_NUM_DUPLICATE_NODES = "query_plan_num_duplicate_nodes"
Expand Down Expand Up @@ -355,3 +357,15 @@ def send_sql_simplifier_telemetry(
},
}
self.send(message)

def send_cte_optimization_telemetry(self, session_id: str) -> None:
    """Send a telemetry message recording that CTE optimization was enabled.

    The message is the basic telemetry data for the
    ``TYPE_CTE_OPTIMIZATION_ENABLED`` event type plus a data payload holding
    the session id and a ``True`` enabled flag.

    Args:
        session_id: Identifier of the session the event is reported for.
    """
    # Start from the common envelope for this event type.
    envelope = dict(
        self._create_basic_telemetry_data(
            TelemetryField.TYPE_CTE_OPTIMIZATION_ENABLED.value
        )
    )
    # This event is only ever reported as "enabled", hence the constant True.
    envelope[TelemetryField.KEY_DATA.value] = {
        TelemetryField.SESSION_ID.value: session_id,
        TelemetryField.CTE_OPTIMIZATION_ENABLED.value: True,
    }
    self.send(envelope)
16 changes: 16 additions & 0 deletions src/snowflake/snowpark/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,13 @@ def sql_simplifier_enabled(self) -> bool:
"""
return self._sql_simplifier_enabled

@property
def cte_optimization_enabled(self) -> bool:
    """Whether the CTE optimization is enabled for this session.

    Set to ``True`` to enable it (defaults to ``False``). When enabled,
    duplicate subqueries in the SQL generated from ``DataFrame``
    transformations are converted to Common Table Expressions (CTEs).
    """
    enabled = self._cte_optimization_enabled
    return enabled

@property
def custom_package_usage_config(self) -> Dict:
"""Get or set configuration parameters related to usage of custom Python packages in Snowflake.
Expand Down Expand Up @@ -590,6 +597,15 @@ def sql_simplifier_enabled(self, value: bool) -> None:
pass
self._sql_simplifier_enabled = value

@cte_optimization_enabled.setter
@experimental_parameter(version="1.15.0")
def cte_optimization_enabled(self, value: bool) -> None:
    """Enable or disable the CTE optimization for this session.

    A telemetry event is sent through the connection's telemetry client
    only when ``value`` is truthy; disabling the option sends nothing.

    Args:
        value: ``True`` to enable the optimization, ``False`` to disable it.
    """
    if value:
        # Report the opt-in before storing the flag.
        telemetry_client = self._conn._telemetry_client
        telemetry_client.send_cte_optimization_telemetry(self._session_id)
    self._cte_optimization_enabled = value

@custom_package_usage_config.setter
@experimental_parameter(version="1.6.0")
def custom_package_usage_config(self, config: Dict) -> None:
Expand Down
7 changes: 7 additions & 0 deletions tests/integ/test_cte.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
#

import logging
import re

import pytest
Expand Down Expand Up @@ -433,3 +434,9 @@ def test_window_function(session):
df_result = df.union_all(df).select("*")
check_result(session, df_result, expect_cte_optimized=True)
assert count_number_of_ctes(df_result.queries["queries"][-1]) == 1


def test_cte_optimization_enabled_parameter(session, caplog):
    """Enabling ``cte_optimization_enabled`` logs the experimental warning."""
    expected_warning = "cte_optimization_enabled is experimental"
    # Capture at WARNING level only while the parameter is being set.
    with caplog.at_level(logging.WARNING):
        session.cte_optimization_enabled = True
    assert expected_warning in caplog.text

0 comments on commit 7c3afe1

Please sign in to comment.