From f1d4b34492b15bdb092171e4f18ffd3d8454be8c Mon Sep 17 00:00:00 2001 From: Yun Zou Date: Fri, 4 Oct 2024 16:57:15 -0700 Subject: [PATCH] move definition --- .../analyzer/query_plan_analysis_utils.py | 8 ++++++++ .../_internal/compiler/telemetry_constants.py | 8 ++++++++ src/snowflake/snowpark/_internal/telemetry.py | 19 ++++++------------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/snowflake/snowpark/_internal/analyzer/query_plan_analysis_utils.py b/src/snowflake/snowpark/_internal/analyzer/query_plan_analysis_utils.py index b692695f95..3d79fafb93 100644 --- a/src/snowflake/snowpark/_internal/analyzer/query_plan_analysis_utils.py +++ b/src/snowflake/snowpark/_internal/analyzer/query_plan_analysis_utils.py @@ -45,7 +45,15 @@ def __repr__(self) -> str: class PlanState(Enum): + """ + This is an enum class for the states that are extracted for a given SnowflakePlan + or SelectStatement. + """ + + # the height of the given plan PLAN_HEIGHT = "plan_height" + # the number of SelectStatement nodes in the plan that have + # _merge_projection_complexity_with_subquery set to True NUM_SELECTS_WITH_COMPLEXITY_MERGED = "num_selects_with_complexity_merged" diff --git a/src/snowflake/snowpark/_internal/compiler/telemetry_constants.py b/src/snowflake/snowpark/_internal/compiler/telemetry_constants.py index 7793677699..7b9da7ec3e 100644 --- a/src/snowflake/snowpark/_internal/compiler/telemetry_constants.py +++ b/src/snowflake/snowpark/_internal/compiler/telemetry_constants.py @@ -6,6 +6,14 @@ class CompilationStageTelemetryField(Enum): + # dataframe query stats + QUERY_PLAN_HEIGHT = "query_plan_height" + QUERY_PLAN_NUM_SELECTS_WITH_COMPLEXITY_MERGED = ( + "query_plan_num_selects_with_complexity_merged" + ) + QUERY_PLAN_NUM_DUPLICATE_NODES = "query_plan_num_duplicate_nodes" + QUERY_PLAN_COMPLEXITY = "query_plan_complexity" + # types TYPE_LARGE_QUERY_BREAKDOWN_OPTIMIZATION_SKIPPED = ( "snowpark_large_query_breakdown_optimization_skipped" diff --git 
a/src/snowflake/snowpark/_internal/telemetry.py b/src/snowflake/snowpark/_internal/telemetry.py index 9bc0bb70cf..19ab8f0652 100644 --- a/src/snowflake/snowpark/_internal/telemetry.py +++ b/src/snowflake/snowpark/_internal/telemetry.py @@ -76,13 +76,6 @@ class TelemetryField(Enum): SQL_SIMPLIFIER_ENABLED = "sql_simplifier_enabled" CTE_OPTIMIZATION_ENABLED = "cte_optimization_enabled" LARGE_QUERY_BREAKDOWN_ENABLED = "large_query_breakdown_enabled" - # dataframe query stats - QUERY_PLAN_HEIGHT = "query_plan_height" - QUERY_PLAN_NUM_SELECTS_WITH_COMPLEXITY_MERGED = ( - "query_plan_num_selects_with_complexity_merged" - ) - QUERY_PLAN_NUM_DUPLICATE_NODES = "query_plan_num_duplicate_nodes" - QUERY_PLAN_COMPLEXITY = "query_plan_complexity" # temp table cleanup TYPE_TEMP_TABLE_CLEANUP = "snowpark_temp_table_cleanup" NUM_TEMP_TABLES_CLEANED = "num_temp_tables_cleaned" @@ -185,11 +178,11 @@ def wrap(*args, **kwargs): 0 ]._session.sql_simplifier_enabled try: - api_calls[0][TelemetryField.QUERY_PLAN_HEIGHT.value] = plan.plan_state[ - PlanState.PLAN_HEIGHT - ] api_calls[0][ - TelemetryField.QUERY_PLAN_NUM_SELECTS_WITH_COMPLEXITY_MERGED.value + CompilationStageTelemetryField.QUERY_PLAN_HEIGHT.value + ] = plan.plan_state[PlanState.PLAN_HEIGHT] + api_calls[0][ + CompilationStageTelemetryField.QUERY_PLAN_NUM_SELECTS_WITH_COMPLEXITY_MERGED.value ] = plan.plan_state[PlanState.NUM_SELECTS_WITH_COMPLEXITY_MERGED] # The uuid for df._select_statement can be different from df._plan. Since plan # can take both values, we cannot use plan.uuid. 
We always use df._plan.uuid @@ -197,9 +190,9 @@ def wrap(*args, **kwargs): uuid = args[0]._plan.uuid api_calls[0][CompilationStageTelemetryField.PLAN_UUID.value] = uuid api_calls[0][ - TelemetryField.QUERY_PLAN_NUM_DUPLICATE_NODES.value + CompilationStageTelemetryField.QUERY_PLAN_NUM_DUPLICATE_NODES.value ] = plan.num_duplicate_nodes - api_calls[0][TelemetryField.QUERY_PLAN_COMPLEXITY.value] = { + api_calls[0][CompilationStageTelemetryField.QUERY_PLAN_COMPLEXITY.value] = { key.value: value for key, value in plan.cumulative_node_complexity.items() }