Skip to content

Commit

Permalink
SNOW-1539454: Fix Star expression column dependency (#1945)
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-jdu authored Jul 19, 2024
1 parent 2fbd524 commit c482c53
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 2 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
### Snowpark Python API Updates

#### Improvements

- Added support for server-side string size limitations.

#### Bug Fixes
- Fixed a bug where the SQL generated for selecting the `*` column contained an incorrect subquery.

### Snowpark Local Testing Updates
#### New Features

Expand Down
9 changes: 8 additions & 1 deletion src/snowflake/snowpark/_internal/analyzer/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,14 @@ def __init__(
self.df_alias = df_alias

def dependent_column_names(self) -> Optional[AbstractSet[str]]:
    """Return the column names this expression depends on.

    Returns:
        ``COLUMN_DEPENDENCY_ALL`` when ``self.expressions`` is empty;
        otherwise the result of ``derive_dependent_columns`` applied to
        ``self.expressions``.
    """
    # When the column is `col('*')` or just the '*' string, `expressions`
    # is empty, but the expression still depends on every column. This
    # check must come first: an unconditional early return would make the
    # fallback unreachable.
    if not self.expressions:
        return COLUMN_DEPENDENCY_ALL
    # When the column is `df['*']`, `expressions` contains Attributes from
    # all columns, so the dependencies can be derived from them directly.
    return derive_dependent_columns(*self.expressions)

@property
def individual_node_complexity(self) -> Dict[PlanNodeCategory, int]:
Expand Down
17 changes: 17 additions & 0 deletions tests/integ/test_simplifier_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
iff,
lit,
min as min_,
object_construct_keep_null,
row_number,
seq1,
sql_expr,
Expand Down Expand Up @@ -1331,3 +1332,19 @@ def test_data_generator_with_filter(session):
df.with_column("B", seq1()).with_column("C", min_("B").over()).filter(df.A == 1)
)
Utils.check_answer(df, [Row(1, 1, 0)])


def test_star_column(session):
    """Check that selecting `*` after two renames keeps every subquery.

    Builds a cached two-column dataframe, renames column ``a`` twice through
    successive selects, then wraps all columns with
    ``object_construct_keep_null("*")``. The generated SQL is expected to
    contain three ``SELECT`` keywords and the result must match the rows
    listed below.
    """
    # convert to a table
    base = session.create_dataframe(
        [[0, "a"], [1, "b"]], schema=["a", "b"]
    ).cache_result()

    # select a column and rename it twice: a -> x -> y
    renamed_once = base.select(col("a").as_("x"), "b")
    renamed_twice = renamed_once.select(col("x").as_("y"), "b")
    packed = renamed_twice.select(object_construct_keep_null("*"))

    # expect that no subquery is flattened
    first_query = packed.queries["queries"][0]
    assert first_query.count("SELECT") == 3

    expected_rows = [
        Row('{\n "B": "a",\n "Y": 0\n}'),
        Row('{\n "B": "b",\n "Y": 1\n}'),
    ]
    Utils.check_answer(packed, expected_rows)

0 comments on commit c482c53

Please sign in to comment.