From f82b0b41fbb40dcdead991f87fbd48489bb1ee59 Mon Sep 17 00:00:00 2001 From: Afroz Alam Date: Thu, 1 Feb 2024 13:49:20 -0800 Subject: [PATCH 1/6] use split_blocks=True by default --- src/snowflake/snowpark/_internal/server_connection.py | 4 ++-- src/snowflake/snowpark/mock/_connection.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/snowflake/snowpark/_internal/server_connection.py b/src/snowflake/snowpark/_internal/server_connection.py index a442d724553..402ee6b63aa 100644 --- a/src/snowflake/snowpark/_internal/server_connection.py +++ b/src/snowflake/snowpark/_internal/server_connection.py @@ -443,11 +443,11 @@ def _to_data_or_iter( functools.partial( _fix_pandas_df_fixed_type, results_cursor=results_cursor ), - results_cursor.fetch_pandas_batches(), + results_cursor.fetch_pandas_batches(split_blocks=True), ) if to_iter else _fix_pandas_df_fixed_type( - results_cursor.fetch_pandas_all(), results_cursor + results_cursor.fetch_pandas_all(split_blocks=True), results_cursor ) ) except NotSupportedError: diff --git a/src/snowflake/snowpark/mock/_connection.py b/src/snowflake/snowpark/mock/_connection.py index 27c54c166c6..2968131cb34 100644 --- a/src/snowflake/snowpark/mock/_connection.py +++ b/src/snowflake/snowpark/mock/_connection.py @@ -339,11 +339,11 @@ def _to_data_or_iter( functools.partial( _fix_pandas_df_fixed_type, results_cursor=results_cursor ), - results_cursor.fetch_pandas_batches(), + results_cursor.fetch_pandas_batches(split_blocks=True), ) if to_iter else _fix_pandas_df_fixed_type( - results_cursor.fetch_pandas_all(), results_cursor + results_cursor.fetch_pandas_all(split_blocks=True), results_cursor ) ) except NotSupportedError: From 9f81be9b4e78eb93936552d9eb6f3b1fbec84319 Mon Sep 17 00:00:00 2001 From: Afroz Alam Date: Fri, 2 Feb 2024 10:37:09 -0800 Subject: [PATCH 2/6] changelog updates --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 639afd8db39..533ae2627d6 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## 1.13.0 (TBD) +### New Features + +- Use `split_blocks=True` by default during `to_pandas` conversion for optimal memory allocation. + ### Behavior Changes (API Compatible) - Added support for an optional `date_part` argument in function `last_day` From 479ad7982fb58e420c091f3359531f73b861519c Mon Sep 17 00:00:00 2001 From: Afroz Alam Date: Mon, 5 Feb 2024 11:45:09 -0800 Subject: [PATCH 3/6] more details in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17569304a55..42c1291b62d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### New Features -- Use `split_blocks=True` by default during `to_pandas` conversion for optimal memory allocation. +- Use `split_blocks=True` by default during `to_pandas` conversion for optimal memory allocation. This parameter is passed to `pyarrow.Table.to_pandas` to that enables `PyArrow` to split the memory allocation into smaller, more manageable blocks instead of allocating a single contiguous block thus giving better memory management when dealing with larger datasets. - Added support for an optional `date_part` argument in function `last_day` ### Bug Fixes From 1b2d8795b1c2dd3eb7b3670c5a04c020aa890eb8 Mon Sep 17 00:00:00 2001 From: Afroz Alam Date: Mon, 5 Feb 2024 11:46:21 -0800 Subject: [PATCH 4/6] fix grammar --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42c1291b62d..76621463067 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### New Features -- Use `split_blocks=True` by default during `to_pandas` conversion for optimal memory allocation. 
This parameter is passed to `pyarrow.Table.to_pandas` to that enables `PyArrow` to split the memory allocation into smaller, more manageable blocks instead of allocating a single contiguous block thus giving better memory management when dealing with larger datasets. +- Use `split_blocks=True` by default during `to_pandas` conversion for optimal memory allocation. This parameter is passed to `pyarrow.Table.to_pandas` and enables `PyArrow` to split the memory allocation into smaller, more manageable blocks instead of allocating a single contiguous block, thus giving better memory management when dealing with larger datasets. - Added support for an optional `date_part` argument in function `last_day` ### Bug Fixes From 06462af1a828a1f0f972abab2ba03e539f5abfd0 Mon Sep 17 00:00:00 2001 From: Afroz Alam Date: Tue, 6 Feb 2024 18:09:19 -0800 Subject: [PATCH 5/6] fix changelog --- CHANGELOG.md | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e71558a1620..62fdc208ac0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,18 +4,25 @@ ### New Features -- Use `split_blocks=True` by default during `to_pandas` conversion for optimal memory allocation. This parameter is passed to `pyarrow.Table.to_pandas` and enables `PyArrow` to split the memory allocation into smaller, more manageable blocks instead of allocating a single contiguous block, thus giving better memory management when dealing with larger datasets. - Added support for an optional `date_part` argument in function `last_day` +- Added the following functions to `DataFrame.analytics`: + - Added the `compute_lag` and `compute_lead` functions in `DataFrame.analytics` for enabling lead and lag calculations on multiple columns. + - Added the `time_series_agg` function in `DataFrame.analytics` to enable time series aggregations like sums and averages with multiple time windows. 
+ +### Bug Fixes + +- Fixed a bug in `DataFrame.to_local_iterator` where the iterator could yield wrong results if another query is executed before the iterator finishes due to wrong isolation level. For details, please see #945. +- Fixed a bug that truncated table names in error messages while running a plan with local testing enabled. ## 1.12.1 (TBD) ### New Features +- Use `split_blocks=True` by default during `to_pandas` conversion for optimal memory allocation. This parameter is passed to `pyarrow.Table.to_pandas` and enables `PyArrow` to split the memory allocation into smaller, more manageable blocks instead of allocating a single contiguous block, thus giving better memory management when dealing with larger datasets. + ### Bug Fixes - Fixed a bug in `DataFrame.to_pandas` that caused an error when evaluating on a dataframe with an IntergerType column with null values. -- Fixed a bug in `DataFrame.to_local_iterator` where the iterator could yield wrong results if another query is executed before the iterator finishes due to wrong isolation level. For details, please see #945. -- Fixed a bug that truncated table names in error messages while running a plan with local testing enabled. ## 1.12.0 (2024-01-30) @@ -35,9 +42,7 @@ - `sign`/`signum` - Added the following functions to `DataFrame.analytics`: - Added the `moving_agg` function in `DataFrame.analytics` to enable moving aggregations like sums and averages with multiple window sizes. - - Added the `cummulative_agg` function in `DataFrame.analytics` to enable commulative aggregations like sums and averages on multiple columns. - - Added the `compute_lag` and `compute_lead` functions in `DataFrame.analytics` for enabling lead and lag calculations on multiple columns. - - Added the `time_series_agg` function in `DataFrame.analytics` to enable time series aggregations like sums and averages with multiple time windows. 
+ - Added the `cumulative_agg` function in `DataFrame.analytics` to enable cumulative aggregations like sums and averages on multiple columns. ### Bug Fixes From 32931105704a3e57a02073994e04f7d4f2dfe524 Mon Sep 17 00:00:00 2001 From: Afroz Alam Date: Wed, 7 Feb 2024 08:50:08 -0800 Subject: [PATCH 6/6] revert to minimal changelog --- CHANGELOG.md | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62fdc208ac0..2ff82348e93 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,16 +3,7 @@ ## 1.13.0 (TBD) ### New Features - - Added support for an optional `date_part` argument in function `last_day` -- Added the following functions to `DataFrame.analytics`: - - Added the `compute_lag` and `compute_lead` functions in `DataFrame.analytics` for enabling lead and lag calculations on multiple columns. - - Added the `time_series_agg` function in `DataFrame.analytics` to enable time series aggregations like sums and averages with multiple time windows. - -### Bug Fixes - -- Fixed a bug in `DataFrame.to_local_iterator` where the iterator could yield wrong results if another query is executed before the iterator finishes due to wrong isolation level. For details, please see #945. -- Fixed a bug that truncated table names in error messages while running a plan with local testing enabled. ## 1.12.1 (TBD) @@ -23,6 +14,8 @@ ### Bug Fixes - Fixed a bug in `DataFrame.to_pandas` that caused an error when evaluating on a dataframe with an IntergerType column with null values. +- Fixed a bug in `DataFrame.to_local_iterator` where the iterator could yield wrong results if another query is executed before the iterator finishes due to wrong isolation level. For details, please see #945. +- Fixed a bug that truncated table names in error messages while running a plan with local testing enabled. 
## 1.12.0 (2024-01-30) @@ -42,7 +35,9 @@ - `sign`/`signum` - Added the following functions to `DataFrame.analytics`: - Added the `moving_agg` function in `DataFrame.analytics` to enable moving aggregations like sums and averages with multiple window sizes. - - Added the `cumulative_agg` function in `DataFrame.analytics` to enable cumulative aggregations like sums and averages on multiple columns. + - Added the `cummulative_agg` function in `DataFrame.analytics` to enable commulative aggregations like sums and averages on multiple columns. + - Added the `compute_lag` and `compute_lead` functions in `DataFrame.analytics` for enabling lead and lag calculations on multiple columns. + - Added the `time_series_agg` function in `DataFrame.analytics` to enable time series aggregations like sums and averages with multiple time windows. ### Bug Fixes