diff --git a/src/snowflake/snowpark/_internal/server_connection.py b/src/snowflake/snowpark/_internal/server_connection.py index cd15ff0f137..e6763ed6a29 100644 --- a/src/snowflake/snowpark/_internal/server_connection.py +++ b/src/snowflake/snowpark/_internal/server_connection.py @@ -695,24 +695,28 @@ def _fix_pandas_df_integer( if ( FIELD_ID_TO_NAME.get(column_metadata.type_code) == "FIXED" and column_metadata.precision is not None - and column_metadata.scale == 0 and not str(pandas_dtype).startswith("int") ): - # When scale = 0 and precision values are between 10-20, the integers fit into int64. - # If we rely only on pandas.to_numeric, it loses precision value on large integers, therefore - # we try to strictly use astype("int64") in this scenario. If the values are too large to - # fit in int64, an OverflowError is thrown and we rely on to_numeric to choose and appropriate - # floating datatype to represent the number. - if column_metadata.precision > 10: - try: - pd_df[pandas_col_name] = pd_df[pandas_col_name].astype("int64") - except OverflowError: + if column_metadata.scale == 0: + # When scale = 0 and precision values are between 10-20, the integers fit into int64. + # If we rely only on pandas.to_numeric, it loses precision value on large integers, therefore + # we try to strictly use astype("int64") in this scenario. If the values are too large to + # fit in int64, an OverflowError is thrown and we rely on to_numeric to choose an appropriate + # floating datatype to represent the number. 
+ if column_metadata.precision > 10: + try: + pd_df[pandas_col_name] = pd_df[pandas_col_name].astype("int64") + except OverflowError: + pd_df[pandas_col_name] = pandas.to_numeric( + pd_df[pandas_col_name], downcast="integer" + ) + else: pd_df[pandas_col_name] = pandas.to_numeric( pd_df[pandas_col_name], downcast="integer" ) else: - pd_df[pandas_col_name] = pandas.to_numeric( - pd_df[pandas_col_name], downcast="integer" - ) + # For decimal columns, we want to cast them to float64 because pandas doesn't + # recognize the decimal type. + pd_df[pandas_col_name] = pd_df[pandas_col_name].astype("float64") return pd_df diff --git a/tests/integ/test_df_to_pandas.py b/tests/integ/test_df_to_pandas.py index 23f861f4c94..3cc28412efd 100644 --- a/tests/integ/test_df_to_pandas.py +++ b/tests/integ/test_df_to_pandas.py @@ -141,6 +141,28 @@ def test_to_pandas_precision_for_number_38_0(session): assert pdf["A"].min() == -9223372036854775808 +def test_to_pandas_precision_for_number_38_6_and_others(session): + df = session.sql( + """ + SELECT + num1, + num2, + DIV0(num1, num2) AS division, + DIV0(CAST(num1 AS INTEGER), CAST(num2 AS INTEGER)) AS division_cast, + ROUND(division_cast, 2) as rnd_cast + FROM (VALUES + (1, 11) + ) X(num1, num2); + """ + ) + + pdf = df.to_pandas() + + assert pdf["division"].dtype == "float64" + assert pdf["division_cast"].dtype == "float64" + assert pdf["rnd_cast"].dtype == "float64" + + def test_to_pandas_non_select(session): # `with ... select ...` is also a SELECT statement isinstance(session.sql("select 1").to_pandas(), PandasDF)