From ce2008e45d7152386ac4e3255602214f42276222 Mon Sep 17 00:00:00 2001 From: Jamison Rose Date: Mon, 29 Apr 2024 16:01:14 -0700 Subject: [PATCH] SNOW-1269037: [Local Testing] Add support for NaT and NaN values (#1393) --- CHANGELOG.md | 4 +++ .../snowpark/_internal/type_utils.py | 9 ++++++ tests/mock_unit/test_create_df_from_pandas.py | 31 ++++++++++++++++++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64d248304bc..05c01f11891 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ - concat - concat_ws +#### Bug Fixes + +- Fixed a bug that caused NaT and NaN values to not be recognized. + ## 1.15.0 (2024-04-24) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index bea013890e3..026064fbc54 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -264,6 +264,15 @@ def convert_sp_to_sf_type(datatype: DataType) -> str: datetime.time: TimeType, bytes: BinaryType, } +if installed_pandas: + import numpy + + PYTHON_TO_SNOW_TYPE_MAPPINGS.update( + { + type(pandas.NaT): TimestampType, + numpy.float64: DecimalType, + } + ) VALID_PYTHON_TYPES_FOR_LITERAL_VALUE = ( diff --git a/tests/mock_unit/test_create_df_from_pandas.py b/tests/mock_unit/test_create_df_from_pandas.py index 68af20c2a8a..56c7e88ce1c 100644 --- a/tests/mock_unit/test_create_df_from_pandas.py +++ b/tests/mock_unit/test_create_df_from_pandas.py @@ -10,7 +10,15 @@ import pytz from snowflake.snowpark import Row, Table -from snowflake.snowpark.types import BooleanType, DoubleType, LongType, StringType +from snowflake.snowpark.types import ( + BooleanType, + DoubleType, + LongType, + StringType, + StructField, + StructType, + TimestampType, +) try: import pandas as pd @@ -344,3 +352,24 @@ def test_na_and_null_data(session): ) sp_df = session.create_dataframe(data=pandas_df) assert sp_df.select("A").collect() == [Row("abc"), Row(None), Row("a"), Row("")] + + +@pytest.mark.localtest +def test_datetime_nat_nan(session): + df = pd.DataFrame( + { + "date": pd.to_datetime( + [None, "2020-01-13", "2020-02-01", "2020-02-23", "2020-03-05"], utc=True + ), + "num": [None, 1.0, 2.0, 3.0, 4.0], + } + ) + + expected_schema = StructType( + [ + StructField('"date"', TimestampType(), nullable=True), + StructField('"num"', DoubleType(), nullable=True), + ] + ) + sf_df = session.create_dataframe(data=df) + assert sf_df.schema == expected_schema