Skip to content

Commit

Permalink
SNOW-1284674: fix timestamp data comparison beyond datetime64[ns] ran…
Browse files Browse the repository at this point in the history
…ge (#1387)
  • Loading branch information
sfc-gh-aling authored Apr 20, 2024
1 parent 38b27df commit 1eb7a17
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
- Fixed a bug where `statement_params` was not passed to query executions that register stored procedures and user defined functions.
- Fixed a bug causing `snowflake.snowpark.Session.file.get_stream` to fail for quoted stage locations
- Fixed a bug in local testing implementation of to_object, to_array and to_binary to better handle null inputs.
- Fixed a bug in local testing implementation where timestamp data comparison could not handle years beyond 2262.
- Fixed a bug in local testing that `Session.builder.getOrCreate` should return the created mock session.

## 1.14.0 (2024-03-20)
Expand Down
21 changes: 20 additions & 1 deletion src/snowflake/snowpark/mock/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,25 @@ def fix_drift_between_column_sf_type_and_dtype(col: ColumnEmulator):
and col.apply(lambda x: x is None).any()
): # non-object dtype converts None to NaN for numeric columns
return col
"""
notes for the timestamp object type drift here, ideally datetime64[us] should be used here because:
1. python doesn't have built-in datetime nanosecond support:
https://github.com/python/cpython/blob/3.12/Lib/_pydatetime.py
2. numpy datetime64 restrictions, https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units:
datetime64[ns] supports nanoseconds, the year range is limited to [ 1678 AD, 2262 AD]
datetime64[us] supports microseconds, the year range is more relaxed [290301 BC, 294241 AD]
3. snowflake date range recommendation
according to snowflake https://docs.snowflake.com/en/sql-reference/data-types-datetime#date
the recommended year range is 1582 to 9999
however, pandas 2.0.3 + numpy 1.24.4 (the newest versions supported on Python 3.8) do not recognize datetime64[us] and
always default to unit ns, leading to an out-of-bounds timestamp error.
based on this information and for simplicity, we can use object for now, then move on to datetime64[us],
then seek a solution for nanoseconds.
"""
sf_type_to_dtype = {
ArrayType: object,
BinaryType: object,
Expand All @@ -215,7 +234,7 @@ def fix_drift_between_column_sf_type_and_dtype(col: ColumnEmulator):
NullType: object,
ShortType: numpy.int8 if not col.sf_type.nullable else "Int8",
StringType: object,
TimestampType: "datetime64[ns]",
TimestampType: object, # "datetime64[us]" does not work on Python 3.8 with pandas 2.0.3 + numpy 1.24.4
TimeType: object,
VariantType: object,
MapType: object,
Expand Down
14 changes: 10 additions & 4 deletions tests/integ/scala/test_column_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ def test_gt_and_lt(session):
assert test_data1.where(test_data1["NUM"] < 2).collect() == [Row(1, True, "a")]
assert test_data1.where(test_data1["NUM"] < lit(2)).collect() == [Row(1, True, "a")]

test_data_datetime = TestData.datetime_primitives2(session)
res = datetime.datetime(2000, 5, 6, 0, 0, 0)
assert test_data_datetime.where(
test_data_datetime["timestamp"] > res
).collect() == [Row(datetime.datetime(9999, 12, 31, 0, 0, 0, 123456))]
assert test_data_datetime.where(
test_data_datetime["timestamp"] < res
).collect() == [Row(datetime.datetime(1583, 1, 1, 23, 59, 59, 567890))]


@pytest.mark.localtest
def test_leq_and_geq(session):
Expand Down Expand Up @@ -754,11 +763,8 @@ def test_in_expression_2_in_with_subquery(session):
Utils.check_answer(df4, [Row(False), Row(True), Row(True)])


@pytest.mark.localtest
def test_in_expression_3_with_all_types(session, local_testing_mode):
# TODO: local testing support to_timestamp_ntz
# stored proc by default uses timestamp type according to:
# https://docs.snowflake.com/en/sql-reference/parameters#timestamp-type-mapping
# we keep the test here for future reference
schema = StructType(
[
StructField("id", LongType()),
Expand Down
13 changes: 13 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,19 @@ def datetime_primitives1(cls, session: "Session") -> DataFrame:
)
return session.create_dataframe(data, schema)

@classmethod
def datetime_primitives2(cls, session: "Session") -> DataFrame:
    """Build a one-column DataFrame of timestamp strings with far-apart years.

    The single column is named ``timestamp`` and is declared as
    ``TimestampType(TimestampTimeZone.NTZ)``. The two rows are given as
    strings, one in year 9999 and one in year 1583.

    Args:
        session: Session whose ``create_dataframe`` builds the result.

    Returns:
        The DataFrame produced by ``session.create_dataframe``.
    """
    # Declare the schema first; it is independent of the row values.
    timestamp_schema = StructType(
        [StructField("timestamp", TimestampType(TimestampTimeZone.NTZ))]
    )
    # Second value deliberately has only five fractional digits.
    boundary_values = [
        "9999-12-31 00:00:00.123456",
        "1583-01-01 23:59:59.56789",
    ]
    return session.create_dataframe(boundary_values, timestamp_schema)

@classmethod
def time_primitives1(cls, session: "Session") -> DataFrame:
# simple string data
Expand Down

0 comments on commit 1eb7a17

Please sign in to comment.