Skip to content

Commit

Permalink
Merge branch 'main' into jrose_snow_1063716_local_testing_get_support
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-jrose authored Apr 29, 2024
2 parents 75090bb + ce2008e commit c420f75
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 3 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
- concat
- concat_ws

#### Bug Fixes

- Fixed a bug that caused NaT and NaN values to not be recognized.

## 1.15.0 (2024-04-24)

### New Features
Expand Down
3 changes: 3 additions & 0 deletions scripts/jenkins_regress_snowpandas.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,8 @@ set -euxo pipefail
# Decrypt the encrypted connection profile(s) passed as script arguments into
# tests/parameters.py. "$@" is quoted so that paths containing spaces or glob
# characters are forwarded to gpg as single, unmodified arguments.
gpg --quiet --batch --yes --decrypt --passphrase="$GPG_KEY" --output "tests/parameters.py" "$@"

# Install tox, which is by default not present in the environment.
python -m pip install tox

# Run snowpandas tests. WORKING_DIR is quoted so a directory path containing
# spaces is passed to tox as one argument.
python -m tox -c "$WORKING_DIR" -e snowparkpandasjenkins-modin
5 changes: 4 additions & 1 deletion scripts/tox_install_cmd.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env sh
#!/usr/bin/env bash

set -e

Expand All @@ -13,6 +13,9 @@ done

echo "${pip_options[*]}"

# Default to empty, to ensure snowflake_path variable is defined.
snowflake_path=${snowflake_path:-""}

if [[ -z "${snowflake_path}" ]]; then
echo "Using Python Connector from PyPI"
python -m pip install -U ${pip_options[@]}
Expand Down
9 changes: 9 additions & 0 deletions src/snowflake/snowpark/_internal/type_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,15 @@ def convert_sp_to_sf_type(datatype: DataType) -> str:
datetime.time: TimeType,
bytes: BinaryType,
}
if installed_pandas:
    # numpy is only imported when pandas is installed.
    import numpy

    # Register the Python types of the pandas/numpy missing-value markers so
    # they resolve to a Snowpark type: the type of ``pandas.NaT`` is treated
    # as a timestamp.
    PYTHON_TO_SNOW_TYPE_MAPPINGS[type(pandas.NaT)] = TimestampType
    # NOTE(review): numpy.float64 is mapped to DecimalType rather than a
    # floating-point type -- confirm this is intended.
    PYTHON_TO_SNOW_TYPE_MAPPINGS[numpy.float64] = DecimalType


VALID_PYTHON_TYPES_FOR_LITERAL_VALUE = (
Expand Down
4 changes: 3 additions & 1 deletion src/snowflake/snowpark/modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,7 +884,9 @@ def _check_vars() -> None: # pragma: no cover
and issubclass(obj, EnvironmentVariable)
and not obj.is_abstract
}
valid_names.update(["MODIN_PYTEST_CMD", "MODIN_PYTEST_DAILY_CMD"])
valid_names.update(
["MODIN_PYTEST_CMD", "MODIN_PYTEST_DAILY_CMD", "MODIN_PYTEST_NO_COV_CMD"]
)
found_names = {name for name in os.environ if name.startswith("MODIN_")}
unknown = found_names - valid_names
deprecated: dict[str, DeprecationDescriptor] = {
Expand Down
1 change: 1 addition & 0 deletions src/snowflake/snowpark/modin/plugin/PANDAS_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- Fixed overriding of subclasses' property docstrings for modin issue https://github.com/modin-project/modin/issues/7113.
- Fixed `@udf` decorator when `packages=None` is specified, preventing an error in `Series.apply`.
- Fixed incorrect regex used in `Series.str.contains`.
- Fixed DataFrame's `__getitem__` with boolean DataFrame key.

## 1.14.0a2 (2024-04-18)

Expand Down
31 changes: 30 additions & 1 deletion tests/mock_unit/test_create_df_from_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,15 @@
import pytz

from snowflake.snowpark import Row, Table
from snowflake.snowpark.types import BooleanType, DoubleType, LongType, StringType
from snowflake.snowpark.types import (
BooleanType,
DoubleType,
LongType,
StringType,
StructField,
StructType,
TimestampType,
)

try:
import pandas as pd
Expand Down Expand Up @@ -344,3 +352,24 @@ def test_na_and_null_data(session):
)
sp_df = session.create_dataframe(data=pandas_df)
assert sp_df.select("A").collect() == [Row("abc"), Row(None), Row("a"), Row("")]


@pytest.mark.localtest
def test_datetime_nat_nan(session):
    """Check the schema produced from a pandas DataFrame with missing values.

    The input frame has a UTC datetime column and a float column, each with
    ``None`` as its first entry. The Snowpark DataFrame created from it must
    report a nullable ``TimestampType`` column named ``"date"`` and a nullable
    ``DoubleType`` column named ``"num"``.
    """
    dates = pd.to_datetime(
        [None, "2020-01-13", "2020-02-01", "2020-02-23", "2020-03-05"], utc=True
    )
    nums = [None, 1.0, 2.0, 3.0, 4.0]
    pandas_df = pd.DataFrame({"date": dates, "num": nums})

    sf_df = session.create_dataframe(data=pandas_df)

    # Column names are expected in their quoted form.
    assert sf_df.schema == StructType(
        [
            StructField('"date"', TimestampType(), nullable=True),
            StructField('"num"', DoubleType(), nullable=True),
        ]
    )
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ setenv =
SNOW_1314507_WORKAROUND_RERUN_FLAGS = --reruns 2 --only-rerun "Insufficient resource during interleaved execution."
MODIN_PYTEST_CMD = pytest {env:SNOWFLAKE_PYTEST_VERBOSITY:} {env:SNOWFLAKE_PYTEST_PARALLELISM:} {env:SNOWFLAKE_PYTEST_COV_CMD} --ignore=tests/resources
MODIN_PYTEST_DAILY_CMD = pytest {env:SNOWFLAKE_PYTEST_VERBOSITY:} {env:SNOWFLAKE_PYTEST_DAILY_PARALLELISM:} {env:SNOWFLAKE_PYTEST_COV_CMD} --ignore=tests/resources
MODIN_PYTEST_NO_COV_CMD = pytest {env:SNOWFLAKE_PYTEST_VERBOSITY:} {env:SNOWFLAKE_PYTEST_PARALLELISM:} --ignore=tests/resources

passenv =
AWS_ACCESS_KEY_ID
Expand Down

0 comments on commit c420f75

Please sign in to comment.