Skip to content

Commit

Permalink
SNOW-1316977: Remove fallbacks and raise not implemented error (#1435)
Browse files Browse the repository at this point in the history
Please answer these questions before submitting your pull requests.
Thanks!

1. What GitHub issue is this PR addressing? Make sure that there is an
accompanying issue to your PR.

   Fixes SNOW-1316977

2. Fill out the following pre-review checklist:

- [x] I am adding a new automated test(s) to verify correctness of my
new code
   - [ ] I am adding new logging messages
   - [ ] I am adding a new telemetry message
   - [ ] I am adding new credentials
   - [ ] I am adding a new dependency

3. Please describe how your code solves the related issue.

Remove fallback for all dataframe and series APIs. This list is
primarily driven from
https://docs.google.com/document/d/1uMwNgLqFhtoAFeEj59XjR3uKQ77QjA84aTmv8Erbvw0/edit#heading=h.qhdbmdhgqdh5.
There might still be more fallbacks that are not documented. These will
be cleaned up in
https://snowflakecomputing.atlassian.net/browse/SNOW-1347394
NOTE: I have kept the existing tests mostly unmodified even though there
is redundancy. This can be useful when we implement these APIs.
NOTE: This also requires updates to the docs/*supported.rst files. These
files are not yet ported from snowpandas, so I will do a follow-up PR to
update the docs.
  • Loading branch information
sfc-gh-nkumar authored Apr 30, 2024
1 parent c470057 commit 1309d80
Show file tree
Hide file tree
Showing 28 changed files with 561 additions and 1,156 deletions.
4 changes: 3 additions & 1 deletion src/snowflake/snowpark/modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,9 @@ def dot(self, other): # noqa: PR01, RT01, D200
Compute the matrix multiplication between the ``DataFrame`` and `other`.
"""
# TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions
ErrorMessage.not_implemented() # pragma: no cover
ErrorMessage.not_implemented(
"Snowpark pandas doesn't yet support 'dot' binary operation"
)

if isinstance(other, BasePandasDataset):
common = self.columns.union(other.index)
Expand Down
4 changes: 3 additions & 1 deletion src/snowflake/snowpark/modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,7 +998,9 @@ def dot(self, other): # noqa: PR01, RT01, D200
Compute the dot product between the Series and the columns of `other`.
"""
# TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions
ErrorMessage.not_implemented() # pragma: no cover
ErrorMessage.not_implemented(
"Snowpark pandas doesn't yet support 'dot' binary operation"
)

if isinstance(other, BasePandasDataset):
common = self.index.union(other.index)
Expand Down
22 changes: 22 additions & 0 deletions src/snowflake/snowpark/modin/plugin/PANDAS_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,28 @@
- Fixed DataFrame's `__getitem__` with boolean DataFrame key.
- Fixed incorrect regex used in `DataFrame/Series.replace`.

### Behavior Changes
- Raise a not implemented error instead of falling back to pandas in the following APIs:
- `pd.merge`, `DataFrame.merge` and `DataFrame.join` if given the `validate` parameter.
- `pd.to_numeric` if `errors == 'ignore'`.
- `pd.to_datetime` if `format` is None or not supported in Snowflake, or if the `exact` or `infer_datetime_format` parameters are given, or if `origin == 'julian'` or `errors == 'ignore'`.
- `DataFrame/Series.all` if called on non-integer/boolean columns.
- `DataFrame/Series.any` if called on non-integer/boolean columns.
- `DataFrame/Series.astype` if casting from string to datetime or `errors == 'ignore'`.
- `DataFrame/Series.dropna` if `axis == 1`
- `DataFrame/Series.mask` if given `axis` or `level` parameters.
- `DataFrame/Series.rename` if `mapper` is callable or the DataFrame/Series has MultiIndex.
- `DataFrame/Series.sort_values` if given the `key` parameter.
- `DataFrame/Series.sort_index` if given the `key` parameter.
- `DataFrame.nunique` if `axis == 1`
- `DataFrame.apply` if `axis == 0` or `func` is not callable or `result_type` is given or `args` and `kwargs` contain DataFrame or Series.
- `Series.apply` if `axis == 0` or `func` is not callable or `result_type` is given.
- `Series.applymap` if `na_action == 'ignore'`.
- `DataFrame/Series.ffill` if given the `limit` or `downcast` parameter.
- `DataFrame/Series.fillna` if given the `limit` or `downcast` parameter.
- `dot` binary operation between `DataFrame/Series`.
- `xor` binary operation between `DataFrame/Series`.

## 1.14.0a2 (2024-04-18)

### Behavior Changes
Expand Down
39 changes: 22 additions & 17 deletions src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
to_decimal,
)
from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit
from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage
from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage
from snowflake.snowpark.types import (
BooleanType,
Expand Down Expand Up @@ -304,44 +305,48 @@ def generate_timestamp_col(
return new_col


def to_datetime_require_fallback(
def raise_if_to_datetime_not_supported(
format: str,
exact: Union[bool, lib.NoDefault] = lib.no_default,
infer_datetime_format: Union[lib.NoDefault, bool] = lib.no_default,
origin: DateTimeOrigin = "unix",
errors: DateTimeErrorChoices = "raise",
) -> bool:
) -> None:
"""
check whether to_datetime requires fallback
Raise a not implemented error if the to_datetime API is given any unsupported
parameter or parameter value
Args:
format: the format argument for to_datetime
exact: the exact argument for to_datetime
infer_datetime_format: the infer_datetime_format argument for to_datetime
origin: the origin argument for to_datetime
errors: the errors argument for to_datetime
Returns:
True if fallback is required; otherwise False
"""
error_message = None
if format is not None and not is_snowflake_timestamp_format_valid(
to_snowflake_timestamp_format(format)
):
# if format is not given, Snowflake's auto format detection may be different from pandas behavior
return True

if not exact:
error_message = (
f"Snowpark pandas to_datetime API doesn't yet support given format {format}"
)
elif not exact:
# Snowflake does not allow the format to match anywhere in the target string when exact is False
return True
if infer_datetime_format != lib.no_default:
error_message = "Snowpark pandas to_datetime API doesn't yet support non exact format matching"
elif infer_datetime_format != lib.no_default:
# infer_datetime_format is deprecated since version 2.0.0
return True
if origin == "julian":
error_message = "Snowpark pandas to_datetime API doesn't support 'infer_datetime_format' parameter"
elif origin == "julian":
# default for julian calendar support
return True
if errors == "ignore":
error_message = (
"Snowpark pandas to_datetime API doesn't yet support julian calendar"
)
elif errors == "ignore":
# ignore requires returning the whole original input, which is not applicable in Snowflake
return True
return False
error_message = "Snowpark pandas to_datetime API doesn't yet support 'ignore' value for errors parameter"

if error_message:
ErrorMessage.not_implemented(error_message)


def convert_dateoffset_to_interval(
Expand Down
34 changes: 0 additions & 34 deletions src/snowflake/snowpark/modin/plugin/compiler/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1476,33 +1476,6 @@ def melt(self, *args, **kwargs): # noqa: PR02
"""
return DataFrameDefault.register(pandas.DataFrame.melt)(self, *args, **kwargs)

@doc_utils.add_refer_to("DataFrame.sort_values")
def sort_columns_by_row_values(self, rows, ascending=True, **kwargs): # noqa: PR02
"""
Reorder the columns based on the lexicographic order of the given rows.
Parameters
----------
rows : label or list of labels
The row or rows to sort by.
ascending : bool, default: True
Sort in ascending order (True) or descending order (False).
kind : {"quicksort", "mergesort", "heapsort"}
na_position : {"first", "last"}
ignore_index : bool
key : callable(pandas.Index) -> pandas.Index, optional
**kwargs : dict
Serves the compatibility purpose. Does not affect the result.
Returns
-------
BaseQueryCompiler
New QueryCompiler that contains result of the sort.
"""
return DataFrameDefault.register(pandas.DataFrame.sort_values)(
self, by=rows, axis=1, ascending=ascending, **kwargs
)

# END Abstract map across rows/columns

# Map across rows/columns
Expand Down Expand Up @@ -3610,13 +3583,6 @@ def str_cat(self, others, sep=None, na_rep=None, join="left"):
self, others, sep, na_rep, join
)

@doc_utils.doc_str_method(
refer_to="casefold",
params="",
)
def str_casefold(self):
return StrDefault.register(pandas.Series.str.casefold)(self)

@doc_utils.doc_str_method(refer_to="__getitem__", params="key : object")
def str___getitem__(self, key):
return StrDefault.register(pandas.Series.str.__getitem__)(self, key)
Expand Down
Loading

0 comments on commit 1309d80

Please sign in to comment.