Skip to content

Commit

Permalink
SNOW-1552804, SNOW-1552806: Add support for Series.dt.day_name/month_…
Browse files Browse the repository at this point in the history
…name (#1975)
  • Loading branch information
sfc-gh-helmeleegy authored Jul 27, 2024
1 parent c3d30cd commit 7c9df84
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
- Added support for `Index.is_unique` and `Index.has_duplicates`.
- Added support for `Index.equals`.
- Added support for `Index.value_counts`.
- Added support for `Series.dt.day_name` and `Series.dt.month_name`.

#### Improvements
- Removed the public preview warning message upon importing Snowpark pandas.
Expand Down
2 changes: 2 additions & 0 deletions docs/source/modin/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ Series
Series.dt.day_of_year
Series.dt.quarter
Series.dt.isocalendar
Series.dt.month_name
Series.dt.day_name


.. rubric:: String accessor methods
Expand Down
4 changes: 2 additions & 2 deletions docs/source/modin/supported/series_dt_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ the method in the left column.
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``ceil`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``month_name`` | N | |
| ``month_name`` | P | ``N`` if `locale` is set. |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``day_name`` | N | |
| ``day_name`` | P | ``N`` if `locale` is set. |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``total_seconds`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#
# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
#
import calendar
import functools
import itertools
import json
Expand Down Expand Up @@ -15366,28 +15367,91 @@ def dt_normalize(self) -> None:
"Snowpark pandas doesn't yet support the method 'Series.dt.normalize'"
)

def dt_month_name(self, locale: Optional[str] = None) -> "SnowflakeQueryCompiler":
    """
    Map each value of the first data column to its English month name.

    Args:
        locale: Locale determining the language in which to return the month name.
            Only ``None`` is supported; any other value is reported through
            ``ErrorMessage.not_implemented``.

    Returns:
        New QueryCompiler containing month name.
    """
    if locale is not None:
        ErrorMessage.not_implemented(
            "Snowpark pandas 'Series.dt.month_name' method doesn't yet support 'locale' parameter"
        )
    internal_frame = self._modin_frame
    data_identifier = internal_frame.data_column_snowflake_quoted_identifiers[0]

    # Spell the names out instead of reading calendar.month_name: that sequence
    # follows the process locale, whereas pandas returns English names when
    # ``locale`` is None.
    english_month_names = (
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    )
    # Flat (search, result) argument list for DECODE:
    # [1, "January", 2, "February", ..., 12, "December"]
    mapping_list = [
        item
        for month_number, month_name in enumerate(english_month_names, start=1)
        for item in (month_number, month_name)
    ]
    snowpark_column = builtin("decode")(
        builtin("extract")("month", col(data_identifier)),
        *mapping_list,
    )
    internal_frame = internal_frame.append_column(
        internal_frame.data_column_pandas_labels[0], snowpark_column
    )

    # Keep only the newly appended column as data (selected with [-1:], which
    # relies on append_column placing the new column last) and label it None.
    return SnowflakeQueryCompiler(
        InternalFrame.create(
            ordered_dataframe=internal_frame.ordered_dataframe,
            data_column_pandas_labels=[None],
            data_column_pandas_index_names=internal_frame.data_column_pandas_index_names,
            data_column_snowflake_quoted_identifiers=internal_frame.data_column_snowflake_quoted_identifiers[
                -1:
            ],
            index_column_pandas_labels=internal_frame.index_column_pandas_labels,
            index_column_snowflake_quoted_identifiers=internal_frame.index_column_snowflake_quoted_identifiers,
        )
    )

def dt_day_name(self, locale: Optional[str] = None) -> "SnowflakeQueryCompiler":
    """
    Map each value of the first data column to its English day-of-week name.

    Args:
        locale: Locale determining the language in which to return the day name.
            Only ``None`` is supported; any other value is reported through
            ``ErrorMessage.not_implemented``.

    Returns:
        New QueryCompiler containing day name.
    """
    if locale is not None:
        ErrorMessage.not_implemented(
            "Snowpark pandas 'Series.dt.day_name' method doesn't yet support 'locale' parameter"
        )
    internal_frame = self._modin_frame
    data_identifier = internal_frame.data_column_snowflake_quoted_identifiers[0]

    # Spell the names out instead of reading calendar.day_name: that sequence
    # follows the process locale, whereas pandas returns English names when
    # ``locale`` is None.
    english_day_names = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )
    # Flat (search, result) argument list for DECODE, keyed by ISO weekday:
    # [1, "Monday", 2, "Tuesday", ..., 7, "Sunday"]
    mapping_list = [
        item
        for iso_weekday, day_name in enumerate(english_day_names, start=1)
        for item in (iso_weekday, day_name)
    ]
    snowpark_column = builtin("decode")(
        builtin("dayofweekiso")(col(data_identifier)),
        *mapping_list,
    )

    internal_frame = internal_frame.append_column(
        internal_frame.data_column_pandas_labels[0], snowpark_column
    )

    # Keep only the newly appended column as data (selected with [-1:], which
    # relies on append_column placing the new column last) and label it None.
    return SnowflakeQueryCompiler(
        InternalFrame.create(
            ordered_dataframe=internal_frame.ordered_dataframe,
            data_column_pandas_labels=[None],
            data_column_pandas_index_names=internal_frame.data_column_pandas_index_names,
            data_column_snowflake_quoted_identifiers=internal_frame.data_column_snowflake_quoted_identifiers[
                -1:
            ],
            index_column_pandas_labels=internal_frame.index_column_pandas_labels,
            index_column_snowflake_quoted_identifiers=internal_frame.index_column_snowflake_quoted_identifiers,
        )
    )

def dt_total_seconds(self) -> None:
Expand Down
88 changes: 86 additions & 2 deletions src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1517,10 +1517,94 @@ def ceil():
pass

def month_name():
    """
    Return the month names with specified locale.

    Parameters
    ----------
    locale : str, optional
        Locale determining the language in which to return the month name.
        Default is English locale ('en_US.utf8'). Use the command locale -a
        on your terminal on Unix systems to find your locale language code.
        Snowpark pandas ``Series.dt.month_name`` does not yet support this
        parameter: passing a value other than None raises NotImplementedError.

    Returns
    -------
    Series or Index
        Series or Index of month names.

    Examples
    --------
    >>> s = pd.Series(pd.date_range(start='2018-01', freq='ME', periods=3))
    >>> s
    0   2018-01-31
    1   2018-02-28
    2   2018-03-31
    dtype: datetime64[ns]
    >>> s.dt.month_name()
    0     January
    1    February
    2       March
    dtype: object

    >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
    >>> idx  # doctest: +SKIP
    DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
                  dtype='datetime64[ns]', freq='ME')
    >>> idx.month_name()  # doctest: +SKIP
    Index(['January', 'February', 'March'], dtype='object')

    Using the locale parameter you can set a different locale language,
    for example: idx.month_name(locale='pt_BR.utf8') will return month
    names in Brazilian Portuguese language.

    >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3)
    >>> idx  # doctest: +SKIP
    DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
                  dtype='datetime64[ns]', freq='ME')
    >>> idx.month_name(locale='pt_BR.utf8')  # doctest: +SKIP
    Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object')
    """

def day_name():
    """
    Return the day names with specified locale.

    Parameters
    ----------
    locale : str, optional
        Locale determining the language in which to return the day name.
        Default is English locale ('en_US.utf8'). Use the command locale -a
        on your terminal on Unix systems to find your locale language code.
        Snowpark pandas ``Series.dt.day_name`` does not yet support this
        parameter: passing a value other than None raises NotImplementedError.

    Returns
    -------
    Series or Index
        Series or Index of day names.

    Examples
    --------
    >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3))
    >>> s
    0   2018-01-01
    1   2018-01-02
    2   2018-01-03
    dtype: datetime64[ns]
    >>> s.dt.day_name()
    0       Monday
    1      Tuesday
    2    Wednesday
    dtype: object

    >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
    >>> idx  # doctest: +SKIP
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
                  dtype='datetime64[ns]', freq='D')
    >>> idx.day_name()  # doctest: +SKIP
    Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')

    Using the locale parameter you can set a different locale language,
    for example: idx.day_name(locale='pt_BR.utf8') will return day
    names in Brazilian Portuguese language.

    >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
    >>> idx  # doctest: +SKIP
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
                  dtype='datetime64[ns]', freq='D')
    >>> idx.day_name(locale='pt_BR.utf8')  # doctest: +SKIP
    Index(['Segunda', 'Terça', 'Quarta'], dtype='object')
    """

def total_seconds():
pass
Expand Down
23 changes: 23 additions & 0 deletions tests/integ/modin/series/test_dt_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,29 @@ def test_day_of_week(property, day_of_week_or_year_data, set_week_start):
)


@sql_count_checker(query_count=1)
@pytest.mark.parametrize("method", ["day_name", "month_name"])
def test_day_month_name(method):
    """Compare ``Series.dt.<method>()`` between Snowpark pandas and native pandas.

    The dates start at 2020-05-01 and advance in 17-day steps (five values).
    """

    def call_dt_method(series):
        # Resolve the accessor method by name and invoke it without arguments.
        return getattr(series.dt, method)()

    native_series = native_pd.Series(
        native_pd.date_range("2020-05-01", periods=5, freq="17D")
    )
    snow_series = pd.Series(native_series)
    eval_snowpark_pandas_result(snow_series, native_series, call_dt_method)


@sql_count_checker(query_count=0)
@pytest.mark.parametrize("method", ["day_name", "month_name"])
def test_day_month_name_negative(method):
    """Check that passing ``locale`` to ``Series.dt.<method>`` raises NotImplementedError."""
    native_series = native_pd.Series(
        native_pd.date_range("2020-05-01", periods=5, freq="17D")
    )
    snow_series = pd.Series(native_series)
    with pytest.raises(NotImplementedError):
        # Attribute lookup stays inside the context manager, as in the
        # original, so the asserted region is unchanged.
        getattr(snow_series.dt, method)(locale="pt_BR.utf8")


@dt_properties
@sql_count_checker(query_count=1)
def test_dt_property_with_tz(property_name):
Expand Down
2 changes: 0 additions & 2 deletions tests/unit/modin/test_series_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ def mock_query_compiler_for_dt_series() -> SnowflakeQueryCompiler:
(lambda s: s.dt.round(freq="1D"), "round"),
(lambda s: s.dt.floor(freq="1D"), "floor"),
(lambda s: s.dt.ceil(freq="1D"), "ceil"),
(lambda s: s.dt.month_name(), "month_name"),
(lambda s: s.dt.day_name(), "day_name"),
(lambda s: s.dt.total_seconds(), "total_seconds"),
(lambda s: s.dt.seconds, "seconds"),
(lambda s: s.dt.days, "days"),
Expand Down

0 comments on commit 7c9df84

Please sign in to comment.