Skip to content

Commit

Permalink
fix: single-element .dt.time() and .dt.date() should always preserve sortedness (#13808)
Browse files (browse the repository at this point in the history)
  • Loading branch information
MarcoGorelli authored Jan 23, 2024
1 parent 168985a commit 9cf6af0
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 22 deletions.
4 changes: 0 additions & 4 deletions crates/polars-core/src/chunked_array/ops/chunkops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,6 @@ impl<T: PolarsDataType> ChunkedArray<T> {
.iter()
.map(|arr| arr.null_count())
.sum::<usize>() as IdxSize;

if self.length <= 1 {
self.set_sorted_flag(IsSorted::Ascending)
}
}

pub fn rechunk(&self) -> Self {
Expand Down
3 changes: 3 additions & 0 deletions crates/polars-core/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ impl Series {
}

pub fn is_sorted_flag(&self) -> IsSorted {
if self.len() <= 1 {
return IsSorted::Ascending;
}
let flags = self.get_flags();
if flags.contains(Settings::SORTED_DSC) {
IsSorted::Descending
Expand Down
43 changes: 26 additions & 17 deletions py-polars/tests/unit/namespaces/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,20 +135,38 @@ def test_dt_datetime_deprecated() -> None:
("UTC", True),
],
)
def test_local_date_sortedness(time_zone: str | None, expected: bool) -> None:
    """Check the sorted flags reported by ``.dt.date()`` on a sorted datetime Series.

    ``time_zone`` is applied with ``dt.replace_time_zone`` before sorting.
    ``expected`` is the ``SORTED_ASC`` flag anticipated for the two-element case.
    """
    moment = datetime(2022, 1, 1, 23)

    # singleton - always sorted
    single = pl.Series([moment]).dt.replace_time_zone(time_zone).sort()
    single_dates = single.dt.date()
    assert single_dates.flags["SORTED_ASC"]
    assert single_dates.flags["SORTED_DESC"] is False

    # 2 elements - depends on time zone
    pair = pl.Series([moment] * 2).dt.replace_time_zone(time_zone).sort()
    pair_dates = pair.dt.date()
    assert pair_dates.flags["SORTED_ASC"] == expected
    assert pair_dates.flags["SORTED_DESC"] is False


@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu", "UTC"])
def test_local_time_sortedness(time_zone: str | None) -> None:
    """Check the sorted flags reported by ``.dt.time()`` on a sorted datetime Series.

    ``time_zone`` is applied with ``dt.replace_time_zone`` before sorting.
    """
    moment = datetime(2022, 1, 1, 23)

    # One element: the ascending flag is set and the descending flag is not.
    single_times = (
        pl.Series([moment]).dt.replace_time_zone(time_zone).sort().dt.time()
    )
    assert single_times.flags["SORTED_ASC"]
    assert not single_times.flags["SORTED_DESC"]

    # Two elements: neither sorted flag is set on the extracted times.
    pair_times = (
        pl.Series([moment, moment]).dt.replace_time_zone(time_zone).sort().dt.time()
    )
    assert not pair_times.flags["SORTED_ASC"]
    assert not pair_times.flags["SORTED_DESC"]


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_local_time_before_epoch(time_unit: TimeUnit) -> None:
Expand Down Expand Up @@ -857,22 +875,13 @@ def test_offset_by_expressions() -> None:
f=pl.col("a").dt.date().dt.offset_by(pl.col("b")),
)
assert_frame_equal(result, expected[i : i + 1])
if df_slice["b"].item() is None:
# Offset is None, so result will be all-None, so sortedness isn't preserved.
assert result.flags == {
"c": {"SORTED_ASC": False, "SORTED_DESC": False},
"d": {"SORTED_ASC": False, "SORTED_DESC": False},
"e": {"SORTED_ASC": False, "SORTED_DESC": False},
"f": {"SORTED_ASC": False, "SORTED_DESC": False},
}
else:
# For tz-aware, sortedness is not preserved.
assert result.flags == {
"c": {"SORTED_ASC": True, "SORTED_DESC": False},
"d": {"SORTED_ASC": True, "SORTED_DESC": False},
"e": {"SORTED_ASC": False, "SORTED_DESC": False},
"f": {"SORTED_ASC": True, "SORTED_DESC": False},
}
# single-row Series are always sorted
assert result.flags == {
"c": {"SORTED_ASC": True, "SORTED_DESC": False},
"d": {"SORTED_ASC": True, "SORTED_DESC": False},
"e": {"SORTED_ASC": True, "SORTED_DESC": False},
"f": {"SORTED_ASC": True, "SORTED_DESC": False},
}


@pytest.mark.parametrize(
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/operations/test_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ def test_sorted_flag_singletons(value: Any) -> None:


def test_sorted_flag_null() -> None:
    """A column holding two nulls must not report the ``SORTED_ASC`` flag."""
    # Two rows are used deliberately: a single-element column is always
    # reported as sorted ascending, so a one-null column cannot exercise this.
    assert pl.DataFrame({"x": [None] * 2})["x"].flags["SORTED_ASC"] is False


def test_sorted_update_flags_10327() -> None:
Expand Down

0 comments on commit 9cf6af0

Please sign in to comment.