Skip to content

Commit

Permalink
fix: Fix perfect groupby (#19461)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Oct 26, 2024
1 parent d616866 commit ce001f1
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 0 deletions.
5 changes: 5 additions & 0 deletions crates/polars-core/src/frame/group_by/perfect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ where
per_thread_offsets.push(std::cmp::min(cache_aligned_offset, len));
}
}
*per_thread_offsets.last_mut().unwrap() = len;

let groups_ptr = unsafe { SyncPtr::new(groups.as_mut_ptr()) };
let first_ptr = unsafe { SyncPtr::new(first.as_mut_ptr()) };
Expand All @@ -75,6 +76,10 @@ where
let end = per_thread_offsets[thread_no + 1];
let end = T::Native::from_usize(end).unwrap();

if start == end {
return;
}

let push_to_group = |cat, row_nr| unsafe {
debug_assert!(cat < len);
let buf = &mut *groups.add(cat);
Expand Down
12 changes: 12 additions & 0 deletions py-polars/tests/unit/datatypes/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,3 +864,15 @@ def test_nested_categorical_concat(

with pytest.raises(pl.exceptions.StringCacheMismatchError):
pl.concat([a, b])


def test_perfect_group_by_19452() -> None:
    """Window over a categorical key where every row has its own category.

    Column "a" holds the stringified integers 0..n-1 cast to Categorical and
    column "b" holds the same integers. Replacing "a" with ``b.over(a)`` must
    keep the rows in their original, ascending order.

    NOTE(review): the numeric suffix in the test name presumably refers to the
    upstream issue this guards against -- confirm against the tracker.
    """
    n = 40
    keys = pl.int_range(n, eager=True).cast(pl.String).cast(pl.Categorical)
    values = pl.int_range(n, eager=True)
    df2 = pl.DataFrame({"a": keys, "b": values})

    # Overwrite "a" with "b" evaluated per group of the original "a".
    windowed = df2.with_columns(a=pl.col("b").over(pl.col("a")))
    result = windowed["a"]

    assert result.is_sorted()

0 comments on commit ce001f1

Please sign in to comment.