Skip to content

Commit

Permalink
fix: Don't produce duplicate column names in Series.to_dummies (#19326)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Oct 20, 2024
1 parent 463ed66 commit 94b7e89
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
3 changes: 2 additions & 1 deletion crates/polars-ops/src/series/ops/to_dummies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ impl ToDummies for Series {
})
.collect::<Vec<_>>();

Ok(unsafe { DataFrame::new_no_checks_height_from_first(sort_columns(columns)) })
// SAFETY: `dummies_helper` functions preserve `self.len()` length
unsafe { DataFrame::new_no_length_checks(sort_columns(columns)) }
}
}

Expand Down
8 changes: 8 additions & 0 deletions py-polars/tests/unit/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
Unknown,
)
from polars.exceptions import (
DuplicateError,
InvalidOperationError,
PolarsInefficientMapWarning,
ShapeError,
Expand Down Expand Up @@ -1356,6 +1357,13 @@ def test_to_dummies_drop_first() -> None:
assert_frame_equal(result, expected)


def test_to_dummies_null_clash_19096() -> None:
    """Check that `Series.to_dummies` raises instead of emitting duplicate columns.

    The input holds both a missing value and the literal string "null".
    The expected error message indicates that these collide on the dummy
    column name `_null`, so a `DuplicateError` must be raised.
    """
    clashing_values = pl.Series([None, "null"])
    expected_message = "column with name '_null' has more than one occurrence"

    # `match` is applied as a regex search against the raised error's text.
    with pytest.raises(DuplicateError, match=expected_message):
        clashing_values.to_dummies()


def test_chunk_lengths() -> None:
s = pl.Series("a", [1, 2, 2, 3])
# this is a Series with one chunk, of length 4
Expand Down

0 comments on commit 94b7e89

Please sign in to comment.