diff --git a/crates/polars-plan/src/plans/hive.rs b/crates/polars-plan/src/plans/hive.rs
index a711aeb11848..d99054cb405c 100644
--- a/crates/polars-plan/src/plans/hive.rs
+++ b/crates/polars-plan/src/plans/hive.rs
@@ -231,21 +231,18 @@ pub fn hive_partitions_from_paths(
 }
 
 /// Determine the path separator for identifying Hive partitions.
-#[cfg(target_os = "windows")]
-fn separator(url: &Path) -> char {
-    if polars_io::path_utils::is_cloud_url(url) {
-        '/'
+fn separator(url: &Path) -> &[char] {
+    if cfg!(target_family = "windows") {
+        if polars_io::path_utils::is_cloud_url(url) {
+            &['/']
+        } else {
+            &['/', '\\']
+        }
     } else {
-        '\\'
+        &['/']
     }
 }
 
-/// Determine the path separator for identifying Hive partitions.
-#[cfg(not(target_os = "windows"))]
-fn separator(_url: &Path) -> char {
-    '/'
-}
-
 /// Parse a Hive partition string (e.g. "column=1.5") into a name and value part.
 ///
 /// Returns `None` if the string is not a Hive partition string.
diff --git a/py-polars/tests/unit/io/test_hive.py b/py-polars/tests/unit/io/test_hive.py
index a01a2ef6e59d..9e9213ac9bd4 100644
--- a/py-polars/tests/unit/io/test_hive.py
+++ b/py-polars/tests/unit/io/test_hive.py
@@ -779,3 +779,45 @@ def test_hive_predicate_dates_14712(
     )
     pl.scan_parquet(tmp_path).filter(pl.col("a") != datetime(2024, 1, 1)).collect()
     assert "hive partitioning: skipped 1 files" in capfd.readouterr().err
+
+
+@pytest.mark.skipif(sys.platform != "win32", reason="Test is only for Windows paths")
+@pytest.mark.write_disk
+def test_hive_windows_splits_on_forward_slashes(tmp_path: Path) -> None:
+    # Note: This needs to be an absolute path.
+    tmp_path = tmp_path.resolve()
+    path = f"{tmp_path}/a=1/b=1/c=1/d=1/e=1"
+    Path(path).mkdir(exist_ok=True, parents=True)
+
+    df = pl.DataFrame({"x": "x"})
+    df.write_parquet(f"{path}/data.parquet")
+
+    expect = pl.DataFrame(
+        [
+            s.new_from_index(0, 5)
+            for s in pl.DataFrame(
+                {
+                    "x": "x",
+                    "a": 1,
+                    "b": 1,
+                    "c": 1,
+                    "d": 1,
+                    "e": 1,
+                }
+            )
+        ]
+    )
+
+    assert_frame_equal(
+        pl.scan_parquet(
+            [
+                f"{tmp_path}/a=1/b=1/c=1/d=1/e=1/data.parquet",
+                f"{tmp_path}\\a=1\\b=1\\c=1\\d=1\\e=1\\data.parquet",
+                f"{tmp_path}\\a=1/b=1/c=1/d=1/**/*",
+                f"{tmp_path}/a=1/b=1\\c=1/d=1/**/*",
+                f"{tmp_path}/a=1/b=1/c=1/d=1\\e=1/*",
+            ],
+            hive_partitioning=True,
+        ).collect(),
+        expect,
+    )
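
To make the separator change concrete, the following is a minimal, self-contained Rust sketch, not the PR's actual call site in hive.rs (the demo path and the `seps` binding are illustrative). It shows why returning `&[char]` matters: the standard library's `str::split` accepts a `&[char]` pattern and splits on any character in the slice, so mixed-separator Windows paths like those in the new test break apart correctly, whereas the old single-`char` return value could only split on one separator at a time.

fn main() {
    // Any char in the slice acts as a split point when a `&[char]`
    // pattern is passed to `str::split`.
    let seps: &[char] = &['/', '\\'];

    // Hypothetical mixed-separator Windows path, like the ones the
    // new test feeds to `scan_parquet`.
    let path = r"D:\data/a=1\b=1/data.parquet";

    let parts: Vec<&str> = path.split(seps).collect();
    assert_eq!(parts, ["D:", "data", "a=1", "b=1", "data.parquet"]);

    // The old single-`char` separator ('\\' for local Windows paths)
    // leaves the forward-slash boundaries intact:
    let old: Vec<&str> = path.split('\\').collect();
    assert_eq!(old, ["D:", "data/a=1", "b=1/data.parquet"]);
}

Note also that switching from two `#[cfg]`-gated definitions to a single function using the `cfg!` macro means both branches are type-checked on every platform, a common refactor when, as here, both arms compile everywhere.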