Skip to content

Commit

Permalink
perf: Make date_range / datetime_range ~10x faster for constant durations (#19216)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli authored Oct 14, 2024
1 parent e29e9df commit 1536bb0
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 17 deletions.
45 changes: 28 additions & 17 deletions crates/polars-time/src/date_range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,25 +111,36 @@ pub(crate) fn datetime_range_i64(
ComputeError: "`interval` must be positive"
);

let size: usize;
let offset_fn: fn(&Duration, i64, Option<&Tz>) -> PolarsResult<i64>;

match tu {
TimeUnit::Nanoseconds => {
size = ((end - start) / interval.duration_ns() + 1) as usize;
offset_fn = Duration::add_ns;
},
TimeUnit::Microseconds => {
size = ((end - start) / interval.duration_us() + 1) as usize;
offset_fn = Duration::add_us;
},
TimeUnit::Milliseconds => {
size = ((end - start) / interval.duration_ms() + 1) as usize;
offset_fn = Duration::add_ms;
},
let duration = match tu {
TimeUnit::Nanoseconds => interval.duration_ns(),
TimeUnit::Microseconds => interval.duration_us(),
TimeUnit::Milliseconds => interval.duration_ms(),
};
let time_zone_opt_string: Option<String> = match tz {
#[cfg(feature = "timezones")]
Some(tz) => Some(tz.to_string()),
_ => None,
};
if interval.is_constant_duration(time_zone_opt_string.as_deref()) {
// Fast path!
let step: usize = duration.try_into().map_err(
|_err| polars_err!(ComputeError: "Could not convert {:?} to usize", duration),
)?;
return match closed {
ClosedWindow::Both => Ok((start..=end).step_by(step).collect::<Vec<i64>>()),
ClosedWindow::None => Ok((start + duration..end).step_by(step).collect::<Vec<i64>>()),
ClosedWindow::Left => Ok((start..end).step_by(step).collect::<Vec<i64>>()),
ClosedWindow::Right => Ok((start + duration..=end).step_by(step).collect::<Vec<i64>>()),
};
}
let mut ts = Vec::with_capacity(size);

let size = ((end - start) / duration + 1) as usize;
let offset_fn = match tu {
TimeUnit::Nanoseconds => Duration::add_ns,
TimeUnit::Microseconds => Duration::add_us,
TimeUnit::Milliseconds => Duration::add_ms,
};
let mut ts = Vec::with_capacity(size);
let mut i = match closed {
ClosedWindow::Both | ClosedWindow::Left => 0,
ClosedWindow::Right | ClosedWindow::None => 1,
Expand Down
41 changes: 41 additions & 0 deletions py-polars/tests/unit/functions/range/test_datetime_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from datetime import date, datetime, timedelta
from typing import TYPE_CHECKING

import hypothesis.strategies as st
import pytest
from hypothesis import given, settings

import polars as pl
from polars.datatypes import DTYPE_TEMPORAL_UNITS
Expand Down Expand Up @@ -579,3 +581,42 @@ def test_datetime_range_specifying_ambiguous_11713() -> None:
"datetime", [datetime(2023, 10, 29, 2), datetime(2023, 10, 29, 3)]
).dt.replace_time_zone("Europe/Madrid", ambiguous=pl.Series(["latest", "raise"]))
assert_series_equal(result, expected)


@given(
    closed=st.sampled_from(["none", "left", "right", "both"]),
    time_unit=st.sampled_from(["ms", "us", "ns"]),
    n=st.integers(1, 10),
    size=st.integers(8, 10),
    unit=st.sampled_from(["s", "m", "h", "d", "mo"]),
    start=st.datetimes(datetime(1965, 1, 1), datetime(2100, 1, 1)),
)
@settings(max_examples=50)
@pytest.mark.benchmark
def test_datetime_range_fast_slow_paths(
    closed: ClosedInterval,
    time_unit: TimeUnit,
    n: int,
    size: int,
    unit: str,
    start: datetime,
) -> None:
    """
    Check that a time-zone-aware range matches the equivalent naive range.

    The same ``start``/``end``/``interval`` arguments are used to build one
    range in the "Asia/Kathmandu" time zone (with the zone stripped afterwards)
    and one range without any time zone; the two series must be equal.

    NOTE(review): the ``result_slow`` / ``result_fast`` naming in the original
    suggests the zoned call is meant to exercise the general code path and the
    naive call the constant-duration shortcut -- confirm against the Rust
    implementation, since which path runs depends on the interval unit.
    """
    # The range end lies `n * size` units after `start`, so the interval of
    # `n` units divides the offset evenly.
    total_offset = f"{n*size}{unit}"
    step = f"{n}{unit}"
    end = pl.select(pl.lit(start).dt.offset_by(total_offset)).item()

    # Naive (no time zone) range.
    naive_range = pl.datetime_range(
        start,
        end,
        interval=step,
        closed=closed,
        time_unit=time_unit,
        eager=True,
    )

    # Same range built in a time zone, then made naive again for comparison.
    zoned_range = pl.datetime_range(
        start,
        end,
        interval=step,
        closed=closed,
        time_unit=time_unit,
        time_zone="Asia/Kathmandu",
        eager=True,
    )
    zoned_as_naive = zoned_range.dt.replace_time_zone(None)

    assert_series_equal(zoned_as_naive, naive_range)

0 comments on commit 1536bb0

Please sign in to comment.