From 6931d3f67487dcf9f94b3cbd5bf6502818a210ff Mon Sep 17 00:00:00 2001 From: Vasil Danielov Pashov Date: Thu, 19 Dec 2024 16:22:48 +0200 Subject: [PATCH] Implement origin for pandas resampling (#1962) #### Reference Issues/PRs Resolve: #1740 Resolve: #1751 #### What does this implement or fix? Implements the equivalent of pandas' `origin` parameter in ArcticDB's resample. The default is set to `epoch`, which matches ArcticDB's behaviour up to now but differs from Pandas, which uses `start_day`. Changing the default to `start_day` would be a breaking change. Fixes how the min and max aggregators handle infinity when a column contains only infinities. Previously the result was `NaN`, which is inconsistent with Pandas and incorrect; they now return the proper infinite value. Adds a hypothesis test covering combinations of all resampling parameters. The `start`, `end`, `start_day` and `end_day` origins are not supported in conjunction with the `date_range` clause. #### Any other comments? #### Checklist
Checklist for code changes... - [ ] Have you updated the relevant docstrings, documentation and copyright notice? - [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)? - [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)? - [ ] Are API changes highlighted in the PR description? - [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes?
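A minimal usage sketch of the new parameter (illustrative only; the LMDB URI, library name and symbol are placeholders, not part of this change):

```python
import pandas as pd
from arcticdb import Arctic, QueryBuilder

ac = Arctic("lmdb://./arctic_origin_demo")  # placeholder URI
lib = ac.get_library("demo", create_if_missing=True)

idx = pd.date_range("2025-01-01 10:00:33", "2025-01-02 12:00:20", freq="10s")
lib.write("sym", pd.DataFrame({"col": range(len(idx))}, index=idx))

q = QueryBuilder()
# origin defaults to "epoch" (the previous ArcticDB behaviour); "start_day" matches pandas' default
q = q.resample("2min", closed="left", label="left", origin="start_day").agg({"col_sum": ("col", "sum")})
df = lib.read("sym", query_builder=q).data
```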
--------- Co-authored-by: Vasil Pashov --- cpp/arcticdb/processing/clause.cpp | 83 ++++-- cpp/arcticdb/processing/clause.hpp | 40 +-- cpp/arcticdb/processing/clause_utils.cpp | 72 ++++- cpp/arcticdb/processing/clause_utils.hpp | 50 +--- .../processing/sorted_aggregation.hpp | 4 +- .../processing/test/rapidcheck_resample.cpp | 6 +- .../processing/test/test_resample.cpp | 30 +- cpp/arcticdb/python/python_utils.hpp | 1 - cpp/arcticdb/version/python_bindings.cpp | 93 ++++-- python/arcticdb/util/test.py | 78 +++++ python/arcticdb/version_store/processing.py | 33 ++- .../hypothesis/arcticdb/test_resample.py | 110 ++++++++ .../hypothesis/arcticdb/test_sort_merge.py | 2 +- .../arcticdb/version_store/test_resample.py | 267 ++++++++++++++++-- 14 files changed, 694 insertions(+), 175 deletions(-) create mode 100644 python/tests/hypothesis/arcticdb/test_resample.py diff --git a/cpp/arcticdb/processing/clause.cpp b/cpp/arcticdb/processing/clause.cpp index 82ce052969..308bb0d2f0 100644 --- a/cpp/arcticdb/processing/clause.cpp +++ b/cpp/arcticdb/processing/clause.cpp @@ -8,8 +8,6 @@ #include #include -#include - #include #include #include @@ -21,9 +19,13 @@ #include #include #include +#include + + namespace arcticdb { +namespace ranges = std::ranges; using namespace pipelines; class GroupingMap { @@ -431,6 +433,53 @@ std::vector AggregationClause::process(std::vector&& entity_ return str_; } +template +ResampleClause::ResampleClause(std::string rule, + ResampleBoundary label_boundary, + BucketGeneratorT&& generate_bucket_boundaries, + timestamp offset, + ResampleOrigin origin) : + rule_(std::move(rule)), + label_boundary_(label_boundary), + generate_bucket_boundaries_(std::move(generate_bucket_boundaries)), + offset_(offset), + origin_(std::move(origin)) { + clause_info_.input_structure_ = ProcessingStructure::TIME_BUCKETED; + clause_info_.can_combine_with_column_selection_ = false; + clause_info_.modifies_output_descriptor_ = true; + clause_info_.index_ = KeepCurrentTopLevelIndex(); +} + +template +const ClauseInfo& ResampleClause::clause_info() const { + return clause_info_; +} + +template +void ResampleClause::set_component_manager(std::shared_ptr component_manager) { + component_manager_ = std::move(component_manager); +} + +template +std::string ResampleClause::rule() const { + return rule_; +} + +template +void ResampleClause::set_date_range(timestamp date_range_start, timestamp date_range_end) { + // Start and end need to read the first and last segments of the date range. At the moment buckets are set up before + // reading and processing the data. 
+ constexpr static std::array unsupported_origin{ "start", "end", "start_day", "end_day" }; + user_input::check( + util::variant_match(origin_, + [&](const std::string& origin) { return ranges::none_of(unsupported_origin, [&](std::string_view el) { return el == origin; }); }, + [](const auto&) { return true;} + ), + "Resampling origins {} are not supported in conjunction with date range", unsupported_origin + ); + date_range_.emplace(date_range_start, date_range_end); +} + template void ResampleClause::set_aggregations(const std::vector& named_aggregators) { clause_info_.input_columns_ = std::make_optional>(); @@ -475,16 +524,15 @@ std::vector> ResampleClause::structure_for_ if (ranges_and_keys.empty()) { return {}; } - TimestampRange index_range( - std::min_element(ranges_and_keys.begin(), ranges_and_keys.end(), - [](const RangesAndKey& left, const RangesAndKey& right) { - return left.start_time() < right.start_time(); - })->start_time(), - std::max_element(ranges_and_keys.begin(), ranges_and_keys.end(), - [](const RangesAndKey& left, const RangesAndKey& right) { - return left.end_time() < right.end_time(); - })->end_time() - ); + + // Iterate over ranges_and_keys and create a pair with first element equal to the smallest start time and second + // element equal to the largest end time. + const TimestampRange index_range = std::accumulate( + std::next(ranges_and_keys.begin()), + ranges_and_keys.end(), + TimestampRange{ ranges_and_keys.begin()->start_time(), ranges_and_keys.begin()->end_time() }, + [](const TimestampRange& rng, const RangesAndKey& el) { return TimestampRange{std::min(rng.first, el.start_time()), std::max(rng.second, el.end_time())};}); + if (date_range_.has_value()) { date_range_->first = std::max(date_range_->first, index_range.first); date_range_->second = std::min(date_range_->second, index_range.second); @@ -492,11 +540,11 @@ std::vector> ResampleClause::structure_for_ date_range_ = index_range; } - bucket_boundaries_ = generate_bucket_boundaries_(date_range_->first, date_range_->second, rule_, closed_boundary, offset_); + bucket_boundaries_ = generate_bucket_boundaries_(date_range_->first, date_range_->second, rule_, closed_boundary, offset_, origin_); if (bucket_boundaries_.size() < 2) { return {}; } - debug::check(std::is_sorted(bucket_boundaries_.begin(), bucket_boundaries_.end()), + debug::check(ranges::is_sorted(bucket_boundaries_), "Resampling expects provided bucket boundaries to be strictly monotonically increasing"); return structure_by_time_bucket(ranges_and_keys, bucket_boundaries_); } @@ -521,12 +569,11 @@ std::vector> ResampleClause::structure_fo } date_range_ = std::make_optional(min_start_ts, max_end_ts); - - bucket_boundaries_ = generate_bucket_boundaries_(date_range_->first, date_range_->second, rule_, closed_boundary, offset_); + bucket_boundaries_ = generate_bucket_boundaries_(date_range_->first, date_range_->second, rule_, closed_boundary, offset_, origin_); if (bucket_boundaries_.size() < 2) { return {}; } - debug::check(std::is_sorted(bucket_boundaries_.begin(), bucket_boundaries_.end()), + debug::check(ranges::is_sorted(bucket_boundaries_), "Resampling expects provided bucket boundaries to be strictly monotonically increasing"); auto new_structure_offsets = structure_by_time_bucket(ranges_and_entities, bucket_boundaries_); @@ -541,7 +588,7 @@ std::vector> ResampleClause::structure_fo } } internal::check( - std::all_of(expected_fetch_counts.begin(), expected_fetch_counts.end(), [](EntityFetchCount fetch_count) { + 
ranges::all_of(expected_fetch_counts, [](EntityFetchCount fetch_count) { return fetch_count == 1 || fetch_count == 2; }), "ResampleClause::structure_for_processing: invalid expected entity fetch count (should be 1 or 2)" diff --git a/cpp/arcticdb/processing/clause.hpp b/cpp/arcticdb/processing/clause.hpp index 901b622b79..134780d8db 100644 --- a/cpp/arcticdb/processing/clause.hpp +++ b/cpp/arcticdb/processing/clause.hpp @@ -36,6 +36,8 @@ namespace arcticdb { +using ResampleOrigin = std::variant; + using RangesAndKey = pipelines::RangesAndKey; using SliceAndKey = pipelines::SliceAndKey; @@ -317,6 +319,7 @@ struct AggregationClause { template struct ResampleClause { + using BucketGeneratorT = std::function(timestamp, timestamp, std::string_view, ResampleBoundary, timestamp, const ResampleOrigin&)>; ClauseInfo clause_info_; std::shared_ptr component_manager_; ProcessingConfig processing_config_; @@ -325,29 +328,22 @@ struct ResampleClause { // This will contain the data range specified by the user (if any) intersected with the range of timestamps for the symbol std::optional date_range_; // Inject this as a callback in the ctor to avoid language-specific dependencies this low down in the codebase - std::function(timestamp, timestamp, std::string_view, ResampleBoundary, timestamp)> generate_bucket_boundaries_; + BucketGeneratorT generate_bucket_boundaries_; std::vector bucket_boundaries_; std::vector aggregators_; std::string str_; timestamp offset_; + ResampleOrigin origin_; ResampleClause() = delete; ARCTICDB_MOVE_COPY_DEFAULT(ResampleClause) - ResampleClause(const std::string& rule, - ResampleBoundary label_boundary, - std::function(timestamp, timestamp, std::string_view, ResampleBoundary, timestamp)>&& generate_bucket_boundaries, - timestamp offset): - rule_(rule), - label_boundary_(label_boundary), - generate_bucket_boundaries_(std::move(generate_bucket_boundaries)), - offset_(offset) { - clause_info_.input_structure_ = ProcessingStructure::TIME_BUCKETED; - clause_info_.can_combine_with_column_selection_ = false; - clause_info_.modifies_output_descriptor_ = true; - clause_info_.index_ = KeepCurrentTopLevelIndex(); - } + ResampleClause(std::string rule, + ResampleBoundary label_boundary, + BucketGeneratorT&& generate_bucket_boundaries, + timestamp offset, + ResampleOrigin origin); [[nodiscard]] std::vector> structure_for_processing( std::vector& ranges_and_keys); @@ -356,27 +352,19 @@ struct ResampleClause { [[nodiscard]] std::vector process(std::vector&& entity_ids) const; - [[nodiscard]] const ClauseInfo& clause_info() const { - return clause_info_; - } + [[nodiscard]] const ClauseInfo& clause_info() const; void set_processing_config(const ProcessingConfig& processing_config); - void set_component_manager(std::shared_ptr component_manager) { - component_manager_ = component_manager; - } + void set_component_manager(std::shared_ptr component_manager); [[nodiscard]] std::string to_string() const; - [[nodiscard]] std::string rule() const { - return rule_; - } + [[nodiscard]] std::string rule() const; void set_aggregations(const std::vector& named_aggregators); - void set_date_range(timestamp date_range_start, timestamp date_range_end) { - date_range_.emplace(date_range_start, date_range_end); - } + void set_date_range(timestamp date_range_start, timestamp date_range_end); std::vector generate_bucket_boundaries(timestamp first_ts, timestamp last_ts, diff --git a/cpp/arcticdb/processing/clause_utils.cpp b/cpp/arcticdb/processing/clause_utils.cpp index 6b40a64e67..910dfa4c84 100644 --- 
a/cpp/arcticdb/processing/clause_utils.cpp +++ b/cpp/arcticdb/processing/clause_utils.cpp @@ -6,9 +6,12 @@ */ #include +#include + -namespace arcticdb { +namespace arcticdb { +namespace ranges = std::ranges; using namespace pipelines; std::vector> structure_by_row_slice(ComponentManager& component_manager, std::vector>&& entity_ids_vec) { @@ -101,9 +104,74 @@ std::shared_ptr> generate_segment_fetch_counts( } } debug::check( - std::all_of(res->begin(), res->end(), [](const size_t& val) { return val != 0; }), + ranges::none_of(*res, [](size_t val) { return val == 0; }), "All segments should be needed by at least one ProcessingUnit"); return res; } +template +requires std::is_same_v || std::is_same_v +std::vector> structure_by_time_bucket( + std::vector& ranges, + const std::vector& bucket_boundaries) { + std::erase_if(ranges, [&bucket_boundaries](const T &range) { + auto start_index = range.start_time(); + auto end_index = range.end_time(); + return index_range_outside_bucket_range(start_index, end_index, bucket_boundaries); + }); + auto res = structure_by_row_slice(ranges); + // Element i of res also needs the values from element i+1 if there is a bucket which incorporates the last index + // value of row-slice i and the first value of row-slice i+1 + // Element i+1 should be removed if the last bucket involved in element i covers all the index values in element i+1 + auto bucket_boundaries_it = std::cbegin(bucket_boundaries); + // Exit if res_it == std::prev(res.end()) as this implies the last row slice was not incorporated into an earlier processing unit + for (auto res_it = res.begin(); res_it != res.end() && res_it != std::prev(res.end());) { + auto last_index_value_in_row_slice = ranges[res_it->at(0)].end_time(); + advance_boundary_past_value(bucket_boundaries, bucket_boundaries_it, last_index_value_in_row_slice); + // bucket_boundaries_it now contains the end value of the last bucket covering the row-slice in res_it, or an end iterator if the last bucket ends before the end of this row-slice + if (bucket_boundaries_it != bucket_boundaries.end()) { + Bucket current_bucket{ *std::prev(bucket_boundaries_it), *bucket_boundaries_it }; + auto next_row_slice_it = std::next(res_it); + while (next_row_slice_it != res.end()) { + // end_index from the key is 1 nanosecond larger than the index value of the last row in the row-slice + TimestampRange next_row_slice_timestamp_range{ + ranges[next_row_slice_it->at(0)].start_time(), + ranges[next_row_slice_it->at(0)].end_time() }; + if (current_bucket.contains(next_row_slice_timestamp_range.first)) { + // The last bucket in the current processing unit overlaps with the first index value in the next row slice, so add segments into current processing unit + res_it->insert(res_it->end(), next_row_slice_it->begin(), next_row_slice_it->end()); + if (current_bucket.contains(next_row_slice_timestamp_range.second)) { + // The last bucket in the current processing unit wholly contains the next row slice, so remove it from the result + next_row_slice_it = res.erase(next_row_slice_it); + } else { + break; + } + } else { + break; + } + } + // This is the last bucket, and all the required row-slices have been incorporated into the current processing unit, so erase the rest + if (bucket_boundaries_it == std::prev(bucket_boundaries.end())) { + res.erase(next_row_slice_it, res.end()); + break; + } + res_it = next_row_slice_it; + } + } + return res; +} + +template std::vector> structure_by_time_bucket( + std::vector& ranges, + const std::vector& bucket_boundaries); 
+template std::vector> structure_by_time_bucket( + std::vector& ranges, + const std::vector& bucket_boundaries); +template std::vector> structure_by_time_bucket( + std::vector& ranges, + const std::vector& bucket_boundaries); +template std::vector> structure_by_time_bucket( + std::vector& ranges, + const std::vector& bucket_boundaries); + } diff --git a/cpp/arcticdb/processing/clause_utils.hpp b/cpp/arcticdb/processing/clause_utils.hpp index b0d7c044ec..7d0942c23d 100644 --- a/cpp/arcticdb/processing/clause_utils.hpp +++ b/cpp/arcticdb/processing/clause_utils.hpp @@ -157,54 +157,8 @@ void advance_boundary_past_value(const std::vector& bucket_boundaries template requires std::is_same_v || std::is_same_v std::vector> structure_by_time_bucket( - std::vector& ranges, - const std::vector& bucket_boundaries) { - std::erase_if(ranges, [&bucket_boundaries](const T &range) { - auto start_index = range.start_time(); - auto end_index = range.end_time(); - return index_range_outside_bucket_range(start_index, end_index, bucket_boundaries); - }); - auto res = structure_by_row_slice(ranges); - // Element i of res also needs the values from element i+1 if there is a bucket which incorporates the last index - // value of row-slice i and the first value of row-slice i+1 - // Element i+1 should be removed if the last bucket involved in element i covers all the index values in element i+1 - auto bucket_boundaries_it = std::cbegin(bucket_boundaries); - // Exit if res_it == std::prev(res.end()) as this implies the last row slice was not incorporated into an earlier processing unit - for (auto res_it = res.begin(); res_it != res.end() && res_it != std::prev(res.end());) { - auto last_index_value_in_row_slice = ranges[res_it->at(0)].end_time(); - advance_boundary_past_value(bucket_boundaries, bucket_boundaries_it, last_index_value_in_row_slice); - // bucket_boundaries_it now contains the end value of the last bucket covering the row-slice in res_it, or an end iterator if the last bucket ends before the end of this row-slice - if (bucket_boundaries_it != bucket_boundaries.end()) { - Bucket current_bucket{*std::prev(bucket_boundaries_it), *bucket_boundaries_it}; - auto next_row_slice_it = std::next(res_it); - while (next_row_slice_it != res.end()) { - // end_index from the key is 1 nanosecond larger than the index value of the last row in the row-slice - TimestampRange next_row_slice_timestamp_range{ - ranges[next_row_slice_it->at(0)].start_time(), - ranges[next_row_slice_it->at(0)].end_time()}; - if (current_bucket.contains(next_row_slice_timestamp_range.first)) { - // The last bucket in the current processing unit overlaps with the first index value in the next row slice, so add segments into current processing unit - res_it->insert(res_it->end(), next_row_slice_it->begin(), next_row_slice_it->end()); - if (current_bucket.contains(next_row_slice_timestamp_range.second)) { - // The last bucket in the current processing unit wholly contains the next row slice, so remove it from the result - next_row_slice_it = res.erase(next_row_slice_it); - } else { - break; - } - } else { - break; - } - } - // This is the last bucket, and all the required row-slices have been incorporated into the current processing unit, so erase the rest - if (bucket_boundaries_it == std::prev(bucket_boundaries.end())) { - res.erase(next_row_slice_it, res.end()); - break; - } - res_it = next_row_slice_it; - } - } - return res; -} + std::vector& ranges, + const std::vector& bucket_boundaries); std::vector> 
structure_by_row_slice(ComponentManager& component_manager, std::vector>&& entity_ids_vec); diff --git a/cpp/arcticdb/processing/sorted_aggregation.hpp b/cpp/arcticdb/processing/sorted_aggregation.hpp index 07ba30524a..440d58cfd7 100644 --- a/cpp/arcticdb/processing/sorted_aggregation.hpp +++ b/cpp/arcticdb/processing/sorted_aggregation.hpp @@ -160,7 +160,7 @@ class MinAggregatorSorted { void push(T value) { if constexpr (std::is_floating_point_v) { if (ARCTICDB_LIKELY(!std::isnan(value))) { - min_ = std::min(min_.value_or(std::numeric_limits::max()), value); + min_ = std::min(min_.value_or(std::numeric_limits::infinity()), value); } } else if constexpr (std::is_same_v && TimeType) { if (ARCTICDB_LIKELY(value != NaT)) { @@ -202,7 +202,7 @@ class MaxAggregatorSorted { void push(T value) { if constexpr (std::is_floating_point_v) { if (ARCTICDB_LIKELY(!std::isnan(value))) { - max_ = std::max(max_.value_or(std::numeric_limits::lowest()), value); + max_ = std::max(max_.value_or(-std::numeric_limits::infinity()), value); } } else if constexpr (std::is_same_v && TimeType) { if (ARCTICDB_LIKELY(value != NaT)) { diff --git a/cpp/arcticdb/processing/test/rapidcheck_resample.cpp b/cpp/arcticdb/processing/test/rapidcheck_resample.cpp index 14285d7a91..9f7c3fe2b2 100644 --- a/cpp/arcticdb/processing/test/rapidcheck_resample.cpp +++ b/cpp/arcticdb/processing/test/rapidcheck_resample.cpp @@ -17,7 +17,7 @@ using namespace arcticdb; auto generate_bucket_boundaries(std::vector&& bucket_boundaries) { - return [bucket_boundaries = std::move(bucket_boundaries)](timestamp, timestamp, std::string_view, ResampleBoundary, timestamp) mutable { + return [bucket_boundaries = std::move(bucket_boundaries)](timestamp, timestamp, std::string_view, ResampleBoundary, timestamp, ResampleOrigin) mutable { return bucket_boundaries; }; } @@ -113,11 +113,11 @@ RC_GTEST_PROP(Resample, StructureForProcessing, ()) { } if (left_boundary_closed) { - ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries(std::move(bucket_boundaries)), 0}; + ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries(std::move(bucket_boundaries)), 0, 0}; auto result = resample_clause.structure_for_processing(ranges_and_keys); RC_ASSERT(expected_result == result); } else { - ResampleClause resample_clause{"dummy", ResampleBoundary::RIGHT, generate_bucket_boundaries(std::move(bucket_boundaries)), 0}; + ResampleClause resample_clause{"dummy", ResampleBoundary::RIGHT, generate_bucket_boundaries(std::move(bucket_boundaries)), 0, 0}; auto result = resample_clause.structure_for_processing(ranges_and_keys); RC_ASSERT(expected_result == result); } diff --git a/cpp/arcticdb/processing/test/test_resample.cpp b/cpp/arcticdb/processing/test/test_resample.cpp index c2bfa40043..4279540b01 100644 --- a/cpp/arcticdb/processing/test/test_resample.cpp +++ b/cpp/arcticdb/processing/test/test_resample.cpp @@ -14,7 +14,7 @@ using namespace arcticdb; auto generate_bucket_boundaries(std::vector&& bucket_boundaries) { - return [bucket_boundaries = std::move(bucket_boundaries)](timestamp, timestamp, std::string_view, ResampleBoundary, timestamp) { + return [bucket_boundaries = std::move(bucket_boundaries)](timestamp, timestamp, std::string_view, ResampleBoundary, timestamp, ResampleOrigin) { return bucket_boundaries; }; } @@ -33,7 +33,7 @@ TEST(Resample, StructureForProcessingBasic) { // Insert into vector "out of order" to ensure structure_for_processing reorders correctly std::vector ranges_and_keys{bottom, 
top}; - ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 1500, 2500, 2999}), 0}; + ResampleClause resample_clause{ "dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 1500, 2500, 2999}), 0, 0 }; auto proc_unit_ids = resample_clause.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 2); ASSERT_EQ(ranges_and_keys[0], top); @@ -61,7 +61,7 @@ TEST(Resample, StructureForProcessingColumnSlicing) { // Insert into vector "out of order" to ensure structure_for_processing reorders correctly std::vector ranges_and_keys{top_right, bottom_left, bottom_right, top_left}; - ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 1500, 2500, 2999}), 0}; + ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 1500, 2500, 2999}), 0, 0}; auto proc_unit_ids = resample_clause.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 4); ASSERT_EQ(ranges_and_keys[0], top_left); @@ -86,7 +86,7 @@ TEST(Resample, StructureForProcessingOverlap) { // Insert into vector "out of order" to ensure structure_for_processing reorders correctly std::vector ranges_and_keys{bottom, top}; - ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2500, 2999}), 0}; + ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2500, 2999}), 0, 0}; auto proc_unit_ids = resample_clause.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 2); ASSERT_EQ(ranges_and_keys[0], top); @@ -113,7 +113,7 @@ TEST(Resample, StructureForProcessingSubsumed) { // Insert into vector "out of order" to ensure structure_for_processing reorders correctly std::vector ranges_and_keys{bottom, middle, top}; - ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 4500}), 0}; + ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 4500}), 0, 0}; auto proc_unit_ids = resample_clause.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 3); ASSERT_EQ(ranges_and_keys[0], top); @@ -138,7 +138,7 @@ TEST(Resample, StructureForProcessingExactBoundary) { // Insert into vector "out of order" to ensure structure_for_processing reorders correctly std::vector ranges_and_keys{bottom, top}; - ResampleClause resample_clause_left{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2000, 2500, 2999}), 0}; + ResampleClause resample_clause_left{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2000, 2500, 2999}), 0, 0}; auto proc_unit_ids = resample_clause_left.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 2); ASSERT_EQ(ranges_and_keys[0], top); @@ -146,7 +146,7 @@ TEST(Resample, StructureForProcessingExactBoundary) { std::vector> expected_proc_unit_ids_left{{0}, {1}}; ASSERT_EQ(expected_proc_unit_ids_left, proc_unit_ids); - ResampleClause resample_clause_right{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2000, 2500, 2999}), 0}; + ResampleClause resample_clause_right{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2000, 2500, 2999}), 0, 0}; proc_unit_ids = resample_clause_right.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 2); ASSERT_EQ(ranges_and_keys[0], top); @@ -157,11 +157,11 @@ TEST(Resample, 
StructureForProcessingExactBoundary) { TEST(Resample, FindBuckets) { // Enough bucket boundaries to test all the interesting cases - ResampleClause resample_left("left", ResampleBoundary::LEFT, generate_bucket_boundaries({0, 10, 20, 30, 40}), 0); - ResampleClause resample_right("right", ResampleBoundary::RIGHT, generate_bucket_boundaries({0, 10, 20, 30, 40}), 0); + ResampleClause resample_left("left", ResampleBoundary::LEFT, generate_bucket_boundaries({0, 10, 20, 30, 40}), 0, 0); + ResampleClause resample_right("right", ResampleBoundary::RIGHT, generate_bucket_boundaries({0, 10, 20, 30, 40}), 0, 0); - resample_left.bucket_boundaries_ = resample_left.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0); - resample_right.bucket_boundaries_ = resample_right.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::RIGHT, 0); + resample_left.bucket_boundaries_ = resample_left.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0, 0); + resample_right.bucket_boundaries_ = resample_right.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::RIGHT, 0, 0); std::vector res; @@ -221,8 +221,8 @@ TEST(Resample, FindBuckets) { TEST(Resample, ProcessOneSegment) { auto component_manager = std::make_shared(); - ResampleClause resample("dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({-1, 2, 5}), 0); - resample.bucket_boundaries_ = resample.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0); + ResampleClause resample("dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({-1, 2, 5}), 0, 0); + resample.bucket_boundaries_ = resample.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0, 0); resample.date_range_ = {0, 5}; resample.set_component_manager(component_manager); resample.set_aggregations({{"sum", "sum_column", "sum_column"}}); @@ -266,8 +266,8 @@ TEST(Resample, ProcessOneSegment) { TEST(Resample, ProcessMultipleSegments) { auto component_manager = std::make_shared(); - ResampleClause resample("dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({-15, -5, 5, 6, 25, 35, 45, 46, 55, 65}), 0); - resample.bucket_boundaries_ = resample.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0); + ResampleClause resample("dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({-15, -5, 5, 6, 25, 35, 45, 46, 55, 65}), 0, 0); + resample.bucket_boundaries_ = resample.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0, 0); resample.date_range_ = {0, 51}; resample.set_component_manager(component_manager); resample.set_aggregations({{"sum", "sum_column", "sum_column"}}); diff --git a/cpp/arcticdb/python/python_utils.hpp b/cpp/arcticdb/python/python_utils.hpp index 10e33c6838..4c4ea638f0 100644 --- a/cpp/arcticdb/python/python_utils.hpp +++ b/cpp/arcticdb/python/python_utils.hpp @@ -8,7 +8,6 @@ #pragma once #include -#include #include #include #include diff --git a/cpp/arcticdb/version/python_bindings.cpp b/cpp/arcticdb/version/python_bindings.cpp index bb8c8518cb..b839159843 100644 --- a/cpp/arcticdb/version/python_bindings.cpp +++ b/cpp/arcticdb/version/python_bindings.cpp @@ -5,7 +5,7 @@ * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
*/ -#include +#include #include #include #include @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -23,27 +22,81 @@ #include #include #include +#include namespace arcticdb::version_store { +static consteval timestamp one_day_in_nanoseconds() { + return timestamp(24) * 60 * 60 * 1'000'000'000; +} + +template +requires std::integral +[[nodiscard]] static T python_mod(T a, T b) { + return (a % b + b) % b; +} + +/// @param ts in nanoseconds +[[nodiscard]] static timestamp start_of_day_nanoseconds(timestamp ts) { + return ts - python_mod(ts, one_day_in_nanoseconds()); +} + +/// @param ts in nanoseconds +[[nodiscard]] static timestamp end_of_day_nanoseconds(timestamp ts) { + const timestamp start_of_day = start_of_day_nanoseconds(ts); + const bool is_midnight = start_of_day == ts; + if (is_midnight) { + return ts; + } + return start_of_day + one_day_in_nanoseconds(); +} + [[nodiscard]] static std::pair compute_first_last_dates( timestamp start, timestamp end, timestamp rule, ResampleBoundary closed_boundary_arg, - timestamp offset + timestamp offset, + const ResampleOrigin& origin ) { - const timestamp ns_to_prev_offset_start = (start - offset) % rule; - const timestamp ns_to_prev_offset_end = (end - offset) % rule; + // Origin value formula from Pandas: + // https://github.com/pandas-dev/pandas/blob/68d9dcab5b543adb3bfe5b83563c61a9b8afae77/pandas/core/resample.py#L2564 + auto [origin_ns, origin_adjusted_start] = util::variant_match( + origin, + [start](timestamp o) -> std::pair {return {o, start}; }, + [&](const std::string& o) -> std::pair { + if (o == "epoch") { + return { 0, start }; + } else if (o == "start") { + return { start, start }; + } else if (o == "start_day") { + return { start_of_day_nanoseconds(start), start }; + } else if (o == "end_day" || o == "end") { + const timestamp origin_last = o == "end" ? end: end_of_day_nanoseconds(end); + const timestamp bucket_count = (origin_last - start) / rule + (closed_boundary_arg == ResampleBoundary::LEFT); + const timestamp origin_ns = origin_last - bucket_count * rule; + return { origin_ns, origin_ns }; + } else { + user_input::raise( + "Invalid origin value {}. Supported values are: \"start\", \"start_day\", \"end\", \"end_day\", \"epoch\" or timestamp in nanoseconds", + o); + } + } + ); + origin_ns += offset; + + const timestamp ns_to_prev_offset_start = python_mod(origin_adjusted_start - origin_ns, rule); + const timestamp ns_to_prev_offset_end = python_mod(end - origin_ns, rule); + + if (closed_boundary_arg == ResampleBoundary::RIGHT) { return { - ns_to_prev_offset_start > 0 ? start - ns_to_prev_offset_start : start - rule, + ns_to_prev_offset_start > 0 ? origin_adjusted_start - ns_to_prev_offset_start : origin_adjusted_start - rule, ns_to_prev_offset_end > 0 ? end + (rule - ns_to_prev_offset_end) : end }; } else { return { - ns_to_prev_offset_start > 0 ? start - ns_to_prev_offset_start : start, + ns_to_prev_offset_start > 0 ? origin_adjusted_start - ns_to_prev_offset_start : origin_adjusted_start, ns_to_prev_offset_end > 0 ?
end + (rule - ns_to_prev_offset_end) : end + rule }; } @@ -54,14 +107,14 @@ std::vector generate_buckets( timestamp end, std::string_view rule, ResampleBoundary closed_boundary_arg, - timestamp offset + timestamp offset, + const ResampleOrigin& origin ) { - timestamp rule_ns; - { + const timestamp rule_ns = [](std::string_view rule) { py::gil_scoped_acquire acquire_gil; - rule_ns = python_util::pd_to_offset(rule); - } - const auto [start_with_offset, end_with_offset] = compute_first_last_dates(start, end, rule_ns, closed_boundary_arg, offset); + return python_util::pd_to_offset(rule); + }(rule); + const auto [start_with_offset, end_with_offset] = compute_first_last_dates(start, end, rule_ns, closed_boundary_arg, offset, origin); const auto bucket_boundary_count = (end_with_offset - start_with_offset) / rule_ns + 1; std::vector res; res.reserve(bucket_boundary_count); @@ -73,16 +126,10 @@ std::vector generate_buckets( template void declare_resample_clause(py::module& version) { - std::string class_name; - if constexpr (closed_boundary == ResampleBoundary::LEFT) { - class_name = "ResampleClauseLeftClosed"; - } else { - // closed_boundary == ResampleBoundary::RIGHT - class_name = "ResampleClauseRightClosed"; - } - py::class_, std::shared_ptr>>(version, class_name.c_str()) - .def(py::init([](std::string rule, ResampleBoundary label_boundary, timestamp offset){ - return ResampleClause(rule, label_boundary, generate_buckets, offset); + const char* class_name = closed_boundary == ResampleBoundary::LEFT ? "ResampleClauseLeftClosed" : "ResampleClauseRightClosed"; + py::class_, std::shared_ptr>>(version, class_name) + .def(py::init([](std::string rule, ResampleBoundary label_boundary, timestamp offset, ResampleOrigin origin){ + return ResampleClause(std::move(rule), label_boundary, generate_buckets, offset, std::move(origin)); })) .def_property_readonly("rule", &ResampleClause::rule) .def("set_aggregations", [](ResampleClause& self, diff --git a/python/arcticdb/util/test.py b/python/arcticdb/util/test.py index 61440572a6..fdc922665c 100644 --- a/python/arcticdb/util/test.py +++ b/python/arcticdb/util/test.py @@ -35,6 +35,7 @@ from arcticdb.version_store.helper import ArcticFileConfig from arcticdb.config import _DEFAULT_ENVS_PATH from arcticdb_ext import set_config_int, get_config_int, unset_config_int +from packaging.version import Version from arcticdb import log @@ -756,3 +757,80 @@ def generic_named_aggregation_test(lib, symbol, df, grouping_column, aggs_dict): f"""\nPandas result:\n{expected}\n"ArcticDB result:\n{received}""" ) raise e + +def drop_inf_and_nan(df: pd.DataFrame) -> pd.DataFrame: + return df[~df.isin([np.nan, np.inf, -np.inf]).any(axis=1)] + + +def assert_dfs_approximate(left: pd.DataFrame, right: pd.DataFrame): + """ + Checks if integer columns are exactly the same. For float columns checks if they are approximately the same. + We can't guarantee the same order of operations for the floats thus numerical errors might appear. + """ + assert left.shape == right.shape + assert left.columns.equals(right.columns) + # To avoid checking the freq member of the index as arctic does not fill it in + assert left.index.equals(right.index) + + # Drop NaN an inf values because. Pandas uses Kahan summation algorithm to improve numerical stability. + # Thus they don't consistently overflow to infinity. 
Discussion: https://github.com/pandas-dev/pandas/issues/60303 + left_no_inf_and_nan = drop_inf_and_nan(left) + right_no_inf_and_nan = drop_inf_and_nan(right) + + check_equals_flags = {"check_dtype": False} + if PANDAS_VERSION >= Version("1.1"): + check_equals_flags["check_freq"] = False + if PANDAS_VERSION >= Version("1.2"): + check_equals_flags["check_flags"] = False + for col in left_no_inf_and_nan.columns: + if pd.api.types.is_integer_dtype(left_no_inf_and_nan[col].dtype) and pd.api.types.is_integer_dtype(right_no_inf_and_nan[col].dtype): + pd.testing.assert_series_equal(left_no_inf_and_nan[col], right_no_inf_and_nan[col], **check_equals_flags) + else: + if PANDAS_VERSION >= Version("1.1"): + check_equals_flags["atol"] = 1e-8 + pd.testing.assert_series_equal(left_no_inf_and_nan[col], right_no_inf_and_nan[col], **check_equals_flags) + + +def generic_resample_test(lib, sym, rule, aggregations, date_range=None, closed=None, label=None, offset=None, origin=None, drop_empty_buckets_for=None): + """ + Perform a resampling in ArcticDB and compare it against the same query in Pandas. + + :param drop_empty_buckets_for: Will add additional aggregation column using the count aggregator. At the end of the + aggregation query will remove all rows for which this newly added count aggregation is 0. Works only for int/uint + columns. There is similar function generic_resample_test_with_empty_buckets in + python/tests/unit/arcticdb/version_store/test_resample.py which can drop empty buckets for all types of columns, + but it cannot take parameters such as origin and offset. + """ + # Pandas doesn't have a good date_range equivalent in resample, so just use read for that + expected = lib.read(sym, date_range=date_range).data + # Pandas 1.X needs None as the first argument to agg with named aggregators + + pandas_aggregations = {**aggregations, "_bucket_size_": (drop_empty_buckets_for, "count")} if drop_empty_buckets_for else aggregations + resample_args = {} + if origin: + resample_args['origin'] = origin + if offset: + resample_args['offset'] = offset + + if PANDAS_VERSION >= Version("1.1.0"): + expected = expected.resample(rule, closed=closed, label=label, **resample_args).agg(None, **pandas_aggregations) + else: + expected = expected.resample(rule, closed=closed, label=label).agg(None, **pandas_aggregations) + if drop_empty_buckets_for: + expected = expected[expected["_bucket_size_"] > 0] + expected.drop(columns=["_bucket_size_"], inplace=True) + expected = expected.reindex(columns=sorted(expected.columns)) + + q = QueryBuilder() + if origin: + q = q.resample(rule, closed=closed, label=label, offset=offset, origin=origin).agg(aggregations) + else: + q = q.resample(rule, closed=closed, label=label, offset=offset).agg(aggregations) + received = lib.read(sym, date_range=date_range, query_builder=q).data + received = received.reindex(columns=sorted(received.columns)) + + has_float_column = any(pd.api.types.is_float_dtype(col_type) for col_type in list(expected.dtypes)) + if has_float_column: + assert_dfs_approximate(expected, received) + else: + assert_frame_equal(expected, received, check_dtype=False) \ No newline at end of file diff --git a/python/arcticdb/version_store/processing.py b/python/arcticdb/version_store/processing.py index 46a821d5fe..5f77cda66d 100644 --- a/python/arcticdb/version_store/processing.py +++ b/python/arcticdb/version_store/processing.py @@ -320,6 +320,7 @@ class PythonResampleClause: aggregations: Dict[str, Union[str, Tuple[str, str]]] = None # In nanosecods offset: int = 0 + 
origin: Union[str, pd.Timestamp] = "epoch" class QueryBuilder: @@ -590,7 +591,8 @@ def resample( rule: Union[str, pd.DateOffset], closed: Optional[str] = None, label: Optional[str] = None, - offset: Optional[Union[str, pd.Timedelta]] = None + offset: Optional[Union[str, pd.Timedelta]] = None, + origin: Union[str, pd.Timestamp] = 'epoch' ): """ Resample a symbol on the index. The symbol must be datetime indexed. Resample operations must be followed by @@ -637,7 +639,16 @@ def resample( offset: Optional[Union[str, pd.Timedelta]] default=None Offset the start of each bucket. Supported strings are the same as in `pd.Timedelta`. If offset is larger than rule then `offset` modulo `rule` is used as an offset. + origin: Union[str, pd.Timestamp] default='epoch' + The timestamp on which to adjust the grouping. Supported strings are: + * epoch: origin is 1970-01-01 + * start: origin is the first value of the timeseries + * start_day: origin is the first day at midnight of the timeseries + * end: origin is the last value of the timeseries + * end_day: origin is the ceiling midnight of the last day + + `start`, `start_day`, `end`, `end_day` origin values are not supported in conjunction with `date_range`. Returns ------- QueryBuilder @@ -658,6 +669,10 @@ def resample( * The library has dynamic schema enabled, and at least one of the columns being aggregated is missing from at least one row-slice. * At least one of the columns being aggregated contains sparse data. + UserInputException + + * `start`, `start_day`, `end`, `end_day` are used in conjunction with `date_range` + * `origin` is not one of `start`, `start_day`, `end`, `end_day`, `epoch` or a `pd.Timestamp` Examples -------- @@ -742,6 +757,10 @@ def resample( else: offset_ns = 0 + if not (isinstance(origin, pd.Timestamp) or origin in ["start", "end", "start_day", "end_day", "epoch"]): + raise UserInputException(f'Argument origin must be either of type pd.Timestamp or one of ["start", "end", "start_day", "end_day", "epoch"].
Got {origin} instead') + if type(origin) is pd.Timestamp: + origin = origin.value # This set is documented here: # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.resample.html#pandas.Series.resample # and lifted directly from pandas.core.resample.TimeGrouper.__init__, and so is inherently fragile to upstream @@ -750,15 +769,15 @@ def resample( boundary_map = { "left": _ResampleBoundary.LEFT, "right": _ResampleBoundary.RIGHT, - None: _ResampleBoundary.RIGHT if rule in end_types else _ResampleBoundary.LEFT + None: _ResampleBoundary.RIGHT if rule in end_types or origin in ["end", "end_day"] else _ResampleBoundary.LEFT } check(closed in boundary_map.keys(), f"closed kwarg to resample must be `left`, 'right', or None, but received '{closed}'") check(label in boundary_map.keys(), f"label kwarg to resample must be `left`, 'right', or None, but received '{closed}'") if boundary_map[closed] == _ResampleBoundary.LEFT: - self.clauses = self.clauses + [_ResampleClauseLeftClosed(rule, boundary_map[label], offset_ns)] + self.clauses = self.clauses + [_ResampleClauseLeftClosed(rule, boundary_map[label], offset_ns, origin)] else: - self.clauses = self.clauses + [_ResampleClauseRightClosed(rule, boundary_map[label], offset_ns)] - self._python_clauses = self._python_clauses + [PythonResampleClause(rule=rule, closed=boundary_map[closed], label=boundary_map[label], offset=offset_ns)] + self.clauses = self.clauses + [_ResampleClauseRightClosed(rule, boundary_map[label], offset_ns, origin)] + self._python_clauses = self._python_clauses + [PythonResampleClause(rule=rule, closed=boundary_map[closed], label=boundary_map[label], offset=offset_ns, origin=origin)] return self @@ -936,9 +955,9 @@ def __setstate__(self, state): self.clauses = self.clauses + [_AggregationClause(self.clauses[-1].grouping_column, python_clause.aggregations)] elif isinstance(python_clause, PythonResampleClause): if python_clause.closed == _ResampleBoundary.LEFT: - self.clauses = self.clauses + [_ResampleClauseLeftClosed(python_clause.rule, python_clause.label, python_clause.offset)] + self.clauses = self.clauses + [_ResampleClauseLeftClosed(python_clause.rule, python_clause.label, python_clause.offset, python_clause.origin)] else: - self.clauses = self.clauses + [_ResampleClauseRightClosed(python_clause.rule, python_clause.label, python_clause.offset)] + self.clauses = self.clauses + [_ResampleClauseRightClosed(python_clause.rule, python_clause.label, python_clause.offset, python_clause.origin)] if python_clause.aggregations is not None: self.clauses[-1].set_aggregations(python_clause.aggregations) elif isinstance(python_clause, PythonRowRangeClause): diff --git a/python/tests/hypothesis/arcticdb/test_resample.py b/python/tests/hypothesis/arcticdb/test_resample.py new file mode 100644 index 0000000000..e263211e92 --- /dev/null +++ b/python/tests/hypothesis/arcticdb/test_resample.py @@ -0,0 +1,110 @@ +import pandas as pd +import numpy as np +import pytest +from hypothesis import given, settings, assume +import hypothesis.extra.pandas as hs_pd +import hypothesis.extra.numpy as hs_np +import hypothesis.strategies as st +from arcticdb.util.hypothesis import use_of_function_scoped_fixtures_in_hypothesis_checked +from arcticdb import QueryBuilder +from arcticdb.util.test import assert_frame_equal, generic_resample_test +from arcticdb.util._versions import IS_PANDAS_TWO + + +COLUMN_DTYPE = ["float", "int", "uint"] +ALL_AGGREGATIONS = ["sum", "mean", "min", "max", "first", "last", "count"] +MIN_DATE =
np.datetime64('1969-01-01') +MAX_DATE = np.datetime64('2000-01-01') + +@st.composite +def date(draw, min_date, max_date): + # Bound the start and end date so that we don't end up with too many buckets eating all RAM + # Use some pre-epoch dates. + # hs_np.from_dtype's min_value and max_value do not work with dates + res = draw(hs_np.from_dtype(np.dtype("datetime64[ns]"))) + assume(min_date <= res and res <= max_date) + return res + +@st.composite +def dataframe(draw): + index = hs_pd.indexes(elements=date(min_date=MIN_DATE, max_date=MAX_DATE).filter(lambda d: d is not pd.NaT), min_size=1) + columns = [hs_pd.column(name=f"col_{dtype}", dtype=dtype) for dtype in COLUMN_DTYPE] + result = draw(hs_pd.data_frames(columns, index=index)) + result.sort_index(inplace=True) + return result + +@st.composite +def origin(draw): + selected_origin = draw(st.sampled_from(["start", "end", "start_day", "end_day", "epoch", "timestamp"])) + # Hypothesis may generate dates for year > 2200 and some of the arithmetic operations will overflow. + if selected_origin == "timestamp": + min_date = MIN_DATE - np.timedelta64(365, 'D') + max_date = MAX_DATE + np.timedelta64(365, 'D') + return pd.Timestamp(draw(date(min_date=min_date, max_date=max_date))) + else: + return selected_origin + +def freq_fits_in_64_bits(count, unit): + """ + This is used to check if a frequency is usable by ArcticDB. ArcticDB converts the frequency to a signed 64-bit integer. + """ + billion = 1_000_000_000 + mult = {'h': 3600 * billion, 'min': 60 * billion, 's': billion} + return (mult[unit] * count).bit_length() <= 63 + +@st.composite +def rule(draw): + count = draw(st.integers(min_value=1)) + unit = draw(st.sampled_from(['min', 'h'])) + result = f"{count}{unit}" + assume(freq_fits_in_64_bits(count=count, unit=unit)) + return result + +@st.composite +def offset(draw): + unit = draw(st.sampled_from(['s', 'min', 'h', None])) + if unit is None: + return None + count = draw(st.integers(min_value=1)) + result = f"{count}{unit}" + assume(freq_fits_in_64_bits(count=count, unit=unit)) + return result + + +@pytest.mark.skipif(not IS_PANDAS_TWO, reason="Some resampling parameters don't exist in Pandas < 2") +@use_of_function_scoped_fixtures_in_hypothesis_checked +@settings(deadline=None) +@given( + df=dataframe(), + rule=rule(), + origin=origin(), + offset=offset() ) +def test_resample(lmdb_version_store_v1, df, rule, origin, offset): + lib = lmdb_version_store_v1 + sym = "sym" + lib.write(sym, df) + for closed in ["left", "right"]: + for label in ["left", "right"]: + columns = list(df.columns) + agg = {f"{name}_{op}": (name, op) for name in columns for op in ALL_AGGREGATIONS} + try: + generic_resample_test( + lib, + sym, + rule, + agg, + origin=origin, + offset=offset, + closed=closed, + label=label, + # Must be an int or uint column, otherwise dropping of empty buckets will not work + drop_empty_buckets_for="col_uint") + except ValueError as pandas_error: + # This is to avoid a bug in pandas related to how end and end_day work. It's possible that, when end/end_day are used, + # the first value of the data frame falls outside of the computed resampling range. In ArcticDB this is not a problem + # as we allow this by design.
+ if str(pandas_error) != "Values falls before first bin": + raise pandas_error + else: + return \ No newline at end of file diff --git a/python/tests/hypothesis/arcticdb/test_sort_merge.py b/python/tests/hypothesis/arcticdb/test_sort_merge.py index d983ca97dd..2417d1bf08 100644 --- a/python/tests/hypothesis/arcticdb/test_sort_merge.py +++ b/python/tests/hypothesis/arcticdb/test_sort_merge.py @@ -38,7 +38,7 @@ def generate_single_dataframe(draw, column_list, min_size=0, allow_nat_in_index= if not IS_PANDAS_TWO: # Due to https://github.com/man-group/ArcticDB/blob/7479c0b0caa8121bc2ca71a73e29769bbc41c66a/python/arcticdb/version_store/_normalization.py#L184 # we change the dtype of empty float columns. This makes hypothesis tests extremely hard to write as we must - # keep addional state about is there a mix of empty/non-empty float columns in the staging area, did we write + # keep additional state about is there a mix of empty/non-empty float columns in the staging area, did we write # empty float column (if so it's type would be object). These edge cases are covered in the unit tests. index = hs_pd.indexes(dtype="datetime64[ns]", min_size=1 if min_size <= 0 else min_size).filter(lambda x: allow_nat_in_index or not pd.NaT in x) else: diff --git a/python/tests/unit/arcticdb/version_store/test_resample.py b/python/tests/unit/arcticdb/version_store/test_resample.py index 965f679340..6b41090f87 100644 --- a/python/tests/unit/arcticdb/version_store/test_resample.py +++ b/python/tests/unit/arcticdb/version_store/test_resample.py @@ -12,16 +12,20 @@ import pytest from arcticdb import QueryBuilder -from arcticdb.exceptions import ArcticDbNotYetImplemented, SchemaException -from arcticdb.util.test import assert_frame_equal +from arcticdb.exceptions import ArcticDbNotYetImplemented, SchemaException, UserInputException +from arcticdb.util.test import assert_frame_equal, generic_resample_test from packaging.version import Version from arcticdb.util._versions import IS_PANDAS_TWO, PANDAS_VERSION +import itertools pytestmark = pytest.mark.pipeline ALL_AGGREGATIONS = ["sum", "mean", "min", "max", "first", "last", "count"] +def all_aggregations_dict(col): + return {f"to_{agg}": (col, agg) for agg in ALL_AGGREGATIONS} + # Pandas recommended way to resample and exclude buckets with no index values, which is our behaviour # See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#sparse-resampling def round(t, freq): @@ -30,31 +34,22 @@ def round(t, freq): return pd.Timestamp((t.value // td.value) * td.value) def generic_resample_test_with_empty_buckets(lib, sym, rule, aggregations, date_range=None): - # Pandas doesn't have a good date_range equivalent in resample, so just use read for that - expected = lib.read(sym, date_range=date_range).data - # Pandas 1.X needs None as the first argument to agg with named aggregators - expected = expected.groupby(partial(round, freq=rule)).agg(None, **aggregations) - expected = expected.reindex(columns=sorted(expected.columns)) - - q = QueryBuilder() - q = q.resample(rule).agg(aggregations) - received = lib.read(sym, date_range=date_range, query_builder=q).data - received = received.reindex(columns=sorted(received.columns)) + """ + Perform a resampling in ArcticDB and compare it against the same query in Pandas. - assert_frame_equal(expected, received, check_dtype=False) + This will remove all empty buckets mirroring ArcticDB's behavior. It cannot take additional parameters such as + orign and offset. 
In case such parameters are needed arcticdb.util.test.generic_resample_test can be used. -def generic_resample_test(lib, sym, rule, aggregations, date_range=None, closed=None, label=None, offset=None): + This can drop buckets even all columns are of float type while generic_resample_test needs at least one non-float + column. + """ # Pandas doesn't have a good date_range equivalent in resample, so just use read for that expected = lib.read(sym, date_range=date_range).data # Pandas 1.X needs None as the first argument to agg with named aggregators - if PANDAS_VERSION >= Version("1.1.0"): - expected = expected.resample(rule, closed=closed, label=label, offset=offset).agg(None, **aggregations) - else: - expected = expected.resample(rule, closed=closed, label=label).agg(None, **aggregations) + expected = expected.groupby(partial(round, freq=rule)).agg(None, **aggregations) expected = expected.reindex(columns=sorted(expected.columns)) - q = QueryBuilder() - q = q.resample(rule, closed=closed, label=label, offset=offset).agg(aggregations) + q = q.resample(rule).agg(aggregations) received = lib.read(sym, date_range=date_range, query_builder=q).data received = received.reindex(columns=sorted(received.columns)) @@ -548,10 +543,6 @@ def test_resampling_empty_type_column(lmdb_version_store_empty_types_v1): @pytest.mark.parametrize("closed", ["left", "right"]) class TestResamplingOffset: - @staticmethod - def all_aggregations_dict(col): - return {f"to_{agg}": (col, agg) for agg in ALL_AGGREGATIONS} - @pytest.mark.parametrize("offset", ("30s", pd.Timedelta(seconds=30))) def test_offset_smaller_than_freq(self, lmdb_version_store_v1, closed, offset): lib = lmdb_version_store_v1 @@ -564,7 +555,7 @@ def test_offset_smaller_than_freq(self, lmdb_version_store_v1, closed, offset): lib, sym, "2min", - self.all_aggregations_dict("col"), + all_aggregations_dict("col"), closed=closed, offset="30s" ) @@ -581,7 +572,7 @@ def test_offset_larger_than_freq(self, lmdb_version_store_v1, closed, offset): lib, sym, "2min", - self.all_aggregations_dict("col"), + all_aggregations_dict("col"), closed=closed, offset=offset ) @@ -603,7 +594,7 @@ def test_values_on_offset_boundary(self, lmdb_version_store_v1, closed, offset): lib, sym, "2min", - self.all_aggregations_dict("col"), + all_aggregations_dict("col"), closed=closed, offset=offset ) @@ -630,8 +621,226 @@ def test_with_date_range(self, lmdb_version_store_v1, closed, date_range, offset lib, sym, "2min", - self.all_aggregations_dict("col"), + all_aggregations_dict("col"), closed=closed, offset=offset, date_range=date_range - ) \ No newline at end of file + ) + +@pytest.mark.skipif(PANDAS_VERSION < Version("1.1.0"), reason="Pandas < 1.1.0 do not have offset param") +@pytest.mark.parametrize("closed", ["left", "right"]) +class TestResamplingOrigin: + + # Timestamps: pre start, between start and end, post end, first date in the index, last date in the index + @pytest.mark.parametrize( + "origin", + [ + "start", + "start_day", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + "epoch", + pd.Timestamp("2024-01-01"), + pd.Timestamp("2025-01-01 15:00:00"), + pd.Timestamp("2025-01-03 15:00:00"), + pd.Timestamp("2025-01-01 10:00:33"), + pd.Timestamp("2025-01-02 12:00:13") + ] + ) + def test_origin(self, lmdb_version_store_v1, closed, origin): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + # 
Start and end are picked so that #bins * rule + start != end on purpose to test + # the bin generation in case of end and end_day + start = pd.Timestamp("2025-01-01 10:00:33") + end = pd.Timestamp("2025-01-02 12:00:20") + idx = pd.date_range(start, end, freq='10s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": rng.integers(0, 100, len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + origin=origin + ) + + @pytest.mark.parametrize("origin", [ + "start", + "start_day", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")) + ]) + @pytest.mark.parametrize("date_range", [ + (pd.Timestamp("2025-01-01 10:00:00"), pd.Timestamp("2025-01-02 12:00:00")), # start and end are multiples of rule + (pd.Timestamp("2025-01-01 10:00:00"), pd.Timestamp("2025-01-02 12:00:03")), # start is multiple of rule + (pd.Timestamp("2025-01-01 10:00:03"), pd.Timestamp("2025-01-02 12:00:00")) # end is multiple of rule + ]) + def test_origin_is_multiple_of_freq(self, lmdb_version_store_v1, closed, origin, date_range): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + start, end = date_range + idx = pd.date_range(start, end, freq='10s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": rng.integers(0, 100, len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + origin=origin, + drop_empty_buckets_for="col" + ) + + @pytest.mark.parametrize("origin", [ + "start", + "start_day", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + "epoch" + ]) + def test_pre_epoch_data(self, lmdb_version_store_v1, closed, origin): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + start = pd.Timestamp("1800-01-01 10:00:00") + end = pd.Timestamp("1800-01-02 10:00:00") + idx = pd.date_range(start, end, freq='30s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": rng.integers(0, 100, len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + origin=origin, + drop_empty_buckets_for="col" + ) + + @pytest.mark.parametrize("origin", [ + "start", + "start_day", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + ]) + @pytest.mark.parametrize("date_range", + list(itertools.product( + [pd.Timestamp("2024-01-01") - pd.Timedelta(1), pd.Timestamp("2024-01-01") + pd.Timedelta(1)], + [pd.Timestamp("2024-01-02") - pd.Timedelta(1), pd.Timestamp("2024-01-02") + pd.Timedelta(1)])) + ) + def test_origin_off_by_one_on_boundary(self, lmdb_version_store_v1, closed, origin, date_range): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + start, end = date_range + idx = pd.date_range(start, end, freq='10s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": rng.integers(0, 100, len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + 
origin=origin, + drop_empty_buckets_for="col" + ) + + @pytest.mark.parametrize("origin", [ + "start_day", + "start", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")) + ]) + def test_non_epoch_origin_throws_with_daterange(self, lmdb_version_store_v1, origin, closed): + lib = lmdb_version_store_v1 + sym = "test_origin_start_throws_with_daterange" + + lib.write(sym, pd.DataFrame({"col": [1, 2, 3]}, index=pd.DatetimeIndex([pd.Timestamp("2024-01-01"), pd.Timestamp("2024-01-02"), pd.Timestamp("2024-01-03")]))) + q = QueryBuilder() + q = q.resample('1min', origin=origin, closed=closed).agg({"col_min":("col", "min")}) + with pytest.raises(UserInputException) as exception_info: + lib.read(sym, query_builder=q, date_range=(pd.Timestamp("2024-01-02"), pd.Timestamp("2024-01-03"))) + assert all(w in str(exception_info.value) for w in [origin, "origin"]) + + @pytest.mark.parametrize("origin", ["epoch", pd.Timestamp("2025-01-03 12:00:00")]) + def test_epoch_and_ts_origin_works_with_date_range(self, lmdb_version_store_v1, closed, origin): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + # Start and end are picked so that #bins * rule + start != end on purpose to test + # the bin generation in case of end and end_day + start = pd.Timestamp("2025-01-01 00:00:00") + end = pd.Timestamp("2025-01-04 00:00:00") + idx = pd.date_range(start, end, freq='3s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": rng.integers(0, 100, len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + origin=origin, + date_range=(pd.Timestamp("2025-01-02 00:00:00"), pd.Timestamp("2025-01-03 00:00:00")) + ) + +@pytest.mark.skipif(PANDAS_VERSION < Version("1.1.0"), reason="Pandas < 1.1.0 do not have offset param") +@pytest.mark.parametrize("closed", ["left", "right"]) +@pytest.mark.parametrize("label", ["left", "right"]) +@pytest.mark.parametrize("origin",[ + "start", + "start_day", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + "epoch", + pd.Timestamp("2024-01-01"), + pd.Timestamp("2025-01-01 15:00:00"), + pd.Timestamp("2025-01-03 15:00:00") +]) +@pytest.mark.parametrize("offset", ['10s', '13s', '2min']) +def test_origin_offset_combined(lmdb_version_store_v1, closed, origin, label, offset): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + # Start and end are picked so that #bins * rule + start != end on purpose to test + # the bin generation in case of end and end_day + start = pd.Timestamp("2025-01-01 10:00:33") + end = pd.Timestamp("2025-01-02 12:00:20") + idx = pd.date_range(start, end, freq='10s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": range(len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + origin=origin, + drop_empty_buckets_for="col", + label=label, + offset=offset + ) + +def test_max_with_one_infinity_element(lmdb_version_store_v1): + lib = lmdb_version_store_v1 + sym = "test_max_with_one_infinity_element" + + lib.write(sym, pd.DataFrame({"col": [np.inf]}, index=pd.DatetimeIndex([pd.Timestamp("2024-01-01")]))) + q = QueryBuilder() + q = 
q.resample('1min').agg({"col_max":("col", "max")}) + assert np.isinf(lib.read(sym, query_builder=q).data['col_max'][0]) + +def test_min_with_one_infinity_element(lmdb_version_store_v1): + lib = lmdb_version_store_v1 + sym = "test_min_with_one_infinity_element" + + lib.write(sym, pd.DataFrame({"col": [-np.inf]}, index=pd.DatetimeIndex([pd.Timestamp("2024-01-01")]))) + q = QueryBuilder() + q = q.resample('1min').agg({"col_min":("col", "min")}) + assert np.isneginf(lib.read(sym, query_builder=q).data['col_min'][0]) \ No newline at end of file
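For reference, the origin-adjusted bucket arithmetic added in `compute_first_last_dates` (python_bindings.cpp) can be sketched in Python roughly as follows. This is an illustrative sketch only, not code from the patch: it covers the left-closed case and the `epoch`, `start`, `start_day` and numeric-timestamp origins, and the function name is made up.

```python
# Illustrative sketch of the origin-adjusted bucket computation for the
# left-closed case; all values are integer nanoseconds since the epoch.
NS_PER_DAY = 24 * 60 * 60 * 1_000_000_000


def first_last_boundaries(start, end, rule, offset=0, origin="epoch"):
    # Resolve the origin to a nanosecond timestamp, mirroring the C++ variant_match.
    if isinstance(origin, int):            # origin supplied as a timestamp
        origin_ns = origin
    elif origin == "epoch":
        origin_ns = 0
    elif origin == "start":
        origin_ns = start
    elif origin == "start_day":
        origin_ns = start - start % NS_PER_DAY   # midnight of the first day
    else:
        raise ValueError(f"origin {origin!r} not covered by this sketch")
    origin_ns += offset

    # Python's % already behaves like the patch's python_mod for negative operands.
    ns_to_prev_start = (start - origin_ns) % rule
    ns_to_prev_end = (end - origin_ns) % rule
    first = start - ns_to_prev_start if ns_to_prev_start > 0 else start
    last = end + (rule - ns_to_prev_end) if ns_to_prev_end > 0 else end + rule
    return first, last   # bucket boundaries are then first, first + rule, ..., last


# Example: with origin="start" the first boundary stays on the first index value
# instead of snapping to an epoch-aligned multiple of the rule.
two_min = 120 * 1_000_000_000
print(first_last_boundaries(start=5 * two_min + 7, end=9 * two_min, rule=two_min, origin="start"))
```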