From 5d5f2a4fcb51a0626066569eb072d16965d28579 Mon Sep 17 00:00:00 2001 From: Mahesh Vashishtha Date: Wed, 8 Jun 2022 09:11:29 -0500 Subject: [PATCH] FIX-#4541: Fix merge_asof with non-unique right index. (#4542) Co-authored-by: Yaroslav Igoshev Signed-off-by: mvashishtha --- docs/release_notes/release_notes-0.15.0.rst | 1 + modin/pandas/general.py | 5 +++++ modin/pandas/test/test_general.py | 7 +++++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/release_notes/release_notes-0.15.0.rst b/docs/release_notes/release_notes-0.15.0.rst index d273aaf6ae0..9135a41de5d 100644 --- a/docs/release_notes/release_notes-0.15.0.rst +++ b/docs/release_notes/release_notes-0.15.0.rst @@ -30,6 +30,7 @@ Key Features and Updates * FIX-#4531: Fix a makedirs race condition in to_parquet (#4533) * FIX-#4464: Refactor Ray utils and quick fix groupby.count failing on virtual partitions (#4490) * FIX-#4436: Fix to_pydatetime dtype for timezone None (#4437) + * FIX-#4541: Fix merge_asof with non-unique right index (#4542) * Performance enhancements * FEAT-#4320: Add connectorx as an alternative engine for read_sql (#4346) * PERF-#4493: Use partition size caches more in Modin dataframe (#4495) diff --git a/modin/pandas/general.py b/modin/pandas/general.py index 79bf90f5809..1d29ae63594 100644 --- a/modin/pandas/general.py +++ b/modin/pandas/general.py @@ -194,6 +194,11 @@ def merge_asof( or not isinstance(by, (str, type(None))) or not isinstance(left_by, (str, type(None))) or not isinstance(right_by, (str, type(None))) + # The implementation below assumes that the right index is unique + # because it uses merge_asof to map each position in the merged + # index to the label of the one right row that should be merged + # at that row position. + or not right.index.is_unique ): if isinstance(right, DataFrame): right = to_pandas(right) diff --git a/modin/pandas/test/test_general.py b/modin/pandas/test/test_general.py index 7707c3ed518..4867e418a84 100644 --- a/modin/pandas/test/test_general.py +++ b/modin/pandas/test/test_general.py @@ -201,9 +201,12 @@ def test_merge_ordered(): pd.merge_ordered(data_a, data_b, fill_method="ffill", left_by="group") -def test_merge_asof(): +@pytest.mark.parametrize("right_index", [None, [0] * 5], ids=["default", "non_unique"]) +def test_merge_asof(right_index): left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) - right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) + right = pd.DataFrame( + {"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}, index=right_index + ) with warns_that_defaulting_to_pandas(): df = pd.merge_asof(left, right, on="a")