Skip to content

Commit

Permalink
FIX-#4541: Fix merge_asof with non-unique right index. (#4542)
Browse files Browse the repository at this point in the history
Co-authored-by: Yaroslav Igoshev <[email protected]>
Signed-off-by: mvashishtha <[email protected]>
  • Loading branch information
mvashishtha and YarShev authored Jun 8, 2022
1 parent 8679052 commit 5d5f2a4
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/release_notes/release_notes-0.15.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Key Features and Updates
* FIX-#4531: Fix a makedirs race condition in to_parquet (#4533)
* FIX-#4464: Refactor Ray utils and quick fix groupby.count failing on virtual partitions (#4490)
* FIX-#4436: Fix to_pydatetime dtype for timezone None (#4437)
* FIX-#4541: Fix merge_asof with non-unique right index (#4542)
* Performance enhancements
* FEAT-#4320: Add connectorx as an alternative engine for read_sql (#4346)
* PERF-#4493: Use partition size caches more in Modin dataframe (#4495)
Expand Down
5 changes: 5 additions & 0 deletions modin/pandas/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ def merge_asof(
or not isinstance(by, (str, type(None)))
or not isinstance(left_by, (str, type(None)))
or not isinstance(right_by, (str, type(None)))
# The implementation below assumes that the right index is unique
# because it uses merge_asof to map each position in the merged
# index to the label of the one right row that should be merged
# at that row position.
or not right.index.is_unique
):
if isinstance(right, DataFrame):
right = to_pandas(right)
Expand Down
7 changes: 5 additions & 2 deletions modin/pandas/test/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,12 @@ def test_merge_ordered():
pd.merge_ordered(data_a, data_b, fill_method="ffill", left_by="group")


- def test_merge_asof():
+ @pytest.mark.parametrize("right_index", [None, [0] * 5], ids=["default", "non_unique"])
+ def test_merge_asof(right_index):
left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
- right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})
+ right = pd.DataFrame(
+ {"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}, index=right_index
+ )

with warns_that_defaulting_to_pandas():
df = pd.merge_asof(left, right, on="a")
Expand Down

0 comments on commit 5d5f2a4

Please sign in to comment.