Skip to content

Commit

Permalink
PERF-#0000: repartition if needed for merge
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev committed Apr 28, 2024
1 parent 5b96219 commit b6a887e
Showing 1 changed file with 19 additions and 1 deletion.
20 changes: 19 additions & 1 deletion modin/core/storage_formats/pandas/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,23 @@

"""Contains implementations for Merge/Join."""

from __future__ import annotations

from typing import TYPE_CHECKING

import pandas
from pandas.core.dtypes.common import is_list_like
from pandas.errors import MergeError

from modin.core.dataframe.base.dataframe.utils import join_columns
from modin.core.dataframe.pandas.metadata import ModinDtypes
from modin.config import NPartitions, MinPartitionSize

from .utils import merge_partitioning

if TYPE_CHECKING:
from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler

Check warning on line 31 in modin/core/storage_formats/pandas/merge.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/merge.py#L31

Added line #L31 was not covered by tests


# TODO: add methods for 'join' here
class MergeImpl:
Expand Down Expand Up @@ -93,7 +101,9 @@ def func(left, right):
).reset_index(drop=True)

@classmethod
def row_axis_merge(cls, left, right, kwargs):
def row_axis_merge(
cls, left: PandasQueryCompiler, right: PandasQueryCompiler, kwargs: dict
):
"""
Execute merge using row-axis implementation.
Expand Down Expand Up @@ -164,6 +174,14 @@ def map_func(
left, right, on, left_on, right_on, kwargs.get("suffixes", ("_x", "_y"))
)

# the left frame has too few row partitions compared to `NPartitions`, so repartitioning it is more profitable
if (
left._modin_frame._partitions.shape[0] < 0.3 * NPartitions.get()
# avoids empty partitions after repartitioning; note that `len` can materialize the index
and len(left._modin_frame) > NPartitions.get() * MinPartitionSize.get()
):
left = left.repartition(axis=0)

Check warning on line 183 in modin/core/storage_formats/pandas/merge.py

View check run for this annotation

Codecov / codecov/patch

modin/core/storage_formats/pandas/merge.py#L183

Added line #L183 was not covered by tests

new_left = left.__constructor__(
left._modin_frame.broadcast_apply_full_axis(
axis=1,
Expand Down

0 comments on commit b6a887e

Please sign in to comment.