From d1a23f9a9a7bb784179b27b8b4dd693956072aca Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 16 Oct 2023 18:01:06 +0200 Subject: [PATCH] return None from lambda to speedup 'ip' computation Signed-off-by: Anatoly Myachev --- .../implementations/pandas_on_dask/partitioning/partition.py | 3 +-- .../implementations/pandas_on_ray/partitioning/partition.py | 3 +-- .../pandas_on_unidist/partitioning/partition.py | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition.py b/modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition.py index d6679701fd3..18dd8b02888 100644 --- a/modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition.py +++ b/modin/core/execution/dask/implementations/pandas_on_dask/partitioning/partition.py @@ -13,7 +13,6 @@ """Module houses class that wraps data (block partition) and its metadata.""" -import pandas from distributed import Future from distributed.utils import get_ip @@ -307,7 +306,7 @@ def ip(self, materialize=True): IP address of the node that holds the data. """ if self._ip_cache is None: - self._ip_cache = self.apply(lambda df: pandas.DataFrame([]))._ip_cache + self._ip_cache = self.apply(lambda df: None)._ip_cache if materialize and isinstance(self._ip_cache, Future): self._ip_cache = DaskWrapper.materialize(self._ip_cache) return self._ip_cache diff --git a/modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition.py b/modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition.py index 9235d6faabb..d138b1ad1be 100644 --- a/modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition.py +++ b/modin/core/execution/ray/implementations/pandas_on_ray/partitioning/partition.py @@ -13,7 +13,6 @@ """Module houses class that wraps data (block partition) and its metadata.""" -import pandas import ray from ray.util import get_node_ip_address @@ -331,7 +330,7 @@ def ip(self, materialize=True): if len(self.call_queue): self.drain_call_queue() else: - self._ip_cache = self.apply(lambda df: pandas.DataFrame([]))._ip_cache + self._ip_cache = self.apply(lambda df: None)._ip_cache if materialize and isinstance(self._ip_cache, ObjectIDType): self._ip_cache = RayWrapper.materialize(self._ip_cache) return self._ip_cache diff --git a/modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition.py b/modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition.py index 3959e88969a..668acf8eef6 100644 --- a/modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition.py +++ b/modin/core/execution/unidist/implementations/pandas_on_unidist/partitioning/partition.py @@ -15,7 +15,6 @@ import warnings -import pandas import unidist from modin.core.dataframe.pandas.partitioning.partition import PandasDataframePartition @@ -302,7 +301,7 @@ def ip(self, materialize=True): if len(self.call_queue): self.drain_call_queue() else: - self._ip_cache = self.apply(lambda df: pandas.DataFrame([]))._ip_cache + self._ip_cache = self.apply(lambda df: None)._ip_cache if materialize and unidist.is_object_ref(self._ip_cache): self._ip_cache = UnidistWrapper.materialize(self._ip_cache) return self._ip_cache