FEAT-#6767: Provide the ability to use experimental functionality when experimental mode is not enabled globally via an environment variable (#6764)

Signed-off-by: Anatoly Myachev <[email protected]>
modin-project · Dec 8, 2023 · c3a4f78 · c3a4f78
1 parent b61b40d
commit c3a4f78
Show file tree

Hide file tree

Showing 32 changed files with 287 additions and 695 deletions.
diff --git a/.github/workflows/ci-required.yml b/.github/workflows/ci-required.yml
@@ -66,7 +66,6 @@ jobs:
             asv_bench/benchmarks/__init__.py asv_bench/benchmarks/io/__init__.py \
             asv_bench/benchmarks/scalability/__init__.py \
             modin/core/io \
-            modin/experimental/core/execution/ray/implementations/pandas_on_ray \
             modin/experimental/core/execution/ray/implementations/pyarrow_on_ray \
             modin/pandas/series.py \
             modin/core/execution/python \

diff --git a/docs/development/architecture.rst b/docs/development/architecture.rst
@@ -224,18 +224,6 @@ documentation page on :doc:`contributing </development/contributing>`.
     - Uses native python execution - mainly used for debugging.
     - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.
     - For more information on the execution path, see the :doc:`pandas on Python </flow/modin/core/execution/python/implementations/pandas_on_python/index>` page.
-- pandas on Ray (experimental)
-    - Uses the Ray_ execution framework.
-    - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.
-    - For more information on the execution path, see the :doc:`experimental pandas on Ray </flow/modin/experimental/core/execution/ray/implementations/pandas_on_ray/index>` page.
-- pandas on MPI (experimental)
-    - Uses MPI_ through the Unidist_ execution framework.
-    - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.
-    - For more information on the execution path, see the :doc:`experimental pandas on Unidist </flow/modin/experimental/core/execution/unidist/implementations/pandas_on_unidist/index>` page.
-- pandas on Dask (experimental)
-    - Uses the Dask_ execution framework.
-    - The storage format is `pandas` and the in-memory partition type is a pandas DataFrame.
-    - For more information on the execution path, see the :doc:`experimental pandas on Dask </flow/modin/experimental/core/execution/dask/implementations/pandas_on_dask/index>` page.
 - :doc:`HDK on Native </development/using_hdk>` (experimental)
     - Uses HDK as an engine.
     - The storage format is `hdk` and the in-memory partition type is a pyarrow Table. When defaulting to pandas, the pandas DataFrame is used.
@@ -341,19 +329,9 @@ details. The documentation covers most modules, with more docs being added every
    │   ├─── :doc:`experimental </flow/modin/experimental/index>`
    │   │   ├───core
    │   │   │   ├───execution
-   │   │   │   │   ├───native
-   │   │   │   │   │   └───implementations
-   │   │   │   │   │       └─── :doc:`hdk_on_native </flow/modin/experimental/core/execution/native/implementations/hdk_on_native/index>`
-   │   │   │   │   ├───ray
-   │   │   │   │   │   └───implementations
-   │   │   │   │   │       ├─── :doc:`pandas_on_ray </flow/modin/experimental/core/execution/ray/implementations/pandas_on_ray/index>`
-   │   │   │   │   │       └─── :doc:`pyarrow_on_ray </flow/modin/experimental/core/execution/ray/implementations/pyarrow_on_ray>`
-   │   │   │   │   ├───unidist
-   │   │   │   │   |   └───implementations
-   │   │   │   │   |       └─── :doc:`pandas_on_unidist </flow/modin/experimental/core/execution/unidist/implementations/pandas_on_unidist/index>`
-   |   │   |   |   └───dask
-   |   |   |   |       └───implementations
-   │   │   │   │           └─── :doc:`pandas_on_dask </flow/modin/experimental/core/execution/dask/implementations/pandas_on_dask/index>`
+   │   │   │   │   └───native
+   │   │   │   │       └───implementations
+   │   │   │   │           └─── :doc:`hdk_on_native </flow/modin/experimental/core/execution/native/implementations/hdk_on_native/index>`
    │   │   │   ├─── :doc:`storage_formats </flow/modin/experimental/core/storage_formats/index>`
    |   │   │   |   ├─── :doc:`hdk </flow/modin/experimental/core/storage_formats/hdk/index>`
    │   │   │   |   └─── :doc:`pyarrow </flow/modin/experimental/core/storage_formats/pyarrow/index>`

diff --git a/...modin/experimental/core/execution/dask/implementations/pandas_on_dask/index.rst b/...modin/experimental/core/execution/dask/implementations/pandas_on_dask/index.rst
diff --git a/...in/experimental/core/execution/dask/implementations/pandas_on_dask/io/index.rst b/...in/experimental/core/execution/dask/implementations/pandas_on_dask/io/index.rst
diff --git a/...w/modin/experimental/core/execution/ray/implementations/pandas_on_ray/index.rst b/...w/modin/experimental/core/execution/ray/implementations/pandas_on_ray/index.rst
diff --git a/...odin/experimental/core/execution/ray/implementations/pandas_on_ray/io/index.rst b/...odin/experimental/core/execution/ray/implementations/pandas_on_ray/io/index.rst
diff --git a/...experimental/core/execution/unidist/implementations/pandas_on_unidist/index.rst b/...experimental/core/execution/unidist/implementations/pandas_on_unidist/index.rst
diff --git a/...erimental/core/execution/unidist/implementations/pandas_on_unidist/io/index.rst b/...erimental/core/execution/unidist/implementations/pandas_on_unidist/io/index.rst
diff --git a/docs/flow/modin/experimental/pandas.rst b/docs/flow/modin/experimental/pandas.rst
@@ -13,4 +13,4 @@ Experimental API Reference
 .. autofunction:: read_csv_glob
 .. autofunction:: read_custom_text
 .. autofunction:: read_pickle_distributed
-.. automethod:: modin.experimental.pandas.DataFrame.to_pickle_distributed
+.. automethod:: modin.pandas.DataFrame.modin::to_pickle_distributed
diff --git a/docs/supported_apis/dataframe_supported.rst b/docs/supported_apis/dataframe_supported.rst
@@ -424,7 +424,8 @@ default to pandas.
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``to_period``              | `to_period`_              | D                      |                                                    |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
-| ``to_pickle``              | `to_pickle`_              | D                      | Experimental implementation: to_pickle_distributed |
+| ``to_pickle``              | `to_pickle`_              | D                      | Experimental implementation:                       |
+|                            |                           |                        | DataFrame.modin.to_pickle_distributed              |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+
 | ``to_records``             | `to_records`_             | D                      |                                                    |
 +----------------------------+---------------------------+------------------------+----------------------------------------------------+

diff --git a/docs/usage_guide/advanced_usage/index.rst b/docs/usage_guide/advanced_usage/index.rst
@@ -31,7 +31,7 @@ Modin also supports these experimental APIs on top of pandas that are under acti
 - :py:func:`~modin.experimental.pandas.read_sql` -- add optional parameters for the database connection
 - :py:func:`~modin.experimental.pandas.read_custom_text` -- read custom text data from file
 - :py:func:`~modin.experimental.pandas.read_pickle_distributed`  -- read multiple files in a directory
-- :py:meth:`~modin.experimental.pandas.DataFrame.to_pickle_distributed` -- write to multiple files in a directory
+- :py:meth:`~modin.pandas.DataFrame.modin.to_pickle_distributed` -- write to multiple files in a directory
 
 DataFrame partitioning API
 --------------------------

diff --git a/modin/core/execution/dask/implementations/pandas_on_dask/io/io.py b/modin/core/execution/dask/implementations/pandas_on_dask/io/io.py
@@ -31,6 +31,9 @@
     SQLDispatcher,
 )
 from modin.core.storage_formats.pandas.parsers import (
+    ExperimentalCustomTextParser,
+    ExperimentalPandasPickleParser,
+    PandasCSVGlobParser,
     PandasCSVParser,
     PandasExcelParser,
     PandasFeatherParser,
@@ -40,6 +43,12 @@
     PandasSQLParser,
 )
 from modin.core.storage_formats.pandas.query_compiler import PandasQueryCompiler
+from modin.experimental.core.io import (
+    ExperimentalCSVGlobDispatcher,
+    ExperimentalCustomTextDispatcher,
+    ExperimentalPickleDispatcher,
+    ExperimentalSQLDispatcher,
+)
 
 
 class PandasOnDaskIO(BaseIO):
@@ -74,5 +83,18 @@ def __make_write(*classes, build_args=build_args):
     to_sql = __make_write(SQLDispatcher)
     read_excel = __make_read(PandasExcelParser, ExcelDispatcher)
 
+    # experimental methods that don't exist in pandas
+    read_csv_glob = __make_read(PandasCSVGlobParser, ExperimentalCSVGlobDispatcher)
+    read_pickle_distributed = __make_read(
+        ExperimentalPandasPickleParser, ExperimentalPickleDispatcher
+    )
+    to_pickle_distributed = __make_write(ExperimentalPickleDispatcher)
+    read_custom_text = __make_read(
+        ExperimentalCustomTextParser, ExperimentalCustomTextDispatcher
+    )
+    read_sql_distributed = __make_read(
+        ExperimentalSQLDispatcher, build_args={**build_args, "base_read": read_sql}
+    )
+
     del __make_read  # to not pollute class namespace
     del __make_write  # to not pollute class namespace