Release version 0.13.3 - pin pypi and docs (#4334)

* Release version 0.13.3
* FIX-#4113, FIX-#4116, FIX-#4115: Apply new `black` formatting, fix pydocstyle check and readthedocs build (#4114)

Co-authored-by: Alexey Prutskov <[email protected]>
Signed-off-by: Vasilij Litvinov <[email protected]>
modin-project · Mar 17, 2022 · bac4031
1 parent 9670c1c
commit bac4031
Show file tree

Hide file tree

Showing 14 changed files with 46 additions and 43 deletions.
diff --git a/README.md b/README.md
@@ -6,8 +6,8 @@
 <a href='https://join.slack.com/t/modin-project/shared_invite/zt-yvk5hr3b-f08p_ulbuRWsAfg9rMY3uA'><img src='https://img.shields.io/static/v1?label=chat&logo=slack&message=Slack&color=brightgreen' alt='Slack'  align="center"/></a>
 <a href="https://codecov.io/gh/modin-project/modin"><img src="https://codecov.io/gh/modin-project/modin/branch/master/graph/badge.svg" align="center"/></a>
 <a href="https://github.com/modin-project/modin/actions"><img src="https://github.com/modin-project/modin/workflows/master/badge.svg" align="center"></a>
-<a href="https://modin.readthedocs.io/en/0.13.2/?badge=0.13.2"><img alt="" src="https://readthedocs.org/projects/modin/badge/?version=0.13.2" align="center"></a>
-<a href="https://pypi.org/project/modin/0.13.2/"><img alt="" src="https://img.shields.io/badge/pypi-0.13.2-blue.svg" align="center"></a>
+<a href="https://modin.readthedocs.io/en/0.13.3/?badge=0.13.3"><img alt="" src="https://readthedocs.org/projects/modin/badge/?version=0.13.3" align="center"></a>
+<a href="https://pypi.org/project/modin/0.13.3/"><img alt="" src="https://img.shields.io/badge/pypi-0.13.3-blue.svg" align="center"></a>
 <a href="https://modin.org/modin-bench/#/"><img src="https://img.shields.io/badge/benchmarked%20by-asv-blue.svg" align="center"></a>
 </p>
 

diff --git a/docs/requirements-doc.txt b/docs/requirements-doc.txt
@@ -13,3 +13,4 @@ git+https://github.com/modin-project/modin.git@master#egg=modin[all]
 sphinxcontrib_plantuml
 sphinx-issues
 xgboost
+pip
diff --git a/modin/core/storage_formats/pandas/parsers.py b/modin/core/storage_formats/pandas/parsers.py
@@ -519,14 +519,14 @@ def update_row_nums(match):
             """
             b = match.group(0)
             return re.sub(
-                br"\d+",
+                rb"\d+",
                 lambda c: str(int(c.group(0).decode("utf-8")) - _skiprows).encode(
                     "utf-8"
                 ),
                 b,
             )
 
-        bytes_data = re.sub(br'r="[A-Z]*\d+"', update_row_nums, bytes_data)
+        bytes_data = re.sub(rb'r="[A-Z]*\d+"', update_row_nums, bytes_data)
         bytesio = BytesIO(excel_header + bytes_data + footer)
         # Use openpyxl to read/parse sheet data
         reader = WorksheetReader(ws, bytesio, ex.shared_strings, False)

diff --git a/modin/experimental/core/execution/native/implementations/omnisci_on_native/omnisci_worker.py b/modin/experimental/core/execution/native/implementations/omnisci_on_native/omnisci_worker.py
@@ -203,8 +203,8 @@ def put_arrow_to_omnisci(cls, table, name=None):
             cpu_count = os.cpu_count()
             if cpu_count is not None:
                 fragment_size = table.num_rows // cpu_count
-                fragment_size = min(fragment_size, 2 ** 25)
-                fragment_size = max(fragment_size, 2 ** 18)
+                fragment_size = min(fragment_size, 2**25)
+                fragment_size = max(fragment_size, 2**18)
             else:
                 fragment_size = 0
         else:

diff --git a/modin/pandas/test/dataframe/test_binary.py b/modin/pandas/test/dataframe/test_binary.py
@@ -72,7 +72,7 @@ def test_math_functions(other, axis, op):
 
 @pytest.mark.parametrize(
     "other",
-    [lambda df: df[: -(2 ** 4)], lambda df: df[df.columns[0]].reset_index(drop=True)],
+    [lambda df: df[: -(2**4)], lambda df: df[df.columns[0]].reset_index(drop=True)],
     ids=["check_missing_value", "check_different_index"],
 )
 @pytest.mark.parametrize("fill_value", [None, 3.0])

diff --git a/modin/pandas/test/dataframe/test_join_sort.py b/modin/pandas/test/dataframe/test_join_sort.py
@@ -64,20 +64,20 @@ def test_combine(data):
     "test_data, test_data2",
     [
         (
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 6)),
-            np.random.uniform(0, 100, size=(2 ** 7, 2 ** 6)),
+            np.random.uniform(0, 100, size=(2**6, 2**6)),
+            np.random.uniform(0, 100, size=(2**7, 2**6)),
         ),
         (
-            np.random.uniform(0, 100, size=(2 ** 7, 2 ** 6)),
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 6)),
+            np.random.uniform(0, 100, size=(2**7, 2**6)),
+            np.random.uniform(0, 100, size=(2**6, 2**6)),
         ),
         (
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 6)),
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 7)),
+            np.random.uniform(0, 100, size=(2**6, 2**6)),
+            np.random.uniform(0, 100, size=(2**6, 2**7)),
         ),
         (
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 7)),
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 6)),
+            np.random.uniform(0, 100, size=(2**6, 2**7)),
+            np.random.uniform(0, 100, size=(2**6, 2**6)),
         ),
     ],
 )
@@ -162,20 +162,20 @@ def test_join(test_data, test_data2):
     "test_data, test_data2",
     [
         (
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 6)),
-            np.random.uniform(0, 100, size=(2 ** 7, 2 ** 6)),
+            np.random.uniform(0, 100, size=(2**6, 2**6)),
+            np.random.uniform(0, 100, size=(2**7, 2**6)),
         ),
         (
-            np.random.uniform(0, 100, size=(2 ** 7, 2 ** 6)),
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 6)),
+            np.random.uniform(0, 100, size=(2**7, 2**6)),
+            np.random.uniform(0, 100, size=(2**6, 2**6)),
         ),
         (
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 6)),
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 7)),
+            np.random.uniform(0, 100, size=(2**6, 2**6)),
+            np.random.uniform(0, 100, size=(2**6, 2**7)),
         ),
         (
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 7)),
-            np.random.uniform(0, 100, size=(2 ** 6, 2 ** 6)),
+            np.random.uniform(0, 100, size=(2**6, 2**7)),
+            np.random.uniform(0, 100, size=(2**6, 2**6)),
         ),
     ],
 )

diff --git a/modin/pandas/test/dataframe/test_reduce.py b/modin/pandas/test/dataframe/test_reduce.py
@@ -165,12 +165,12 @@ def test_describe(data, percentiles):
 @pytest.mark.parametrize("datetime_is_numeric", [True, False, None])
 def test_2195(datetime_is_numeric, has_numeric_column):
     data = {
-        "categorical": pd.Categorical(["d"] * 10 ** 2),
-        "date": [np.datetime64("2000-01-01")] * 10 ** 2,
+        "categorical": pd.Categorical(["d"] * 10**2),
+        "date": [np.datetime64("2000-01-01")] * 10**2,
     }
 
     if has_numeric_column:
-        data.update({"numeric": [5] * 10 ** 2})
+        data.update({"numeric": [5] * 10**2})
 
     modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
 

diff --git a/modin/pandas/test/test_concat.py b/modin/pandas/test/test_concat.py
@@ -149,7 +149,7 @@ def test_ignore_index_concat():
 
 
 def test_concat_non_subscriptable_keys():
-    frame_data = np.random.randint(0, 100, size=(2 ** 10, 2 ** 6))
+    frame_data = np.random.randint(0, 100, size=(2**10, 2**6))
     df = pd.DataFrame(frame_data).add_prefix("col")
     pdf = pandas.DataFrame(frame_data).add_prefix("col")
 

diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py
@@ -85,7 +85,7 @@ def wrapper(obj1, obj2, *args, **kwargs):
 
 @pytest.mark.parametrize("as_index", [True, False])
 def test_mixed_dtypes_groupby(as_index):
-    frame_data = np.random.randint(97, 198, size=(2 ** 6, 2 ** 4))
+    frame_data = np.random.randint(97, 198, size=(2**6, 2**4))
     pandas_df = pandas.DataFrame(frame_data).add_prefix("col")
     # Convert every other column to string
     for col in pandas_df.iloc[
@@ -885,13 +885,13 @@ def test_simple_col_groupby():
 
 
 @pytest.mark.parametrize(
-    "by", [np.random.randint(0, 100, size=2 ** 8), lambda x: x % 3, None]
+    "by", [np.random.randint(0, 100, size=2**8), lambda x: x % 3, None]
 )
 @pytest.mark.parametrize("as_index_series_or_dataframe", [0, 1, 2])
 def test_series_groupby(by, as_index_series_or_dataframe):
     if as_index_series_or_dataframe <= 1:
         as_index = as_index_series_or_dataframe == 1
-        series_data = np.random.randint(97, 198, size=2 ** 8)
+        series_data = np.random.randint(97, 198, size=2**8)
         modin_series = pd.Series(series_data)
         pandas_series = pandas.Series(series_data)
     else:
@@ -1292,7 +1292,7 @@ def eval_shift(modin_groupby, pandas_groupby):
 
 
 def test_groupby_on_index_values_with_loop():
-    length = 2 ** 6
+    length = 2**6
     data = {
         "a": np.random.randint(0, 100, size=length),
         "b": np.random.randint(0, 100, size=length),
@@ -1332,7 +1332,7 @@ def test_groupby_on_index_values_with_loop():
     ],
 )
 def test_groupby_multiindex(groupby_kwargs):
-    frame_data = np.random.randint(0, 100, size=(2 ** 6, 2 ** 4))
+    frame_data = np.random.randint(0, 100, size=(2**6, 2**4))
     modin_df = pd.DataFrame(frame_data)
     pandas_df = pandas.DataFrame(frame_data)
 

diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
@@ -3474,7 +3474,7 @@ def sort_sensitive_comparator(df1, df2):
     )
 
     # from issue #2365
-    arr = np.random.rand(2 ** 6)
+    arr = np.random.rand(2**6)
     arr[::10] = np.nan
     eval_general(
         *create_test_series(arr),

diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
@@ -42,9 +42,9 @@
 random_state = np.random.RandomState(seed=42)
 
 DATASET_SIZE_DICT = {
-    "Small": (2 ** 2, 2 ** 3),
-    "Normal": (2 ** 6, 2 ** 8),
-    "Big": (2 ** 7, 2 ** 12),
+    "Small": (2**2, 2**3),
+    "Normal": (2**6, 2**8),
+    "Big": (2**7, 2**12),
 }
 
 # Size of test dataframes

diff --git a/modin/test/test_envvar_npartitions.py b/modin/test/test_envvar_npartitions.py
@@ -21,7 +21,7 @@
 @pytest.mark.parametrize("num_partitions", [2, 4, 6, 8, 10])
 def test_set_npartitions(num_partitions):
     NPartitions.put(num_partitions)
-    data = np.random.randint(0, 100, size=(2 ** 16, 2 ** 8))
+    data = np.random.randint(0, 100, size=(2**16, 2**8))
     df = pd.DataFrame(data)
     part_shape = df._query_compiler._modin_frame._partitions.shape
     assert part_shape[0] == num_partitions and part_shape[1] == min(num_partitions, 8)
@@ -31,7 +31,7 @@ def test_set_npartitions(num_partitions):
 @pytest.mark.parametrize("right_num_partitions", [2, 4, 6, 8, 10])
 def test_runtime_change_npartitions(left_num_partitions, right_num_partitions):
     NPartitions.put(left_num_partitions)
-    data = np.random.randint(0, 100, size=(2 ** 16, 2 ** 8))
+    data = np.random.randint(0, 100, size=(2**16, 2**8))
     left_df = pd.DataFrame(data)
     part_shape = left_df._query_compiler._modin_frame._partitions.shape
     assert part_shape[0] == left_num_partitions and part_shape[1] == min(

diff --git a/modin/test/test_partition_api.py b/modin/test/test_partition_api.py
@@ -55,7 +55,7 @@
 
 @pytest.mark.parametrize("axis", [None, 0, 1])
 def test_unwrap_partitions(axis):
-    data = np.random.randint(0, 100, size=(2 ** 16, 2 ** 8))
+    data = np.random.randint(0, 100, size=(2**16, 2**8))
     df = pd.DataFrame(data)
 
     if axis is None:
@@ -108,8 +108,8 @@ def test_unwrap_partitions(axis):
 @pytest.mark.parametrize("index", [None, "index"])
 @pytest.mark.parametrize("axis", [None, 0, 1])
 def test_from_partitions(axis, index, columns, row_lengths, column_widths):
-    num_rows = 2 ** 16
-    num_cols = 2 ** 8
+    num_rows = 2**16
+    num_cols = 2**8
     data = np.random.randint(0, 100, size=(num_rows, num_cols))
     df1, df2 = pandas.DataFrame(data), pandas.DataFrame(data)
     expected_df = pandas.concat([df1, df2], axis=1 if axis is None else axis)
@@ -157,8 +157,8 @@ def test_from_partitions(axis, index, columns, row_lengths, column_widths):
 @pytest.mark.parametrize("index", ["original_idx", "new_idx"])
 @pytest.mark.parametrize("axis", [None, 0, 1])
 def test_from_partitions_mismatched_labels(axis, index, columns):
-    num_rows = 2 ** 16
-    num_cols = 2 ** 8
+    num_rows = 2**16
+    num_cols = 2**8
     expected_df = pd.DataFrame(np.random.randint(0, 100, size=(num_rows, num_cols)))
     partitions = unwrap_partitions(expected_df, axis=axis)
 

diff --git a/scripts/doc_checker.py b/scripts/doc_checker.py
@@ -45,6 +45,8 @@
         )
 if not hasattr(sys.modules["cudf"], "DataFrame"):
     sys.modules["cudf"].DataFrame = type("DataFrame", (object,), {})
+if not hasattr(sys.modules["cupy"], "ndarray"):
+    sys.modules["cupy"].ndarray = type("ndarray", (object,), {})
 
 logging.basicConfig(
     stream=sys.stdout, format="%(levelname)s:%(message)s", level=logging.INFO
-Original file line number
+Diff line change
@@ Expand Up @@
     sphinxcontrib_plantuml
     sphinx-issues
     xgboost
+    pip