From 240923c2750e7c87d8d28286e25a80cfe3b08494 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun
Date: Thu, 4 Apr 2024 12:34:00 -0700
Subject: [PATCH] [SPARK-46812][PYTHON][TESTS][FOLLOWUP] Check should_test_connect and pyarrow to skip tests

### What changes were proposed in this pull request?

This is a follow-up of SPARK-46812 to skip the tests more robustly and to recover the PyPy CIs.
- https://github.com/apache/spark/actions/runs/8556900899/job/23447948557

### Why are the changes needed?

- `should_test_connect` covers more edge cases than `have_pandas`.
- `test_resources.py` has Arrow usage too.
  https://github.com/apache/spark/blob/25fc67fa114d2c34099c3ab50396870f543c338b/python/pyspark/resource/tests/test_resources.py#L85

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Manually tested with `pandas` and without `pyarrow`.

```
$ pip3 freeze | grep pyarrow
$ pip3 freeze | grep pandas
pandas==2.2.1
pandas-stubs==1.2.0.53

$ python/run-tests --modules=pyspark-resource --parallelism=1 --python-executables=python3.10
Running PySpark tests. Output is in /Users/dongjoon/APACHE/spark-merge/python/unit-tests.log
Will test against the following Python executables: ['python3.10']
Will test the following Python modules: ['pyspark-resource']
python3.10 python_implementation is CPython
python3.10 version is: Python 3.10.13
Starting test(python3.10): pyspark.resource.profile (temp output: /Users/dongjoon/APACHE/spark-merge/python/target/db9cb886-2698-49d9-a663-9b8bea79caba/python3.10__pyspark.resource.profile__8mg46xru.log)
Finished test(python3.10): pyspark.resource.profile (1s)
Starting test(python3.10): pyspark.resource.tests.test_connect_resources (temp output: /Users/dongjoon/APACHE/spark-merge/python/target/53f979bd-1073-41e6-99ba-8e787edc415b/python3.10__pyspark.resource.tests.test_connect_resources__hrgrs5sk.log)
Finished test(python3.10): pyspark.resource.tests.test_connect_resources (0s) ... 1 tests were skipped
Starting test(python3.10): pyspark.resource.tests.test_resources (temp output: /Users/dongjoon/APACHE/spark-merge/python/target/2b06c671-0199-4827-a0e5-f852a28313fd/python3.10__pyspark.resource.tests.test_resources__jis6mk9a.log)
Finished test(python3.10): pyspark.resource.tests.test_resources (2s) ... 1 tests were skipped
Tests passed in 4 seconds

Skipped tests in pyspark.resource.tests.test_connect_resources with python3.10:
    test_profile_before_sc_for_connect (pyspark.resource.tests.test_connect_resources.ResourceProfileTests) ... skip (0.002s)

Skipped tests in pyspark.resource.tests.test_resources with python3.10:
    test_profile_before_sc_for_sql (pyspark.resource.tests.test_resources.ResourceProfileTests) ... skip (0.001s)
```

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #45880 from dongjoon-hyun/SPARK-46812-2.
Authored-by: Dongjoon Hyun
Signed-off-by: Dongjoon Hyun
---
 .../resource/tests/test_connect_resources.py    |  7 +++++--
 python/pyspark/resource/tests/test_resources.py | 13 +++++++++++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/resource/tests/test_connect_resources.py b/python/pyspark/resource/tests/test_connect_resources.py
index 40c68029a1535..1529a33cb0ad0 100644
--- a/python/pyspark/resource/tests/test_connect_resources.py
+++ b/python/pyspark/resource/tests/test_connect_resources.py
@@ -18,10 +18,13 @@
 
 from pyspark.resource import ResourceProfileBuilder, TaskResourceRequests, ExecutorResourceRequests
 from pyspark.sql import SparkSession
-from pyspark.testing.sqlutils import have_pandas, pandas_requirement_message
+from pyspark.testing.connectutils import (
+    should_test_connect,
+    connect_requirement_message,
+)
 
 
-@unittest.skipIf(not have_pandas, pandas_requirement_message)
+@unittest.skipIf(not should_test_connect, connect_requirement_message)
 class ResourceProfileTests(unittest.TestCase):
     def test_profile_before_sc_for_connect(self):
         rpb = ResourceProfileBuilder()
diff --git a/python/pyspark/resource/tests/test_resources.py b/python/pyspark/resource/tests/test_resources.py
index 6f61d5af2d926..e29a77ed36dda 100644
--- a/python/pyspark/resource/tests/test_resources.py
+++ b/python/pyspark/resource/tests/test_resources.py
@@ -15,10 +15,16 @@
 # limitations under the License.
 #
 import unittest
+from typing import cast
 
 from pyspark.resource import ExecutorResourceRequests, ResourceProfileBuilder, TaskResourceRequests
 from pyspark.sql import SparkSession
-from pyspark.testing.sqlutils import have_pandas, pandas_requirement_message
+from pyspark.testing.sqlutils import (
+    have_pandas,
+    have_pyarrow,
+    pandas_requirement_message,
+    pyarrow_requirement_message,
+)
 
 
 class ResourceProfileTests(unittest.TestCase):
@@ -72,7 +78,10 @@ def assert_request_contents(exec_reqs, task_reqs):
         assert_request_contents(rp3.executorResources, rp3.taskResources)
         sc.stop()
 
-    @unittest.skipIf(not have_pandas, pandas_requirement_message)
+    @unittest.skipIf(
+        not have_pandas or not have_pyarrow,
+        cast(str, pandas_requirement_message or pyarrow_requirement_message),
+    )
     def test_profile_before_sc_for_sql(self):
         rpb = ResourceProfileBuilder()
         treqs = TaskResourceRequests().cpus(2)
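
As a side note for reviewers unfamiliar with these guards, the sketch below shows the general skip-guard pattern the patch relies on: probe an optional dependency once at import time, record a boolean flag plus a requirement message, and pass both to `unittest.skipIf`. The probe and the `ArrowDependentTests` class are illustrative assumptions rather than the actual `pyspark.testing.sqlutils` or `pyspark.testing.connectutils` code; only the `unittest.skipIf` API is taken as given.

```python
# Illustrative sketch only; the real pyspark.testing helpers differ in detail.
import unittest

# Probe the optional dependency once at import time and remember why it failed.
try:
    import pyarrow  # noqa: F401

    have_pyarrow = True
    pyarrow_requirement_message = None
except ImportError as error:
    have_pyarrow = False
    pyarrow_requirement_message = str(error)


# Skip the whole class when the dependency is missing; the reason string is
# what shows up in the "Skipped tests ..." summary of the run-tests log above.
@unittest.skipIf(not have_pyarrow, pyarrow_requirement_message or "pyarrow is required")
class ArrowDependentTests(unittest.TestCase):
    def test_needs_pyarrow(self):
        # Only runs when the import probe above succeeded.
        self.assertTrue(have_pyarrow)


if __name__ == "__main__":
    unittest.main()
```

In the patch itself, `cast(str, pandas_requirement_message or pyarrow_requirement_message)` serves only to satisfy the type checker, since both messages are `None` when the libraries are present; the `or "pyarrow is required"` fallback in the sketch plays the analogous role at runtime.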