Skip to content

Commit

Permalink
Fix string_test for [databricks] 14.3
Browse files Browse the repository at this point in the history
Fixes NVIDIA#11535.

This commit fixes the failure of `test_startswith`
and `test_endswith` in `string_test.py` on Databricks 14.3.

This is, in effect, a follow-on to the changes introduced in NVIDIA#11247,
where `test_endswith` was skipped for Apache Spark 4.0, on account of
https://issues.apache.org/jira/browse/SPARK-48995.  It appears that
the same bug afflicts Databricks 14.3 as well.

This commit handles both `test_startswith` and `test_endswith` for
Databricks 14.3 and Apache Spark 4.0.

Signed-off-by: MithunR <[email protected]>
  • Loading branch information
mythrocks committed Oct 28, 2024
1 parent b653ce2 commit 7807452
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions integration_tests/src/main/python/string_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from pyspark.sql.types import *
import pyspark.sql.utils
import pyspark.sql.functions as f
from spark_session import with_cpu_session, with_gpu_session, is_databricks104_or_later, is_before_spark_320, is_before_spark_400
from spark_session import with_cpu_session, with_gpu_session, is_databricks104_or_later, is_databricks_version_or_later, is_before_spark_320, is_spark_400_or_later

_regexp_conf = { 'spark.rapids.sql.regexp.enabled': 'true' }

Expand Down Expand Up @@ -104,7 +104,7 @@ def test_substring_index(data_gen,delim):


@allow_non_gpu('ProjectExec')
@pytest.mark.skipif(condition=not is_before_spark_400(),
@pytest.mark.skipif(condition=is_spark_400_or_later(),
reason="Bug in Apache Spark 4.0 causes NumberFormatExceptions from substring_index(), "
"if called with index==null. For further information, see: "
"https://issues.apache.org/jira/browse/SPARK-48989.")
Expand Down Expand Up @@ -327,6 +327,10 @@ def test_rtrim(data_gen):
'TRIM(TRAILING NULL FROM a)',
'TRIM(TRAILING "" FROM a)'))

@pytest.mark.skipif(condition=is_spark_400_or_later() or is_databricks_version_or_later(14, 3),
reason="startsWith(None)/endswith(None) seems to cause an NPE in Column.fn() on Apache Spark 4.0, "
"and Databricks 14.3."
"See https://issues.apache.org/jira/browse/SPARK-48995.")
def test_startswith():
gen = mk_str_gen('[Ab\ud720]{3}A.{0,3}Z[Ab\ud720]{3}')
assert_gpu_and_cpu_are_equal_collect(
Expand All @@ -351,8 +355,9 @@ def assert_gpu_did_fallback(op):
assert_gpu_did_fallback(f.col("a").startswith(f.col("a")))


@pytest.mark.skipif(condition=not is_before_spark_400(),
reason="endswith(None) seems to cause an NPE in Column.fn() on Apache Spark 4.0. "
@pytest.mark.skipif(condition=is_spark_400_or_later() or is_databricks_version_or_later(14, 3),
reason="startsWith(None)/endswith(None) seems to cause an NPE in Column.fn() on Apache Spark 4.0, "
"and Databricks 14.3."
"See https://issues.apache.org/jira/browse/SPARK-48995.")
def test_endswith():
gen = mk_str_gen('[Ab\ud720]{3}A.{0,3}Z[Ab\ud720]{3}')
Expand Down

0 comments on commit 7807452

Please sign in to comment.