Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed failing tests in arithmetic_ops_tests for Spark 4.0.0 [databricks] #11044

Merged
merged 9 commits into from
Jun 25, 2024
54 changes: 27 additions & 27 deletions integration_tests/src/main/python/arithmetic_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_and_cpu_error, assert_gpu_fallback_collect, assert_gpu_and_cpu_are_equal_sql
from data_gen import *
from marks import ignore_order, incompat, approximate_float, allow_non_gpu, datagen_overrides, ansi_mode_disabled
from marks import ignore_order, incompat, approximate_float, allow_non_gpu, datagen_overrides, disable_ansi_mode
from pyspark.sql.types import *
from pyspark.sql.types import IntegralType
from spark_session import *
Expand Down Expand Up @@ -99,7 +99,7 @@ def _get_overflow_df(spark, data, data_type, expr):
).selectExpr(expr)

@pytest.mark.parametrize('data_gen', _arith_data_gens, ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_addition(data_gen):
data_type = data_gen.data_type
assert_gpu_and_cpu_are_equal_collect(
Expand All @@ -124,7 +124,7 @@ def test_addition_ansi_no_overflow(data_gen):
conf=ansi_enabled_conf)

@pytest.mark.parametrize('data_gen', _arith_data_gens, ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_subtraction(data_gen):
data_type = data_gen.data_type
assert_gpu_and_cpu_are_equal_collect(
Expand All @@ -142,7 +142,7 @@ def test_subtraction(data_gen):
DecimalGen(10, -2), DecimalGen(15, 3), DecimalGen(30, 12), DecimalGen(3, -3),
DecimalGen(27, 7), DecimalGen(20, -3)], ids=idfn)
@pytest.mark.parametrize('addOrSub', ['+', '-'])
@ansi_mode_disabled
@disable_ansi_mode
def test_addition_subtraction_mixed(lhs, rhs, addOrSub):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"a {addOrSub} b")
Expand All @@ -167,7 +167,7 @@ def test_subtraction_ansi_no_overflow(data_gen):
_decimal_gen_38_10,
_decimal_gen_38_neg10
], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_multiplication(data_gen):
data_type = data_gen.data_type
assert_gpu_and_cpu_are_equal_collect(
Expand Down Expand Up @@ -211,7 +211,7 @@ def test_multiplication_ansi_overflow():
@pytest.mark.parametrize('rhs', [byte_gen, short_gen, int_gen, long_gen, DecimalGen(6, 3),
DecimalGen(10, -2), DecimalGen(15, 3), DecimalGen(30, 12), DecimalGen(3, -3),
DecimalGen(27, 7), DecimalGen(20, -3)], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_multiplication_mixed(lhs, rhs):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : two_col_df(spark, lhs, rhs).select(
Expand All @@ -229,7 +229,7 @@ def test_float_multiplication_mixed(lhs, rhs):
@pytest.mark.parametrize('data_gen', [double_gen, decimal_gen_32bit_neg_scale, DecimalGen(6, 3),
DecimalGen(5, 5), DecimalGen(6, 0), DecimalGen(7, 4), DecimalGen(15, 0), DecimalGen(18, 0),
DecimalGen(17, 2), DecimalGen(16, 4), DecimalGen(38, 21), DecimalGen(21, 17), DecimalGen(3, -2)], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_division(data_gen):
data_type = data_gen.data_type
assert_gpu_and_cpu_are_equal_collect(
Expand All @@ -242,7 +242,7 @@ def test_division(data_gen):

@pytest.mark.parametrize('rhs', [byte_gen, short_gen, int_gen, long_gen, DecimalGen(4, 1), DecimalGen(5, 0), DecimalGen(5, 1), DecimalGen(10, 5)], ids=idfn)
@pytest.mark.parametrize('lhs', [byte_gen, short_gen, int_gen, long_gen, DecimalGen(5, 3), DecimalGen(4, 2), DecimalGen(1, -2), DecimalGen(16, 1)], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_division_mixed(lhs, rhs):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : two_col_df(spark, lhs, rhs).select(
Expand All @@ -253,14 +253,14 @@ def test_division_mixed(lhs, rhs):
# instead of increasing the precision. So we have a second test that deals with a few of these use cases
@pytest.mark.parametrize('rhs', [DecimalGen(30, 10), DecimalGen(28, 18)], ids=idfn)
@pytest.mark.parametrize('lhs', [DecimalGen(27, 7), DecimalGen(20, -3)], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_division_mixed_larger_dec(lhs, rhs):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : two_col_df(spark, lhs, rhs).select(
f.col('a'), f.col('b'),
f.col('a') / f.col('b')))

@ansi_mode_disabled
@disable_ansi_mode
def test_special_decimal_division():
for precision in range(1, 39):
for scale in range(-3, precision + 1):
Expand All @@ -273,7 +273,7 @@ def test_special_decimal_division():
@approximate_float # we should get the perfectly correct answer for floats except when casting a decimal to a float in some corner cases.
@pytest.mark.parametrize('rhs', [float_gen, double_gen], ids=idfn)
@pytest.mark.parametrize('lhs', [DecimalGen(5, 3), DecimalGen(4, 2), DecimalGen(1, -2), DecimalGen(16, 1)], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_float_division_mixed(lhs, rhs):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : two_col_df(spark, lhs, rhs).select(
Expand All @@ -283,7 +283,7 @@ def test_float_division_mixed(lhs, rhs):
@pytest.mark.parametrize('data_gen', integral_gens + [
decimal_gen_32bit, decimal_gen_64bit, _decimal_gen_7_7, _decimal_gen_18_3, _decimal_gen_30_2,
_decimal_gen_36_5, _decimal_gen_38_0], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_int_division(data_gen):
string_type = to_cast_string(data_gen.data_type)
assert_gpu_and_cpu_are_equal_collect(
Expand All @@ -297,14 +297,14 @@ def test_int_division(data_gen):
@pytest.mark.parametrize('lhs', [DecimalGen(6, 5), DecimalGen(5, 4), DecimalGen(3, -2), _decimal_gen_30_2], ids=idfn)
@pytest.mark.parametrize('rhs', [DecimalGen(13, 2), DecimalGen(6, 3), _decimal_gen_38_0,
pytest.param(_decimal_gen_36_neg5, marks=pytest.mark.skipif(not is_before_spark_340() or is_databricks113_or_later(), reason='SPARK-41207'))], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_int_division_mixed(lhs, rhs):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : two_col_df(spark, lhs, rhs).selectExpr(
'a DIV b'))

@pytest.mark.parametrize('data_gen', _arith_data_gens, ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_mod(data_gen):
data_type = data_gen.data_type
assert_gpu_and_cpu_are_equal_collect(
Expand All @@ -325,7 +325,7 @@ def test_mod(data_gen):
_decimal_gen_7_7]

@pytest.mark.parametrize('data_gen', _pmod_gens, ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_pmod(data_gen):
string_type = to_cast_string(data_gen.data_type)
assert_gpu_and_cpu_are_equal_collect(
Expand All @@ -339,7 +339,7 @@ def test_pmod(data_gen):

@allow_non_gpu("ProjectExec", "Pmod")
@pytest.mark.parametrize('data_gen', test_pmod_fallback_decimal_gens + [_decimal_gen_38_0, _decimal_gen_38_10], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_pmod_fallback(data_gen):
string_type = to_cast_string(data_gen.data_type)
assert_gpu_fallback_collect(
Expand Down Expand Up @@ -431,23 +431,23 @@ def test_mod_pmod_by_zero_not_ansi(data_gen):
@pytest.mark.parametrize('rhs', [byte_gen, short_gen, int_gen, long_gen, DecimalGen(6, 3),
DecimalGen(10, -2), DecimalGen(15, 3), DecimalGen(30, 12), DecimalGen(3, -3),
DecimalGen(27, 7), DecimalGen(20, -3)], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_mod_mixed(lhs, rhs):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"a % b"))

# @pytest.mark.skipif(not is_databricks113_or_later() and not is_spark_340_or_later(), reason="https://github.com/NVIDIA/spark-rapids/issues/8330")
@pytest.mark.parametrize('lhs', [DecimalGen(38,0), DecimalGen(37,2), DecimalGen(38,5), DecimalGen(38,-10), DecimalGen(38,7)], ids=idfn)
@pytest.mark.parametrize('rhs', [DecimalGen(27,7), DecimalGen(30,10), DecimalGen(38,1), DecimalGen(36,0), DecimalGen(28,-7)], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_mod_mixed_decimal128(lhs, rhs):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : two_col_df(spark, lhs, rhs).selectExpr("a", "b", f"a % b"))

# Split into 4 tests to permute https://github.com/NVIDIA/spark-rapids/issues/7553 failures
@pytest.mark.parametrize('lhs', [byte_gen, short_gen, int_gen, long_gen], ids=idfn)
@pytest.mark.parametrize('rhs', [byte_gen, short_gen, int_gen, long_gen], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_pmod_mixed_numeric(lhs, rhs):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"pmod(a, b)"))
Expand All @@ -457,7 +457,7 @@ def test_pmod_mixed_numeric(lhs, rhs):
DecimalGen(4, 2), DecimalGen(3, -2), DecimalGen(16, 7), DecimalGen(19, 0), DecimalGen(30, 10)
], ids=idfn)
@pytest.mark.parametrize('rhs', [byte_gen, short_gen, int_gen, long_gen], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_pmod_mixed_decimal_lhs(lhs, rhs):
assert_gpu_fallback_collect(
lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"pmod(a, b)"),
Expand All @@ -468,7 +468,7 @@ def test_pmod_mixed_decimal_lhs(lhs, rhs):
@pytest.mark.parametrize('rhs', [DecimalGen(6, 3), DecimalGen(10, -2), DecimalGen(15, 3),
DecimalGen(30, 12), DecimalGen(3, -3), DecimalGen(27, 7), DecimalGen(20, -3)
], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_pmod_mixed_decimal_rhs(lhs, rhs):
assert_gpu_fallback_collect(
lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"pmod(a, b)"),
Expand All @@ -481,7 +481,7 @@ def test_pmod_mixed_decimal_rhs(lhs, rhs):
@pytest.mark.parametrize('rhs', [DecimalGen(6, 3), DecimalGen(10, -2), DecimalGen(15, 3),
DecimalGen(30, 12), DecimalGen(3, -3), DecimalGen(27, 7), DecimalGen(20, -3)
], ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_pmod_mixed_decimal(lhs, rhs):
assert_gpu_fallback_collect(
lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"pmod(a, b)"),
Expand All @@ -493,7 +493,7 @@ def test_signum(data_gen):
lambda spark : unary_op_df(spark, data_gen).selectExpr('signum(a)'))

@pytest.mark.parametrize('data_gen', numeric_gens + _arith_decimal_gens_low_precision, ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_unary_minus(data_gen):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).selectExpr('-a'))
Expand Down Expand Up @@ -543,7 +543,7 @@ def test_unary_positive(data_gen):
lambda spark : unary_op_df(spark, data_gen).selectExpr('+a'))

@pytest.mark.parametrize('data_gen', numeric_gens + _arith_decimal_gens_low_precision, ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_abs(data_gen):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).selectExpr('abs(a)'))
Expand Down Expand Up @@ -706,7 +706,7 @@ def test_shift_right_unsigned(data_gen):
@approximate_float
@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/9350")
@pytest.mark.parametrize('data_gen', _arith_data_gens_for_round, ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_decimal_bround(data_gen):
assert_gpu_and_cpu_are_equal_collect(
lambda spark: unary_op_df(spark, data_gen).selectExpr(
Expand All @@ -721,7 +721,7 @@ def test_decimal_bround(data_gen):
@approximate_float
@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/9847")
@pytest.mark.parametrize('data_gen', _arith_data_gens_for_round, ids=idfn)
@ansi_mode_disabled
@disable_ansi_mode
def test_decimal_round(data_gen):
assert_gpu_and_cpu_are_equal_collect(
lambda spark: unary_op_df(spark, data_gen).selectExpr(
Expand Down Expand Up @@ -756,7 +756,7 @@ def doit(spark):

@incompat
@approximate_float
@ansi_mode_disabled
@disable_ansi_mode
def test_non_decimal_round_overflow():
gen = StructGen([('byte_c', byte_gen), ('short_c', short_gen),
('int_c', int_gen), ('long_c', long_gen),
Expand Down
4 changes: 2 additions & 2 deletions integration_tests/src/main/python/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def is_allowing_any_non_gpu():
def get_non_gpu_allowed():
return _non_gpu_allowed

def get_per_test_ansi_mode():
def is_per_test_ansi_mode_disabled():
return _per_test_ansi_mode

def get_validate_execs_in_gpu_plan():
Expand Down Expand Up @@ -219,7 +219,7 @@ def pytest_runtest_setup(item):
_allow_any_non_gpu_databricks = False
non_gpu_databricks = item.get_closest_marker('allow_non_gpu_databricks')
non_gpu = item.get_closest_marker('allow_non_gpu')
per_test_ansi_mode = item.get_closest_marker('ansi_mode_disabled')
per_test_ansi_mode = item.get_closest_marker('disable_ansi_mode')
if per_test_ansi_mode:
_per_test_ansi_mode = "false"

Expand Down
2 changes: 1 addition & 1 deletion integration_tests/src/main/python/marks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

allow_non_gpu_databricks = pytest.mark.allow_non_gpu_databricks
allow_non_gpu = pytest.mark.allow_non_gpu
ansi_mode_disabled = pytest.mark.ansi_mode_disabled
disable_ansi_mode = pytest.mark.disable_ansi_mode
validate_execs_in_gpu_plan = pytest.mark.validate_execs_in_gpu_plan
approximate_float = pytest.mark.approximate_float
ignore_order = pytest.mark.ignore_order
Expand Down
6 changes: 3 additions & 3 deletions integration_tests/src/main/python/spark_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import calendar, time
from datetime import date, datetime
from contextlib import contextmanager, ExitStack
from conftest import is_allowing_any_non_gpu, get_non_gpu_allowed, get_validate_execs_in_gpu_plan, is_databricks_runtime, is_at_least_precommit_run, get_inject_oom_conf, get_per_test_ansi_mode
from conftest import is_allowing_any_non_gpu, get_non_gpu_allowed, get_validate_execs_in_gpu_plan, is_databricks_runtime, is_at_least_precommit_run, get_inject_oom_conf, is_per_test_ansi_mode_disabled
from pyspark.sql import DataFrame
from pyspark.sql.types import TimestampType, DateType, _acceptable_types
from spark_init_internal import get_spark_i_know_what_i_am_doing, spark_version
Expand Down Expand Up @@ -127,8 +127,8 @@ def with_spark_session(func, conf={}):
reset_spark_session_conf()
_add_job_description(conf)
# Only set the ansi conf if not set by the test explicitly by setting the value in the dict
if "spark.sql.ansi.enabled" not in conf and get_per_test_ansi_mode() is not None:
conf["spark.sql.ansi.enabled"] = get_per_test_ansi_mode()
if "spark.sql.ansi.enabled" not in conf and is_per_test_ansi_mode_disabled() is not None:
conf["spark.sql.ansi.enabled"] = is_per_test_ansi_mode_disabled()
razajafri marked this conversation as resolved.
Show resolved Hide resolved
_set_all_confs(conf)
ret = func(_spark)
_check_for_proper_return_values(ret)
Expand Down