From b8bd9601f1464ff38619ac8d5ddde1b21f94c11e Mon Sep 17 00:00:00 2001
From: MithunR
Date: Wed, 28 Aug 2024 21:43:51 -0700
Subject: [PATCH 1/7] Fix collection_ops_tests for Spark 4.0.

Fixes #11011.

This commit fixes the failures in `collection_ops_tests` on Spark 4.0.

On all versions of Spark, when a sequence is collected with more rows
than MAX_INT, an exception is thrown indicating that the collected
sequence/array is larger than permissible. The versions of Spark differ
only in the contents of the exception message.

On Spark 4, the error message carries more information than in all
prior versions, including:
1. The name of the operator causing the error
2. The errant sequence size

This commit introduces a shim to make this new information available in
the exception.

Note that this shim does not fit cleanly in RapidsErrorUtils, because
there are differences within major Spark versions. For instance, Spark
3.4.0 and 3.4.1 have a different message from 3.4.2 and 3.4.3.
Likewise, 3.5.0 differs from 3.5.1 and 3.5.2.

Signed-off-by: MithunR
---
 .../src/main/python/collection_ops_test.py   | 11 ++++--
 .../sql/rapids/collectionOperations.scala    | 13 ++++---
 .../spark/rapids/shims/GetSequenceSize.scala |  7 +++-
 .../spark/rapids/shims/GetSequenceSize.scala |  5 +--
 .../sql/rapids/shims/SequenceSizeError.scala | 35 +++++++++++++++++++
 .../sql/rapids/shims/SequenceSizeError.scala | 28 +++++++++++++++
 6 files changed, 89 insertions(+), 10 deletions(-)
 create mode 100644 sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
 create mode 100644 sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala

diff --git a/integration_tests/src/main/python/collection_ops_test.py b/integration_tests/src/main/python/collection_ops_test.py
index 099eb28c053..9731caba78b 100644
--- a/integration_tests/src/main/python/collection_ops_test.py
+++ b/integration_tests/src/main/python/collection_ops_test.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@
 from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_and_cpu_error
 from data_gen import *
 from pyspark.sql.types import *
+
+from src.main.python.spark_session import is_before_spark_400
 from string_test import mk_str_gen
 import pyspark.sql.functions as f
 import pyspark.sql.utils
@@ -326,8 +328,11 @@ def test_sequence_illegal_boundaries(start_gen, stop_gen, step_gen):
 @pytest.mark.parametrize('stop_gen', sequence_too_long_length_gens, ids=idfn)
 @allow_non_gpu(*non_utc_allow)
 def test_sequence_too_long_sequence(stop_gen):
-    msg = "Too long sequence" if is_before_spark_334() or (not is_before_spark_340() and is_before_spark_342()) \
-        or is_spark_350() else "Unsuccessful try to create array with"
+    msg = "Too long sequence" if is_before_spark_334() \
+        or (not is_before_spark_340() and is_before_spark_342()) \
+        or is_spark_350() \
+        else "Can't create array" if not is_before_spark_400() \
+        else "Unsuccessful try to create array with"
     assert_gpu_and_cpu_error(
         # To avoid OOM, reduce the row number to 1, it is enough to verify this case.
         lambda spark: unary_op_df(spark, stop_gen, 1).selectExpr(
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala
index 7543d113bfb..51590bbde28 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.rapids
 import java.util.Optional
 
 import ai.rapids.cudf
-import ai.rapids.cudf.{BinaryOp, ColumnVector, ColumnView, DType, Scalar, SegmentedReductionAggregation, Table}
+import ai.rapids.cudf.{BinaryOp, ColumnVector, ColumnView, DType, ReductionAggregation, Scalar, SegmentedReductionAggregation, Table}
 import com.nvidia.spark.rapids._
 import com.nvidia.spark.rapids.Arm._
 import com.nvidia.spark.rapids.ArrayIndexUtils.firstIndexAndNumElementUnchecked
@@ -1535,7 +1535,8 @@ object GpuSequenceUtil {
   def computeSequenceSize(
       start: ColumnVector,
       stop: ColumnVector,
-      step: ColumnVector): ColumnVector = {
+      step: ColumnVector,
+      functionName: String): ColumnVector = {
     checkSequenceInputs(start, stop, step)
     val actualSize = GetSequenceSize(start, stop, step)
     val sizeAsLong = withResource(actualSize) { _ =>
@@ -1557,7 +1558,11 @@
     // check max size
     withResource(Scalar.fromInt(MAX_ROUNDED_ARRAY_LENGTH)) { maxLen =>
       withResource(sizeAsLong.lessOrEqualTo(maxLen)) { allValid =>
-        require(isAllValidTrue(allValid), GetSequenceSize.TOO_LONG_SEQUENCE)
+        withResource(sizeAsLong.reduce(ReductionAggregation.max())) { maxSizeScalar =>
+          require(isAllValidTrue(allValid),
+            GetSequenceSize.TOO_LONG_SEQUENCE(maxSizeScalar.getLong.asInstanceOf[Int],
+              functionName))
+        }
       }
     }
     // cast to int and return
@@ -1597,7 +1602,7 @@ case class GpuSequence(start: Expression, stop: Expression, stepOpt: Option[Expr
       val steps = stepGpuColOpt.map(_.getBase.incRefCount())
         .getOrElse(defaultStepsFunc(startCol, stopCol))
       closeOnExcept(steps) { _ =>
-        (computeSequenceSize(startCol, stopCol, steps), steps)
+        (computeSequenceSize(startCol, stopCol, steps, prettyName), steps)
       }
     }
 
diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
index 32ca03974bf..e00aa26baad 100644
--- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
+++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
@@ -42,7 +42,12 @@ import com.nvidia.spark.rapids.Arm._
 import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
 
 object GetSequenceSize {
-  val TOO_LONG_SEQUENCE = s"Too long sequence found. Should be <= $MAX_ROUNDED_ARRAY_LENGTH"
+  def TOO_LONG_SEQUENCE(sequenceLength: Int, functionName: String) = {
+    // For these Spark versions, the sequence length and function name
+    // do not appear in the exception message.
+    s"Too long sequence found. Should be <= $MAX_ROUNDED_ARRAY_LENGTH"
+  }
+
   /**
    * Compute the size of each sequence according to 'start', 'stop' and 'step'.
    * A row (Row[start, stop, step]) contains at least one null element will produce
diff --git a/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala b/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
index aba0f465483..93956610933 100644
--- a/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
+++ b/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
@@ -28,11 +28,12 @@ import ai.rapids.cudf._
 import com.nvidia.spark.rapids.Arm._
 
 import org.apache.spark.sql.rapids.{AddOverflowChecks, SubtractOverflowChecks}
+import org.apache.spark.sql.rapids.shims.SequenceSizeError
 import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
 
 object GetSequenceSize {
-  val TOO_LONG_SEQUENCE = "Unsuccessful try to create array with elements exceeding the array " +
-    s"size limit $MAX_ROUNDED_ARRAY_LENGTH"
+  def TOO_LONG_SEQUENCE(sequenceLength: Int, functionName: String): String =
+    SequenceSizeError.getTooLongSequenceErrorString(sequenceLength, functionName)
   /**
    * Compute the size of each sequence according to 'start', 'stop' and 'step'.
    * A row (Row[start, stop, step]) contains at least one null element will produce
diff --git a/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala b/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
new file mode 100644
index 00000000000..eadcc497318
--- /dev/null
+++ b/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*** spark-rapids-shim-json-lines
+{"spark": "334"}
+{"spark": "342"}
+{"spark": "343"}
+{"spark": "351"}
+{"spark": "352"}
+spark-rapids-shim-json-lines ***/
+package org.apache.spark.sql.rapids.shims
+
+import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
+
+object SequenceSizeError {
+  def getTooLongSequenceErrorString(sequenceSize: Int, functionName: String): String = {
+    // The errant function's name does not feature in the exception message
+    // prior to Spark 4.0. Neither does the attempted allocation size.
+    "Unsuccessful try to create array with elements exceeding the array " +
+      s"size limit $MAX_ROUNDED_ARRAY_LENGTH"
+  }
+}
diff --git a/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
new file mode 100644
index 00000000000..aede65d02fa
--- /dev/null
+++ b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*** spark-rapids-shim-json-lines
+{"spark": "400"}
+spark-rapids-shim-json-lines ***/
+package org.apache.spark.sql.rapids.shims
+
+import org.apache.spark.sql.errors.QueryExecutionErrors
+
+object SequenceSizeError {
+  def getTooLongSequenceErrorString(sequenceSize: Int, functionName: String): String = {
+    QueryExecutionErrors.createArrayWithElementsExceedLimitError(functionName, sequenceSize).getMessage
+  }
+}

From ee2eb810ef69b63a02526aab227671e09d9d86f7 Mon Sep 17 00:00:00 2001
From: MithunR
Date: Wed, 4 Sep 2024 15:04:46 -0700
Subject: [PATCH 2/7] Fixed formatting error.

---
 .../org/apache/spark/sql/rapids/shims/SequenceSizeError.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
index aede65d02fa..6c7cef55fab 100644
--- a/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
+++ b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
@@ -23,6 +23,7 @@ import org.apache.spark.sql.errors.QueryExecutionErrors
 
 object SequenceSizeError {
   def getTooLongSequenceErrorString(sequenceSize: Int, functionName: String): String = {
-    QueryExecutionErrors.createArrayWithElementsExceedLimitError(functionName, sequenceSize).getMessage
+    QueryExecutionErrors.createArrayWithElementsExceedLimitError(functionName, sequenceSize)
+      .getMessage
   }
 }

From 2427bf3718de300577fd72e10e1035e2d10bb90e Mon Sep 17 00:00:00 2001
From: MithunR
Date: Fri, 27 Sep 2024 19:01:30 -0700
Subject: [PATCH 3/7] Review comments.

This moves the construction of the long-sequence error strings into
RapidsErrorUtils. The process involved introducing many new
RapidsErrorUtils classes, and using mix-ins of concrete implementations
for the error-string construction.
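
In outline, the result has this shape (a condensed sketch of the code in
the diffs below, with licenses, shim tags and method bodies elided; the
trait and object names are the real ones):

    // One small trait per message variant:
    trait SequenceSizeTooLongErrorBuilder {
      def getTooLongSequenceErrorString(sequenceSize: Int, functionName: String): String =
        s"Too long sequence found. Should be <= $MAX_ROUNDED_ARRAY_LENGTH"
    }

    // Each Spark-version shim assembles its RapidsErrorUtils by mixing the
    // matching message trait into the shared base implementation:
    object RapidsErrorUtils extends RapidsErrorUtils330To334Base
      with SequenceSizeTooLongErrorBuilder

The GPU sequence code can then call
RapidsErrorUtils.getTooLongSequenceErrorString(...) without knowing which
Spark version's wording is in effect.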
---
 .../src/main/python/collection_ops_test.py   |  2 +-
 .../sql/rapids/collectionOperations.scala     |  5 +-
 .../spark/rapids/shims/GetSequenceSize.scala  |  8 --
 .../SequenceSizeTooLongErrorBuilder.scala     | 48 ++++++++++
 .../sql/rapids/shims/RapidsErrorUtils.scala   |  2 +-
 .../sql/rapids/shims/RapidsErrorUtils.scala   | 59 +-----------
 .../shims/RapidsErrorUtils330To334Base.scala  | 84 ++++++++++++++++
 .../sql/rapids/shims/RapidsErrorUtils.scala   |  4 +-
 .../spark/rapids/shims/GetSequenceSize.scala  |  3 -
 .../sql/rapids/shims/RapidsErrorUtils.scala   | 24 +++++
 ...SizeTooLongUnsuccessfulErrorBuilder.scala} |  2 +-
 .../sql/rapids/shims/RapidsErrorUtils.scala   | 75 +--------------
 .../shims/RapidsErrorUtils340PlusBase.scala   | 96 +++++++++++++++++++
 .../sql/rapids/shims/RapidsErrorUtils.scala   |  4 +-
 .../sql/rapids/shims/RapidsErrorUtils.scala   | 25 +++++
 .../sql/rapids/shims/RapidsErrorUtils.scala   | 23 +++++
 ...quenceSizeExceededLimitErrorBuilder.scala} |  2 +-
 17 files changed, 317 insertions(+), 149 deletions(-)
 create mode 100644 sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/SequenceSizeTooLongErrorBuilder.scala
 create mode 100644 sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils330To334Base.scala
 create mode 100644 sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
 rename sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/{SequenceSizeError.scala => SequenceSizeTooLongUnsuccessfulErrorBuilder.scala} (96%)
 create mode 100644 sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils340PlusBase.scala
 create mode 100644 sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
 create mode 100644 sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
 rename sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/{SequenceSizeError.scala => SequenceSizeExceededLimitErrorBuilder.scala} (95%)

diff --git a/integration_tests/src/main/python/collection_ops_test.py b/integration_tests/src/main/python/collection_ops_test.py
index 9731caba78b..813f1a77c94 100644
--- a/integration_tests/src/main/python/collection_ops_test.py
+++ b/integration_tests/src/main/python/collection_ops_test.py
@@ -18,7 +18,7 @@
 from data_gen import *
 from pyspark.sql.types import *
 
-from src.main.python.spark_session import is_before_spark_400
+from spark_session import is_before_spark_400
 from string_test import mk_str_gen
 import pyspark.sql.functions as f
 import pyspark.sql.utils
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala
index 5eb68336a8f..23b823e7117 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala
@@ -1676,8 +1676,9 @@
     withResource(sizeAsLong.lessOrEqualTo(maxLen)) { allValid =>
       withResource(sizeAsLong.reduce(ReductionAggregation.max())) { maxSizeScalar =>
         require(isAllValidTrue(allValid),
-          GetSequenceSize.TOO_LONG_SEQUENCE(maxSizeScalar.getLong.asInstanceOf[Int],
-            functionName))
+          RapidsErrorUtils.getTooLongSequenceErrorString(
+            maxSizeScalar.getLong.asInstanceOf[Int],
+            functionName))
       }
     }
   }
diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
index e00aa26baad..deb305cc89c 100644
--- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
+++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
@@ -39,15 +39,7 @@ package com.nvidia.spark.rapids.shims
 import ai.rapids.cudf._
 import com.nvidia.spark.rapids.Arm._
 
-import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
-
 object GetSequenceSize {
-  def TOO_LONG_SEQUENCE(sequenceLength: Int, functionName: String) = {
-    // For these Spark versions, the sequence length and function name
-    // do not appear in the exception message.
-    s"Too long sequence found. Should be <= $MAX_ROUNDED_ARRAY_LENGTH"
-  }
-
   /**
    * Compute the size of each sequence according to 'start', 'stop' and 'step'.
    * A row (Row[start, stop, step]) contains at least one null element will produce
diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/SequenceSizeTooLongErrorBuilder.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/SequenceSizeTooLongErrorBuilder.scala
new file mode 100644
index 00000000000..32d38540cb5
--- /dev/null
+++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/SequenceSizeTooLongErrorBuilder.scala
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*** spark-rapids-shim-json-lines
+{"spark": "320"}
+{"spark": "321"}
+{"spark": "321cdh"}
+{"spark": "322"}
+{"spark": "323"}
+{"spark": "324"}
+{"spark": "330"}
+{"spark": "330cdh"}
+{"spark": "330db"}
+{"spark": "331"}
+{"spark": "332"}
+{"spark": "332cdh"}
+{"spark": "332db"}
+{"spark": "333"}
+{"spark": "340"}
+{"spark": "341"}
+{"spark": "341db"}
+{"spark": "350"}
+spark-rapids-shim-json-lines ***/
+package org.apache.spark.sql.rapids.shims
+
+import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
+
+trait SequenceSizeTooLongErrorBuilder {
+
+  def getTooLongSequenceErrorString(sequenceSize: Int, functionName: String): String = {
+    // For these Spark versions, the sequence length and function name
+    // do not appear in the exception message.
+    s"Too long sequence found. Should be <= $MAX_ROUNDED_ARRAY_LENGTH"
+  }
+}
\ No newline at end of file
diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index 68a6ce30569..dd387d453b5 100644
--- a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.trees.Origin
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
 import org.apache.spark.sql.types.{DataType, Decimal, DecimalType}
 
-object RapidsErrorUtils extends RapidsQueryErrorUtils {
+object RapidsErrorUtils extends RapidsQueryErrorUtils with SequenceSizeTooLongErrorBuilder {
   def invalidArrayIndexError(index: Int, numElements: Int,
       isElementAtF: Boolean = false): ArrayIndexOutOfBoundsException = {
     // Follow the Spark string format before 3.3.0
diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index e5cdcd43568..80c61a9d481 100644
--- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -21,64 +21,9 @@
 {"spark": "332"}
 {"spark": "332cdh"}
 {"spark": "333"}
-{"spark": "334"}
 spark-rapids-shim-json-lines ***/
 package org.apache.spark.sql.rapids.shims
 
-import org.apache.spark.SparkDateTimeException
-import org.apache.spark.sql.catalyst.trees.Origin
-import org.apache.spark.sql.errors.QueryExecutionErrors
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{DataType, Decimal, DecimalType}
+object RapidsErrorUtils extends RapidsErrorUtils330To334Base
+    with SequenceSizeTooLongErrorBuilder
+
-object RapidsErrorUtils extends RapidsErrorUtilsFor330plus with RapidsQueryErrorUtils {
-
-  def mapKeyNotExistError(
-      key: String,
-      keyType: DataType,
-      origin: Origin): NoSuchElementException = {
-    QueryExecutionErrors.mapKeyNotExistError(key, keyType, origin.context)
-  }
-
-  def invalidArrayIndexError(index: Int, numElements: Int,
-      isElementAtF: Boolean = false): ArrayIndexOutOfBoundsException = {
-    if (isElementAtF) {
-      QueryExecutionErrors.invalidElementAtIndexError(index, numElements)
-    } else {
-      QueryExecutionErrors.invalidArrayIndexError(index, numElements)
-    }
-  }
-
-  def arithmeticOverflowError(
-      message: String,
-      hint: String = "",
-      errorContext: String = ""): ArithmeticException = {
-    QueryExecutionErrors.arithmeticOverflowError(message, hint, errorContext)
-  }
-
-  def cannotChangeDecimalPrecisionError(
-      value: Decimal,
-      toType: DecimalType,
-      context: String = ""): ArithmeticException = {
-    QueryExecutionErrors.cannotChangeDecimalPrecisionError(
-      value, toType.precision, toType.scale, context
-    )
-  }
-
-  def overflowInIntegralDivideError(context: String = ""): ArithmeticException = {
-    QueryExecutionErrors.arithmeticOverflowError(
-      "Overflow in integral divide", "try_divide", context
-    )
-  }
-
-  def sparkDateTimeException(infOrNan: String): SparkDateTimeException = {
-    // These are the arguments required by SparkDateTimeException class to create error message.
-    val errorClass = "CAST_INVALID_INPUT"
-    val messageParameters = Array("DOUBLE", "TIMESTAMP", SQLConf.ANSI_ENABLED.key)
-    new SparkDateTimeException(errorClass, Array(infOrNan) ++ messageParameters)
-  }
-
-  def sqlArrayIndexNotStartAtOneError(): RuntimeException = {
-    new ArrayIndexOutOfBoundsException("SQL array indices start at 1")
-  }
-}
diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils330To334Base.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils330To334Base.scala
new file mode 100644
index 00000000000..0e8f9261d6e
--- /dev/null
+++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils330To334Base.scala
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*** spark-rapids-shim-json-lines
+{"spark": "330"}
+{"spark": "330cdh"}
+{"spark": "331"}
+{"spark": "332"}
+{"spark": "332cdh"}
+{"spark": "333"}
+{"spark": "334"}
+spark-rapids-shim-json-lines ***/
+package org.apache.spark.sql.rapids.shims
+
+import org.apache.spark.SparkDateTimeException
+import org.apache.spark.sql.catalyst.trees.Origin
+import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{DataType, Decimal, DecimalType}
+
+trait RapidsErrorUtils330To334Base extends RapidsErrorUtilsFor330plus with RapidsQueryErrorUtils {
+
+  def mapKeyNotExistError(
+    key: String,
+    keyType: DataType,
+    origin: Origin): NoSuchElementException = {
+    QueryExecutionErrors.mapKeyNotExistError(key, keyType, origin.context)
+  }
+
+  def invalidArrayIndexError(index: Int, numElements: Int,
+    isElementAtF: Boolean = false): ArrayIndexOutOfBoundsException = {
+    if (isElementAtF) {
+      QueryExecutionErrors.invalidElementAtIndexError(index, numElements)
+    } else {
+      QueryExecutionErrors.invalidArrayIndexError(index, numElements)
+    }
+  }
+
+  def arithmeticOverflowError(
+    message: String,
+    hint: String = "",
+    errorContext: String = ""): ArithmeticException = {
+    QueryExecutionErrors.arithmeticOverflowError(message, hint, errorContext)
+  }
+
+  def cannotChangeDecimalPrecisionError(
+    value: Decimal,
+    toType: DecimalType,
+    context: String = ""): ArithmeticException = {
+    QueryExecutionErrors.cannotChangeDecimalPrecisionError(
+      value, toType.precision, toType.scale, context
+    )
+  }
+
+  def overflowInIntegralDivideError(context: String = ""): ArithmeticException = {
+    QueryExecutionErrors.arithmeticOverflowError(
+      "Overflow in integral divide", "try_divide", context
+    )
+  }
+
+  def sparkDateTimeException(infOrNan: String): SparkDateTimeException = {
+    // These are the arguments required by SparkDateTimeException class to create error message.
+    val errorClass = "CAST_INVALID_INPUT"
+    val messageParameters = Array("DOUBLE", "TIMESTAMP", SQLConf.ANSI_ENABLED.key)
+    new SparkDateTimeException(errorClass, Array(infOrNan) ++ messageParameters)
+  }
+
+  def sqlArrayIndexNotStartAtOneError(): RuntimeException = {
+    new ArrayIndexOutOfBoundsException("SQL array indices start at 1")
+  }
+}
diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index 7e58a54c921..0f40c6e3bfd 100644
--- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -22,7 +22,9 @@ package org.apache.spark.sql.rapids.shims
 
 import org.apache.spark.sql.errors.QueryExecutionErrors
 
-object RapidsErrorUtils extends RapidsErrorUtilsBase with RapidsQueryErrorUtils {
+object RapidsErrorUtils extends RapidsErrorUtilsBase
+  with RapidsQueryErrorUtils
+  with SequenceSizeTooLongErrorBuilder {
   def sqlArrayIndexNotStartAtOneError(): RuntimeException = {
     QueryExecutionErrors.elementAtByIndexZeroError(context = null)
   }
diff --git a/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala b/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
index 93956610933..f386973200a 100644
--- a/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
+++ b/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala
@@ -28,12 +28,9 @@ import ai.rapids.cudf._
 import com.nvidia.spark.rapids.Arm._
 
 import org.apache.spark.sql.rapids.{AddOverflowChecks, SubtractOverflowChecks}
-import org.apache.spark.sql.rapids.shims.SequenceSizeError
 import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
 
 object GetSequenceSize {
-  def TOO_LONG_SEQUENCE(sequenceLength: Int, functionName: String): String =
-    SequenceSizeError.getTooLongSequenceErrorString(sequenceLength, functionName)
   /**
    * Compute the size of each sequence according to 'start', 'stop' and 'step'.
    * A row (Row[start, stop, step]) contains at least one null element will produce
diff --git a/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
new file mode 100644
index 00000000000..0376d2e69a7
--- /dev/null
+++ b/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*** spark-rapids-shim-json-lines
+{"spark": "334"}
+spark-rapids-shim-json-lines ***/
+package org.apache.spark.sql.rapids.shims
+
+object RapidsErrorUtils extends RapidsErrorUtils330To334Base
+    with SequenceSizeTooLongUnsuccessfulErrorBuilder
+
diff --git a/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala b/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/SequenceSizeTooLongUnsuccessfulErrorBuilder.scala
similarity index 96%
rename from sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
rename to sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/SequenceSizeTooLongUnsuccessfulErrorBuilder.scala
index eadcc497318..5e584de7167 100644
--- a/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
+++ b/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/SequenceSizeTooLongUnsuccessfulErrorBuilder.scala
@@ -25,7 +25,7 @@ package org.apache.spark.sql.rapids.shims
 
 import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH
 
-object SequenceSizeError {
+trait SequenceSizeTooLongUnsuccessfulErrorBuilder {
   def getTooLongSequenceErrorString(sequenceSize: Int, functionName: String): String = {
     // The errant function's name does not feature in the exception message
     // prior to Spark 4.0. Neither does the attempted allocation size.
diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index 0bf3e66d556..fadce5a8231 100644
--- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -17,80 +17,9 @@
 /*** spark-rapids-shim-json-lines
 {"spark": "340"}
 {"spark": "341"}
-{"spark": "342"}
-{"spark": "343"}
 {"spark": "350"}
-{"spark": "351"}
-{"spark": "352"}
-{"spark": "400"}
 spark-rapids-shim-json-lines ***/
 package org.apache.spark.sql.rapids.shims
 
-import org.apache.spark.SparkDateTimeException
-import org.apache.spark.sql.catalyst.trees.{Origin, SQLQueryContext}
-import org.apache.spark.sql.errors.QueryExecutionErrors
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{DataType, Decimal, DecimalType}
-
-object RapidsErrorUtils extends RapidsErrorUtilsFor330plus with RapidsQueryErrorUtils {
-
-  def mapKeyNotExistError(
-      key: String,
-      keyType: DataType,
-      origin: Origin): NoSuchElementException = {
-    throw new UnsupportedOperationException(
-      "`mapKeyNotExistError` has been removed since Spark 3.4.0. "
-    )
-  }
-
-  def invalidArrayIndexError(
-      index: Int,
-      numElements: Int,
-      isElementAtF: Boolean = false,
-      context: SQLQueryContext = null): ArrayIndexOutOfBoundsException = {
-    if (isElementAtF) {
-      QueryExecutionErrors.invalidElementAtIndexError(index, numElements, context)
-    } else {
-      QueryExecutionErrors.invalidArrayIndexError(index, numElements, context)
-    }
-  }
-
-  def arithmeticOverflowError(
-      message: String,
-      hint: String = "",
-      errorContext: SQLQueryContext = null): ArithmeticException = {
-    QueryExecutionErrors.arithmeticOverflowError(message, hint, errorContext)
-  }
-
-  def cannotChangeDecimalPrecisionError(
-      value: Decimal,
-      toType: DecimalType,
-      context: SQLQueryContext = null): ArithmeticException = {
-    QueryExecutionErrors.cannotChangeDecimalPrecisionError(
-      value, toType.precision, toType.scale, context
-    )
-  }
-
-  def overflowInIntegralDivideError(context: SQLQueryContext = null): ArithmeticException = {
-    QueryExecutionErrors.arithmeticOverflowError(
-      "Overflow in integral divide", "try_divide", context
-    )
-  }
-
-  def sparkDateTimeException(infOrNan: String): SparkDateTimeException = {
-    // These are the arguments required by SparkDateTimeException class to create error message.
-    val errorClass = "CAST_INVALID_INPUT"
-    val messageParameters = Map("expression" -> infOrNan, "sourceType" -> "DOUBLE",
-      "targetType" -> "TIMESTAMP", "ansiConfig" -> SQLConf.ANSI_ENABLED.key)
-    SparkDateTimeExceptionShims.newSparkDateTimeException(errorClass, messageParameters,
-      Array.empty, "")
-  }
-
-  def sqlArrayIndexNotStartAtOneError(): RuntimeException = {
-    QueryExecutionErrors.invalidIndexOfZeroError(context = null)
-  }
-
-  override def intervalDivByZeroError(origin: Origin): ArithmeticException = {
-    QueryExecutionErrors.intervalDividedByZeroError(origin.context)
-  }
-}
+object RapidsErrorUtils extends RapidsErrorUtils340PlusBase
+    with SequenceSizeTooLongErrorBuilder
diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils340PlusBase.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils340PlusBase.scala
new file mode 100644
index 00000000000..173b06e3f8f
--- /dev/null
+++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils340PlusBase.scala
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*** spark-rapids-shim-json-lines
+{"spark": "340"}
+{"spark": "341"}
+{"spark": "342"}
+{"spark": "343"}
+{"spark": "350"}
+{"spark": "351"}
+{"spark": "352"}
+{"spark": "400"}
+spark-rapids-shim-json-lines ***/
+package org.apache.spark.sql.rapids.shims
+
+import org.apache.spark.SparkDateTimeException
+import org.apache.spark.sql.catalyst.trees.{Origin, SQLQueryContext}
+import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{DataType, Decimal, DecimalType}
+
+trait RapidsErrorUtils340PlusBase extends RapidsErrorUtilsFor330plus with RapidsQueryErrorUtils {
+
+  def mapKeyNotExistError(
+    key: String,
+    keyType: DataType,
+    origin: Origin): NoSuchElementException = {
+    throw new UnsupportedOperationException(
+      "`mapKeyNotExistError` has been removed since Spark 3.4.0. "
+    )
+  }
+
+  def invalidArrayIndexError(
+    index: Int,
+    numElements: Int,
+    isElementAtF: Boolean = false,
+    context: SQLQueryContext = null): ArrayIndexOutOfBoundsException = {
+    if (isElementAtF) {
+      QueryExecutionErrors.invalidElementAtIndexError(index, numElements, context)
+    } else {
+      QueryExecutionErrors.invalidArrayIndexError(index, numElements, context)
+    }
+  }
+
+  def arithmeticOverflowError(
+    message: String,
+    hint: String = "",
+    errorContext: SQLQueryContext = null): ArithmeticException = {
+    QueryExecutionErrors.arithmeticOverflowError(message, hint, errorContext)
+  }
+
+  def cannotChangeDecimalPrecisionError(
+    value: Decimal,
+    toType: DecimalType,
+    context: SQLQueryContext = null): ArithmeticException = {
+    QueryExecutionErrors.cannotChangeDecimalPrecisionError(
+      value, toType.precision, toType.scale, context
+    )
+  }
+
+  def overflowInIntegralDivideError(context: SQLQueryContext = null): ArithmeticException = {
+    QueryExecutionErrors.arithmeticOverflowError(
+      "Overflow in integral divide", "try_divide", context
+    )
+  }
+
+  def sparkDateTimeException(infOrNan: String): SparkDateTimeException = {
+    // These are the arguments required by SparkDateTimeException class to create error message.
+    val errorClass = "CAST_INVALID_INPUT"
+    val messageParameters = Map("expression" -> infOrNan, "sourceType" -> "DOUBLE",
+      "targetType" -> "TIMESTAMP", "ansiConfig" -> SQLConf.ANSI_ENABLED.key)
+    SparkDateTimeExceptionShims.newSparkDateTimeException(errorClass, messageParameters,
+      Array.empty, "")
+  }
+
+  def sqlArrayIndexNotStartAtOneError(): RuntimeException = {
+    QueryExecutionErrors.invalidIndexOfZeroError(context = null)
+  }
+
+  override def intervalDivByZeroError(origin: Origin): ArithmeticException = {
+    QueryExecutionErrors.intervalDividedByZeroError(origin.context)
+  }
+}
diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index 9b800d4e51a..37393604f42 100644
--- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -21,7 +21,9 @@ package org.apache.spark.sql.rapids.shims
 
 import org.apache.spark.sql.errors.QueryExecutionErrors
 
-object RapidsErrorUtils extends RapidsErrorUtilsBase with RapidsQueryErrorUtils {
+object RapidsErrorUtils extends RapidsErrorUtilsBase
+  with RapidsQueryErrorUtils
+  with SequenceSizeTooLongErrorBuilder {
   def sqlArrayIndexNotStartAtOneError(): RuntimeException = {
     QueryExecutionErrors.invalidIndexOfZeroError(context = null)
   }
diff --git a/sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
new file mode 100644
index 00000000000..7ffb3b16cd7
--- /dev/null
+++ b/sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*** spark-rapids-shim-json-lines
+{"spark": "342"}
+{"spark": "343"}
+{"spark": "351"}
+spark-rapids-shim-json-lines ***/
+package org.apache.spark.sql.rapids.shims
+
+object RapidsErrorUtils extends RapidsErrorUtils340PlusBase
+    with SequenceSizeTooLongUnsuccessfulErrorBuilder
diff --git a/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
new file mode 100644
index 00000000000..a7eca011383
--- /dev/null
+++ b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*** spark-rapids-shim-json-lines
+{"spark": "400"}
+spark-rapids-shim-json-lines ***/
+package org.apache.spark.sql.rapids.shims
+
+object RapidsErrorUtils extends RapidsErrorUtils340PlusBase
+    with SequenceSizeExceededLimitErrorBuilder
diff --git a/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeExceededLimitErrorBuilder.scala
similarity index 95%
rename from sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
rename to sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeExceededLimitErrorBuilder.scala
index 6c7cef55fab..741634aea3f 100644
--- a/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeError.scala
+++ b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/SequenceSizeExceededLimitErrorBuilder.scala
@@ -21,7 +21,7 @@ package org.apache.spark.sql.rapids.shims
 
 import org.apache.spark.sql.errors.QueryExecutionErrors
 
-object SequenceSizeError {
+trait SequenceSizeExceededLimitErrorBuilder {
   def getTooLongSequenceErrorString(sequenceSize: Int, functionName: String): String = {
     QueryExecutionErrors.createArrayWithElementsExceedLimitError(functionName, sequenceSize)
       .getMessage

From cc4ae453d44873210f5bbad805958c909adcbb5c Mon Sep 17 00:00:00 2001
From: MithunR
Date: Fri, 27 Sep 2024 23:05:52 -0700
Subject: [PATCH 4/7] Added missing shim tag for 3.5.2.

---
 .../org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index 7ffb3b16cd7..b07ea3b1c7e 100644
--- a/sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -18,6 +18,7 @@
 {"spark": "342"}
 {"spark": "343"}
 {"spark": "351"}
+{"spark": "352"}
 spark-rapids-shim-json-lines ***/
 package org.apache.spark.sql.rapids.shims
 

From 0bfac65de01f2eb7e2626135be6115943ed982d0 Mon Sep 17 00:00:00 2001
From: MithunR
Date: Thu, 3 Oct 2024 15:46:01 -0700
Subject: [PATCH 5/7] Review comments: Fixed code style.
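
The target style, condensed (the exact indentation is an assumption, since
this archive does not preserve the original whitespace): continuation `with`
clauses indent two spaces under the `object`, and short mix-in lists share a
line rather than wrapping one trait per line:

    object RapidsErrorUtils extends RapidsErrorUtilsBase
      with RapidsQueryErrorUtils with SequenceSizeTooLongErrorBuilder {
      // ...
    }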
---
 .../org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala | 2 +-
 .../org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala | 3 +--
 .../org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala | 2 +-
 .../org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala | 2 +-
 .../org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala | 3 +--
 .../org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala | 2 +-
 .../org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala | 2 +-
 7 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index 80c61a9d481..a08f38e5596 100644
--- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -25,5 +25,5 @@ spark-rapids-shim-json-lines ***/
 package org.apache.spark.sql.rapids.shims
 
 object RapidsErrorUtils extends RapidsErrorUtils330To334Base
-    with SequenceSizeTooLongErrorBuilder
+  with SequenceSizeTooLongErrorBuilder
 
diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index 0f40c6e3bfd..1b9bafff947 100644
--- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -23,8 +23,7 @@ package org.apache.spark.sql.rapids.shims
 import org.apache.spark.sql.errors.QueryExecutionErrors
 
 object RapidsErrorUtils extends RapidsErrorUtilsBase
-  with RapidsQueryErrorUtils
-  with SequenceSizeTooLongErrorBuilder {
+  with RapidsQueryErrorUtils with SequenceSizeTooLongErrorBuilder {
   def sqlArrayIndexNotStartAtOneError(): RuntimeException = {
     QueryExecutionErrors.elementAtByIndexZeroError(context = null)
   }
diff --git a/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index 0376d2e69a7..b91c5ed360b 100644
--- a/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark334/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -20,5 +20,5 @@ spark-rapids-shim-json-lines ***/
 package org.apache.spark.sql.rapids.shims
 
 object RapidsErrorUtils extends RapidsErrorUtils330To334Base
-    with SequenceSizeTooLongUnsuccessfulErrorBuilder
+  with SequenceSizeTooLongUnsuccessfulErrorBuilder
 
diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index fadce5a8231..815e8d9dbb0 100644
--- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -22,4 +22,4 @@ spark-rapids-shim-json-lines ***/
 package org.apache.spark.sql.rapids.shims
 
 object RapidsErrorUtils extends RapidsErrorUtils340PlusBase
-    with SequenceSizeTooLongErrorBuilder
+  with SequenceSizeTooLongErrorBuilder
diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index 37393604f42..f3aa56d5f4d 100644
--- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -22,8 +22,7 @@ package org.apache.spark.sql.rapids.shims
 import org.apache.spark.sql.errors.QueryExecutionErrors
 
 object RapidsErrorUtils extends RapidsErrorUtilsBase
-  with RapidsQueryErrorUtils
-  with SequenceSizeTooLongErrorBuilder {
+  with RapidsQueryErrorUtils with SequenceSizeTooLongErrorBuilder {
   def sqlArrayIndexNotStartAtOneError(): RuntimeException = {
     QueryExecutionErrors.invalidIndexOfZeroError(context = null)
   }
diff --git a/sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index b07ea3b1c7e..a1c038e1148 100644
--- a/sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark342/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -23,4 +23,4 @@ spark-rapids-shim-json-lines ***/
 package org.apache.spark.sql.rapids.shims
 
 object RapidsErrorUtils extends RapidsErrorUtils340PlusBase
-    with SequenceSizeTooLongUnsuccessfulErrorBuilder
+  with SequenceSizeTooLongUnsuccessfulErrorBuilder
diff --git a/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
index a7eca011383..51f56f612fd 100644
--- a/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
+++ b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala
@@ -20,4 +20,4 @@ spark-rapids-shim-json-lines ***/
 package org.apache.spark.sql.rapids.shims
 
 object RapidsErrorUtils extends RapidsErrorUtils340PlusBase
-    with SequenceSizeExceededLimitErrorBuilder
+  with SequenceSizeExceededLimitErrorBuilder

From fe704af6a476478caa3296061335322812db58ff Mon Sep 17 00:00:00 2001
From: MithunR
Date: Tue, 8 Oct 2024 14:29:28 -0700
Subject: [PATCH 6/7] Reformatting, per project guideline.

---
 .../shims/RapidsErrorUtils340PlusBase.scala | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils340PlusBase.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils340PlusBase.scala
index 173b06e3f8f..366cbb289c7 100644
--- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils340PlusBase.scala
+++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils340PlusBase.scala
@@ -35,19 +35,19 @@ import org.apache.spark.sql.types.{DataType, Decimal, DecimalType}
 trait RapidsErrorUtils340PlusBase extends RapidsErrorUtilsFor330plus with RapidsQueryErrorUtils {
 
   def mapKeyNotExistError(
-    key: String,
-    keyType: DataType,
-    origin: Origin): NoSuchElementException = {
+      key: String,
+      keyType: DataType,
+      origin: Origin): NoSuchElementException = {
     throw new UnsupportedOperationException(
       "`mapKeyNotExistError` has been removed since Spark 3.4.0. "
     )
   }
 
   def invalidArrayIndexError(
-    index: Int,
-    numElements: Int,
-    isElementAtF: Boolean = false,
-    context: SQLQueryContext = null): ArrayIndexOutOfBoundsException = {
+      index: Int,
+      numElements: Int,
+      isElementAtF: Boolean = false,
+      context: SQLQueryContext = null): ArrayIndexOutOfBoundsException = {
     if (isElementAtF) {
       QueryExecutionErrors.invalidElementAtIndexError(index, numElements, context)
     } else {
       QueryExecutionErrors.invalidArrayIndexError(index, numElements, context)
@@ -56,16 +56,16 @@ trait RapidsErrorUtils340PlusBase extends RapidsErrorUtilsFor330plus with Rapids
   }
 
   def arithmeticOverflowError(
-    message: String,
-    hint: String = "",
-    errorContext: SQLQueryContext = null): ArithmeticException = {
+      message: String,
+      hint: String = "",
+      errorContext: SQLQueryContext = null): ArithmeticException = {
     QueryExecutionErrors.arithmeticOverflowError(message, hint, errorContext)
   }
 
   def cannotChangeDecimalPrecisionError(
-    value: Decimal,
-    toType: DecimalType,
-    context: SQLQueryContext = null): ArithmeticException = {
+      value: Decimal,
+      toType: DecimalType,
+      context: SQLQueryContext = null): ArithmeticException = {
     QueryExecutionErrors.cannotChangeDecimalPrecisionError(
       value, toType.precision, toType.scale, context
     )

From 4ae9e6867f93b372f599c0f31e0bfc0f5e32d55c Mon Sep 17 00:00:00 2001
From: MithunR
Date: Fri, 11 Oct 2024 14:46:13 -0700
Subject: [PATCH 7/7] Fixed missed whitespace problem.

---
 .../shims/RapidsErrorUtils330To334Base.scala | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils330To334Base.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils330To334Base.scala
index 0e8f9261d6e..5e560faf90c 100644
--- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils330To334Base.scala
+++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils330To334Base.scala
@@ -34,14 +34,14 @@ import org.apache.spark.sql.types.{DataType, Decimal, DecimalType}
 trait RapidsErrorUtils330To334Base extends RapidsErrorUtilsFor330plus with RapidsQueryErrorUtils {
 
   def mapKeyNotExistError(
-    key: String,
-    keyType: DataType,
-    origin: Origin): NoSuchElementException = {
+      key: String,
+      keyType: DataType,
+      origin: Origin): NoSuchElementException = {
     QueryExecutionErrors.mapKeyNotExistError(key, keyType, origin.context)
   }
 
   def invalidArrayIndexError(index: Int, numElements: Int,
-    isElementAtF: Boolean = false): ArrayIndexOutOfBoundsException = {
+      isElementAtF: Boolean = false): ArrayIndexOutOfBoundsException = {
     if (isElementAtF) {
       QueryExecutionErrors.invalidElementAtIndexError(index, numElements)
     } else {
       QueryExecutionErrors.invalidArrayIndexError(index, numElements)
     }
   }
 
   def arithmeticOverflowError(
-    message: String,
-    hint: String = "",
-    errorContext: String = ""): ArithmeticException = {
+      message: String,
+      hint: String = "",
+      errorContext: String = ""): ArithmeticException = {
     QueryExecutionErrors.arithmeticOverflowError(message, hint, errorContext)
   }
 
   def cannotChangeDecimalPrecisionError(
-    value: Decimal,
-    toType: DecimalType,
-    context: String = ""): ArithmeticException = {
+      value: Decimal,
+      toType: DecimalType,
+      context: String = ""): ArithmeticException = {
     QueryExecutionErrors.cannotChangeDecimalPrecisionError(
       value, toType.precision, toType.scale, context
     )