-
Notifications
You must be signed in to change notification settings - Fork 230
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix collection_ops_tests
for Spark 4.0 [databricks]
#11414
base: branch-24.12
Are you sure you want to change the base?
Changes from 2 commits
b8bd960
ee2eb81
1037b69
3bcf04f
2427bf3
cc4ae45
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,7 +42,12 @@ import com.nvidia.spark.rapids.Arm._ | |
import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH | ||
|
||
object GetSequenceSize { | ||
val TOO_LONG_SEQUENCE = s"Too long sequence found. Should be <= $MAX_ROUNDED_ARRAY_LENGTH" | ||
def TOO_LONG_SEQUENCE(sequenceLength: Int, functionName: String): String = {
  // Builds the error message reported when a generated sequence would exceed
  // the maximum rounded array length supported by Spark.
  //
  // NOTE(review): the SCREAMING_SNAKE name is kept because this def replaced a
  // `val` constant of the same name and existing callers must compile
  // unchanged; reviewers flagged that a camelCase rename should follow up.
  //
  // For these Spark versions, the sequence length and function name do not
  // appear in the exception message, so both parameters are intentionally
  // unused here — they exist only for signature parity with newer shims.
  s"Too long sequence found. Should be <= $MAX_ROUNDED_ARRAY_LENGTH"
}
|
||
/** | ||
* Compute the size of each sequence according to 'start', 'stop' and 'step'. | ||
* A row (Row[start, stop, step]) contains at least one null element will produce | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
/*** spark-rapids-shim-json-lines | ||
{"spark": "334"} | ||
{"spark": "342"} | ||
{"spark": "343"} | ||
{"spark": "351"} | ||
{"spark": "352"} | ||
spark-rapids-shim-json-lines ***/ | ||
package org.apache.spark.sql.rapids.shims | ||
|
||
import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH | ||
|
||
object SequenceSizeError {
  /**
   * Error message for a sequence whose size exceeds the maximum array length.
   *
   * For these pre-4.0 Spark shims the message mentions neither the offending
   * function's name nor the attempted allocation size; both parameters are
   * accepted solely for signature compatibility with the Spark 4.0 shim.
   */
  def getTooLongSequenceErrorString(sequenceSize: Int, functionName: String): String =
    s"Unsuccessful try to create array with elements exceeding the array size limit $MAX_ROUNDED_ARRAY_LENGTH"
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
/*** spark-rapids-shim-json-lines | ||
{"spark": "400"} | ||
spark-rapids-shim-json-lines ***/ | ||
package org.apache.spark.sql.rapids.shims | ||
|
||
import org.apache.spark.sql.errors.QueryExecutionErrors | ||
|
||
object SequenceSizeError {
  /**
   * Error message for a sequence whose size exceeds the maximum array length.
   *
   * Spark 4.0 includes both the function name and the attempted size in its
   * message, so this shim delegates to Spark's own error factory and surfaces
   * the resulting exception's message text.
   *
   * NOTE(review): reviewers discussed relocating this logic into
   * RapidsErrorUtils; it was kept as a standalone shim object to limit the
   * collateral churn across shim versions — confirm before refactoring.
   */
  def getTooLongSequenceErrorString(sequenceSize: Int, functionName: String): String = {
    val error =
      QueryExecutionErrors.createArrayWithElementsExceedLimitError(functionName, sequenceSize)
    error.getMessage
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: To be consistent with other files, this should just be … [review comment truncated in source]