diff --git a/velox/core/QueryConfig.h b/velox/core/QueryConfig.h index 7bbbd5b58828..af54e369beb5 100644 --- a/velox/core/QueryConfig.h +++ b/velox/core/QueryConfig.h @@ -294,10 +294,6 @@ class QueryConfig { static constexpr const char* kPrestoArrayAggIgnoreNulls = "presto.array_agg.ignore_nulls"; - /// If false, size function returns null for null input. - static constexpr const char* kSparkLegacySizeOfNull = - "spark.legacy_size_of_null"; - // The default number of expected items for the bloomfilter. static constexpr const char* kSparkBloomFilterExpectedNumItems = "spark.bloom_filter.expected_num_items"; @@ -632,11 +628,6 @@ class QueryConfig { return get(kSpillableReservationGrowthPct, kDefaultPct); } - bool sparkLegacySizeOfNull() const { - constexpr bool kDefault{true}; - return get(kSparkLegacySizeOfNull, kDefault); - } - bool prestoArrayAggIgnoreNulls() const { return get(kPrestoArrayAggIgnoreNulls, false); } diff --git a/velox/docs/functions/spark/array.rst b/velox/docs/functions/spark/array.rst index 6b981a44228f..aa6a39015ab0 100644 --- a/velox/docs/functions/spark/array.rst +++ b/velox/docs/functions/spark/array.rst @@ -101,12 +101,6 @@ Array Functions SELECT array_repeat(100, 0); -- [] SELECT array_repeat(100, -1); -- [] -.. spark:function:: array_size(array(E)) -> integer - - Returns the size of the array. :: - - SELECT array_size(array(1, 2, 3)); -- 3 - .. spark:function:: array_sort(array(E)) -> array(E) Returns an array which has the sorted order of the input array(E). The elements of array(E) must @@ -193,11 +187,14 @@ Array Functions SELECT shuffle(array(0, 0, 0), 0); -- [0, 0, 0] SELECT shuffle(array(1, NULL, 1, NULL, 2), 0); -- [2, 1, NULL, NULL, 1] -.. spark:function:: size(array(E)) -> bigint +.. spark:function:: size(array(E), legacySizeOfNull) -> integer + + Returns the size of the array. Returns null for null input if `legacySizeOfNull` + is set to false. Otherwise, returns -1 for null input. :: - Returns the size of the array. Returns null for null input - if :doc:`spark.legacy_size_of_null <../../configs>` is set to false. - Otherwise, returns -1 for null input. + SELECT size(array(1, 2, 3), true); -- 3 + SELECT size(NULL, true); -- -1 + SELECT size(NULL, false); -- NULL .. spark:function:: sort_array(array(E)) -> array(E) diff --git a/velox/docs/functions/spark/map.rst b/velox/docs/functions/spark/map.rst index 923de1b45309..74735e9d5fe0 100644 --- a/velox/docs/functions/spark/map.rst +++ b/velox/docs/functions/spark/map.rst @@ -56,9 +56,12 @@ Map Functions MAP(ARRAY['a', 'b', 'c'], ARRAY[1, 2, 3]), (k, v1, v2) -> k || CAST(v1/v2 AS VARCHAR)); -.. spark:function:: size(map(K,V)) -> bigint +.. spark:function:: size(map(K,V), legacySizeOfNull) -> integer :noindex: - Returns the size of the input map. Returns null for null input - if :doc:`spark.legacy_size_of_null <../../configs>` is set to false. - Otherwise, returns -1 for null input. + Returns the size of the input map. Returns null for null input if ``legacySizeOfNull`` + is set to false. Otherwise, returns -1 for null input. :: + + SELECT size(map(array(1, 2), array(3, 4)), true); -- 2 + SELECT size(NULL, true); -- -1 + SELECT size(NULL, false); -- NULL diff --git a/velox/functions/sparksql/ArraySizeFunction.h b/velox/functions/sparksql/ArraySizeFunction.h deleted file mode 100644 index e7857815fe2b..000000000000 --- a/velox/functions/sparksql/ArraySizeFunction.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include "velox/functions/Macros.h" - -namespace facebook::velox::functions::sparksql { - -template -struct ArraySizeFunction { - VELOX_DEFINE_FUNCTION_TYPES(T); - - FOLLY_ALWAYS_INLINE void call( - int32_t& out, - const arg_type>& inputArray) { - out = inputArray.size(); - } -}; -} // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/Register.cpp b/velox/functions/sparksql/Register.cpp index 99657adb069f..07ba98f0cdb6 100644 --- a/velox/functions/sparksql/Register.cpp +++ b/velox/functions/sparksql/Register.cpp @@ -30,7 +30,6 @@ #include "velox/functions/prestosql/URLFunctions.h" #include "velox/functions/sparksql/ArrayFlattenFunction.h" #include "velox/functions/sparksql/ArrayMinMaxFunction.h" -#include "velox/functions/sparksql/ArraySizeFunction.h" #include "velox/functions/sparksql/ArraySort.h" #include "velox/functions/sparksql/Bitwise.h" #include "velox/functions/sparksql/DateTimeFunctions.h" @@ -171,9 +170,6 @@ inline void registerArrayMinMaxFunctions(const std::string& prefix) { void registerFunctions(const std::string& prefix) { registerAllSpecialFormGeneralFunctions(); - registerFunction>( - {prefix + "array_size"}); - // Register size functions registerSize(prefix + "size"); diff --git a/velox/functions/sparksql/Size.cpp b/velox/functions/sparksql/Size.cpp index 96bd820d5680..70d7d3d509d2 100644 --- a/velox/functions/sparksql/Size.cpp +++ b/velox/functions/sparksql/Size.cpp @@ -30,33 +30,40 @@ struct Size { template FOLLY_ALWAYS_INLINE void initialize( const std::vector& /*inputTypes*/, - const core::QueryConfig& config, - const TInput* input /*input*/) { - legacySizeOfNull_ = config.sparkLegacySizeOfNull(); + const core::QueryConfig& /*config*/, + const TInput* /*input*/, + const bool* legacySizeOfNull) { + if (legacySizeOfNull == nullptr) { + VELOX_USER_FAIL("Constant legacySizeOfNull is expected."); + } + legacySizeOfNull_ = *legacySizeOfNull; } template - FOLLY_ALWAYS_INLINE bool callNullable(int32_t& out, const TInput* input) { + FOLLY_ALWAYS_INLINE bool callNullable( + int32_t& out, + const TInput* input, + const bool* /*legacySizeOfNull*/) { if (input == nullptr) { if (legacySizeOfNull_) { out = -1; return true; - } else { - return false; } + return false; } out = input->size(); return true; } private: + // If true, returns -1 for null input. Otherwise, returns null. bool legacySizeOfNull_; }; } // namespace void registerSize(const std::string& prefix) { - registerFunction>({prefix}); - registerFunction>({prefix}); + registerFunction, bool>({prefix}); + registerFunction, bool>({prefix}); } } // namespace facebook::velox::functions::sparksql diff --git a/velox/functions/sparksql/tests/ArraySizeTest.cpp b/velox/functions/sparksql/tests/ArraySizeTest.cpp deleted file mode 100644 index 1591ff484a9a..000000000000 --- a/velox/functions/sparksql/tests/ArraySizeTest.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h" -#include "velox/type/Timestamp.h" - -using namespace facebook::velox; -using namespace facebook::velox::test; -using namespace facebook::velox::functions::test; - -namespace facebook::velox::functions::sparksql::test { -namespace { - -class ArraySizeTest : public SparkFunctionBaseTest { - protected: - template - int32_t arraySize(const std::vector>& input) { - auto row = makeRowVector({makeNullableArrayVector( - std::vector>>{input})}); - return evaluateOnce("array_size(c0)", row).value(); - } -}; - -TEST_F(ArraySizeTest, boolean) { - EXPECT_EQ(arraySize({true, false}), 2); - EXPECT_EQ(arraySize({true}), 1); - EXPECT_EQ(arraySize({}), 0); - EXPECT_EQ(arraySize({true, false, true, std::nullopt}), 4); -} - -TEST_F(ArraySizeTest, smallint) { - EXPECT_EQ(arraySize({}), 0); - EXPECT_EQ(arraySize({1}), 1); - EXPECT_EQ(arraySize({std::nullopt}), 1); - EXPECT_EQ(arraySize({std::nullopt, 1}), 2); -} - -TEST_F(ArraySizeTest, real) { - EXPECT_EQ(arraySize({}), 0); - EXPECT_EQ(arraySize({1.1}), 1); - EXPECT_EQ(arraySize({std::nullopt}), 1); - EXPECT_EQ(arraySize({std::nullopt, 1.1}), 2); -} - -TEST_F(ArraySizeTest, varchar) { - EXPECT_EQ(arraySize({"red", "blue"}), 2); - EXPECT_EQ( - arraySize({std::nullopt, "blue", "yellow", "orange"}), 4); - EXPECT_EQ(arraySize({}), 0); - EXPECT_EQ(arraySize({std::nullopt}), 1); -} - -TEST_F(ArraySizeTest, integer) { - EXPECT_EQ(arraySize({1, 2}), 2); -} - -TEST_F(ArraySizeTest, timestamp) { - auto ts = [](int64_t micros) { return Timestamp::fromMicros(micros); }; - EXPECT_EQ(arraySize({}), 0); - EXPECT_EQ(arraySize({std::nullopt}), 1); - EXPECT_EQ(arraySize({ts(0), ts(1)}), 2); -} -} // namespace -} // namespace facebook::velox::functions::sparksql::test diff --git a/velox/functions/sparksql/tests/CMakeLists.txt b/velox/functions/sparksql/tests/CMakeLists.txt index 86dfe4f07918..e3e4e823fd78 100644 --- a/velox/functions/sparksql/tests/CMakeLists.txt +++ b/velox/functions/sparksql/tests/CMakeLists.txt @@ -19,7 +19,6 @@ add_executable( ArrayGetTest.cpp ArrayMaxTest.cpp ArrayMinTest.cpp - ArraySizeTest.cpp ArraySortTest.cpp ArrayShuffleTest.cpp ArgGeneratorTest.cpp diff --git a/velox/functions/sparksql/tests/SizeTest.cpp b/velox/functions/sparksql/tests/SizeTest.cpp index d885e41705af..ff69ca04a5ac 100644 --- a/velox/functions/sparksql/tests/SizeTest.cpp +++ b/velox/functions/sparksql/tests/SizeTest.cpp @@ -15,8 +15,11 @@ */ #include +#include #include +#include #include "velox/functions/sparksql/tests/SparkFunctionBaseTest.h" +#include "velox/type/Timestamp.h" using namespace facebook::velox; using namespace facebook::velox::exec; @@ -28,9 +31,9 @@ class SizeTest : public SparkFunctionBaseTest { std::function sizeAt = [](vector_size_t row) { return 1 + row % 7; }; - void testSize(VectorPtr vector, vector_size_t numRows) { - auto result = - evaluate>("size(c0)", makeRowVector({vector})); + void testLegacySizeOfNull(VectorPtr vector, vector_size_t numRows) { + auto result = evaluate>( + "size(c0, true)", makeRowVector({vector})); for (vector_size_t i = 0; i < numRows; ++i) { if (vector->isNullAt(i)) { EXPECT_EQ(result->valueAt(i), -1) << "at " << i; @@ -40,18 +43,19 @@ class SizeTest : public SparkFunctionBaseTest { } } - void testSizeLegacyNull(VectorPtr vector, vector_size_t numRows) { - auto result = - evaluate>("size(c0)", makeRowVector({vector})); + void testSize(VectorPtr vector, vector_size_t numRows) { + auto result = evaluate>( + "size(c0, false)", makeRowVector({vector})); for (vector_size_t i = 0; i < numRows; ++i) { EXPECT_EQ(result->isNullAt(i), vector->isNullAt(i)) << "at " << i; } } - void setConfig(std::string configStr, bool value) { - execCtx_.queryCtx()->testingOverrideConfigUnsafe({ - {configStr, std::to_string(value)}, - }); + template + int32_t testArraySize(const std::vector>& input) { + auto row = makeRowVector({makeNullableArrayVector( + std::vector>>{input})}); + return evaluateOnce("size(c0, false)", row).value(); } static inline vector_size_t valueAt(vector_size_t idx) { @@ -59,32 +63,73 @@ class SizeTest : public SparkFunctionBaseTest { } }; -TEST_F(SizeTest, sizetest) { +// Ensure that out is set to -1 for null input if legacySizeOfNull = true. +TEST_F(SizeTest, legacySizeOfNull) { vector_size_t numRows = 100; auto arrayVector = makeArrayVector(numRows, sizeAt, valueAt, nullptr); - testSize(arrayVector, numRows); + testLegacySizeOfNull(arrayVector, numRows); arrayVector = makeArrayVector(numRows, sizeAt, valueAt, nullEvery(5)); - testSize(arrayVector, numRows); + testLegacySizeOfNull(arrayVector, numRows); auto mapVector = makeMapVector( numRows, sizeAt, valueAt, valueAt, nullptr); - testSize(mapVector, numRows); + testLegacySizeOfNull(mapVector, numRows); mapVector = makeMapVector( numRows, sizeAt, valueAt, valueAt, nullEvery(5)); - testSize(mapVector, numRows); + testLegacySizeOfNull(mapVector, numRows); } -// Ensure that out if set to -1 if SparkLegacySizeOfNull is specified. -TEST_F(SizeTest, legacySizeOfNull) { +// Ensure that out is set to null for null input if legacySizeOfNull = false. +TEST_F(SizeTest, size) { vector_size_t numRows = 100; - setConfig(core::QueryConfig::kSparkLegacySizeOfNull, false); auto arrayVector = makeArrayVector(numRows, sizeAt, valueAt, nullEvery(1)); - testSizeLegacyNull(arrayVector, numRows); + testSize(arrayVector, numRows); auto mapVector = makeMapVector( numRows, sizeAt, valueAt, valueAt, nullEvery(1)); - testSizeLegacyNull(mapVector, numRows); + testSize(mapVector, numRows); +} + +TEST_F(SizeTest, boolean) { + EXPECT_EQ(testArraySize({true, false}), 2); + EXPECT_EQ(testArraySize({true}), 1); + EXPECT_EQ(testArraySize({}), 0); + EXPECT_EQ(testArraySize({true, false, true, std::nullopt}), 4); +} + +TEST_F(SizeTest, smallint) { + EXPECT_EQ(testArraySize({}), 0); + EXPECT_EQ(testArraySize({1}), 1); + EXPECT_EQ(testArraySize({std::nullopt}), 1); + EXPECT_EQ(testArraySize({std::nullopt, 1}), 2); +} + +TEST_F(SizeTest, real) { + EXPECT_EQ(testArraySize({}), 0); + EXPECT_EQ(testArraySize({1.1}), 1); + EXPECT_EQ(testArraySize({std::nullopt}), 1); + EXPECT_EQ(testArraySize({std::nullopt, 1.1}), 2); +} + +TEST_F(SizeTest, varchar) { + EXPECT_EQ(testArraySize({"red", "blue"}), 2); + EXPECT_EQ( + testArraySize({std::nullopt, "blue", "yellow", "orange"}), + 4); + EXPECT_EQ(testArraySize({}), 0); + EXPECT_EQ(testArraySize({std::nullopt}), 1); +} + +TEST_F(SizeTest, integer) { + EXPECT_EQ(testArraySize({1, 2}), 2); +} + +TEST_F(SizeTest, timestamp) { + auto ts = [](int64_t micros) { return Timestamp::fromMicros(micros); }; + EXPECT_EQ(testArraySize({}), 0); + EXPECT_EQ(testArraySize({std::nullopt}), 1); + EXPECT_EQ(testArraySize({ts(0), ts(1)}), 2); } } // namespace facebook::velox::functions::sparksql::test