diff --git a/velox/common/base/Macros.h b/velox/common/base/Macros.h index 763ebcc6074b..03e45f7ce1f9 100644 --- a/velox/common/base/Macros.h +++ b/velox/common/base/Macros.h @@ -33,3 +33,8 @@ #define VELOX_UNSUPPRESS_RETURN_LOCAL_ADDR_WARNING \ _Pragma("GCC diagnostic pop"); #endif + +#define VELOX_CONCAT(x, y) x##y +// Need this extra layer to expand __COUNTER__. +#define VELOX_VARNAME_IMPL(x, y) VELOX_CONCAT(x, y) +#define VELOX_VARNAME(x) VELOX_VARNAME_IMPL(x, __COUNTER__) diff --git a/velox/common/testutil/TestValue.h b/velox/common/testutil/TestValue.h index 8c75146fc6ee..ba988a6935bb 100644 --- a/velox/common/testutil/TestValue.h +++ b/velox/common/testutil/TestValue.h @@ -20,6 +20,7 @@ #include #include "velox/common/base/Exceptions.h" +#include "velox/common/base/Macros.h" namespace facebook::velox::common::testutil { @@ -109,11 +110,8 @@ inline void TestValue::adjust( void* testData) {} #endif -#define VELOX_CONCAT(x, y) __##x##y -#define VELOX_VARNAME(x) VELOX_CONCAT(x, Obj) - #define SCOPED_TESTVALUE_SET(point, ...) \ ::facebook::velox::common::testutil::ScopedTestValue VELOX_VARNAME( \ - __LINE__)(point, ##__VA_ARGS__) + _scopedTestValue)(point, ##__VA_ARGS__) } // namespace facebook::velox::common::testutil diff --git a/velox/functions/prestosql/SIMDJsonFunctions.h b/velox/functions/prestosql/SIMDJsonFunctions.h index 3238aa1b2e7e..a4341e507ad8 100644 --- a/velox/functions/prestosql/SIMDJsonFunctions.h +++ b/velox/functions/prestosql/SIMDJsonFunctions.h @@ -182,26 +182,32 @@ struct SIMDJsonExtractScalarFunction { // We should just get a single value, if we see multiple, it's an error // and we should return null. resultStr = std::nullopt; - return; + return true; } resultPopulated = true; - switch (v.type()) { - case simdjson::ondemand::json_type::boolean: - resultStr = v.get_bool().value() ? "true" : "false"; + SIMDJSON_ASSIGN_OR_RAISE(auto vtype, v.type()); + switch (vtype) { + case simdjson::ondemand::json_type::boolean: { + SIMDJSON_ASSIGN_OR_RAISE(bool vbool, v.get_bool()); + resultStr = vbool ? "true" : "false"; break; - case simdjson::ondemand::json_type::string: - resultStr = v.get_string().value(); + } + case simdjson::ondemand::json_type::string: { + SIMDJSON_ASSIGN_OR_RAISE(resultStr, v.get_string()); break; + } case simdjson::ondemand::json_type::object: case simdjson::ondemand::json_type::array: case simdjson::ondemand::json_type::null: // Do nothing. break; - default: - resultStr = simdjson::to_json_string(v).value(); + default: { + SIMDJSON_ASSIGN_OR_RAISE(resultStr, simdjson::to_json_string(v)); + } } + return true; }; if (!simdJsonExtract(json, jsonPath, consumer)) { @@ -237,22 +243,32 @@ struct SIMDJsonExtractFunction { // We could just convert v to a string using to_json_string directly, but // in that case the JSON wouldn't be parsed (it would just return the // contents directly) and we might miss invalid JSON. - switch (v.type()) { - case simdjson::ondemand::json_type::object: - results += simdjson::to_json_string(v.get_object()).value(); + SIMDJSON_ASSIGN_OR_RAISE(auto vtype, v.type()); + switch (vtype) { + case simdjson::ondemand::json_type::object: { + SIMDJSON_ASSIGN_OR_RAISE( + auto jsonStr, simdjson::to_json_string(v.get_object())); + results += jsonStr; break; - case simdjson::ondemand::json_type::array: - results += simdjson::to_json_string(v.get_array()).value(); + } + case simdjson::ondemand::json_type::array: { + SIMDJSON_ASSIGN_OR_RAISE( + auto jsonStr, simdjson::to_json_string(v.get_array())); + results += jsonStr; break; + } case simdjson::ondemand::json_type::string: case simdjson::ondemand::json_type::number: - case simdjson::ondemand::json_type::boolean: - results += simdjson::to_json_string(v).value(); + case simdjson::ondemand::json_type::boolean: { + SIMDJSON_ASSIGN_OR_RAISE(auto jsonStr, simdjson::to_json_string(v)); + results += jsonStr; break; + } case simdjson::ondemand::json_type::null: results += kNullString; break; } + return true; }; if (!simdJsonExtract(json, jsonPath, consumer)) { @@ -294,13 +310,16 @@ struct SIMDJsonSizeFunction { // We only need the size of the actual object if there's only one // returned, if multiple are returned we use the number of objects // returned instead. - switch (v.type()) { - case simdjson::ondemand::json_type::object: - singleResultSize = v.count_fields().value(); + SIMDJSON_ASSIGN_OR_RAISE(auto vtype, v.type()); + switch (vtype) { + case simdjson::ondemand::json_type::object: { + SIMDJSON_ASSIGN_OR_RAISE(singleResultSize, v.count_fields()); break; - case simdjson::ondemand::json_type::array: - singleResultSize = v.count_elements().value(); + } + case simdjson::ondemand::json_type::array: { + SIMDJSON_ASSIGN_OR_RAISE(singleResultSize, v.count_elements()); break; + } case simdjson::ondemand::json_type::string: case simdjson::ondemand::json_type::number: case simdjson::ondemand::json_type::boolean: @@ -309,6 +328,7 @@ struct SIMDJsonSizeFunction { break; } } + return true; }; if (!simdJsonExtract(json, jsonPath, consumer)) { diff --git a/velox/functions/prestosql/json/SIMDJsonExtractor.cpp b/velox/functions/prestosql/json/SIMDJsonExtractor.cpp index 83f7e8d4d69a..5d6de816f841 100644 --- a/velox/functions/prestosql/json/SIMDJsonExtractor.cpp +++ b/velox/functions/prestosql/json/SIMDJsonExtractor.cpp @@ -42,8 +42,8 @@ namespace facebook::velox::functions::detail { return *it.first->second; } -simdjson::ondemand::document SIMDJsonExtractor::parse( - const simdjson::padded_string& json) { +simdjson::simdjson_result +SIMDJsonExtractor::parse(const simdjson::padded_string& json) { thread_local static simdjson::ondemand::parser parser; return parser.iterate(json); } @@ -71,23 +71,26 @@ bool SIMDJsonExtractor::tokenize(const std::string& path) { return true; } -void extractObject( - simdjson::ondemand::value& jsonObj, +bool extractObject( + simdjson::ondemand::value& jsonValue, const std::string& key, std::optional& ret) { - for (auto field : jsonObj.get_object()) { - if (field.unescaped_key().value() == key) { + SIMDJSON_ASSIGN_OR_RAISE(auto jsonObj, jsonValue.get_object()); + for (auto field : jsonObj) { + SIMDJSON_ASSIGN_OR_RAISE(auto currentKey, field.unescaped_key()); + if (currentKey == key) { ret.emplace(field.value()); - return; + return true; } } + return true; } -void extractArray( +bool extractArray( simdjson::ondemand::value& jsonValue, const std::string& index, std::optional& ret) { - auto jsonArray = jsonValue.get_array(); + SIMDJSON_ASSIGN_OR_RAISE(auto jsonArray, jsonValue.get_array()); auto rv = folly::tryTo(index); if (rv.hasValue()) { auto val = jsonArray.at(rv.value()); @@ -95,5 +98,6 @@ void extractArray( ret.emplace(std::move(val)); } } + return true; } } // namespace facebook::velox::functions::detail diff --git a/velox/functions/prestosql/json/SIMDJsonExtractor.h b/velox/functions/prestosql/json/SIMDJsonExtractor.h index 9ff1f18b2a3b..5dc4c9a48e2b 100644 --- a/velox/functions/prestosql/json/SIMDJsonExtractor.h +++ b/velox/functions/prestosql/json/SIMDJsonExtractor.h @@ -21,10 +21,21 @@ #include "folly/Range.h" #include "folly/dynamic.h" #include "velox/common/base/Exceptions.h" +#include "velox/common/base/Macros.h" #include "velox/functions/prestosql/json/JsonPathTokenizer.h" #include "velox/functions/prestosql/json/SIMDJsonWrapper.h" #include "velox/type/StringView.h" +#define SIMDJSON_ASSIGN_OR_RAISE_IMPL(_resultName, _lhs, _rexpr) \ + auto&& _resultName = (_rexpr); \ + if (_resultName.error() != ::simdjson::SUCCESS) { \ + return false; \ + } \ + _lhs = std::move(_resultName).value_unsafe(); + +#define SIMDJSON_ASSIGN_OR_RAISE(_lhs, _rexpr) \ + SIMDJSON_ASSIGN_OR_RAISE_IMPL(VELOX_VARNAME(_simdjsonResult), _lhs, _rexpr) + namespace facebook::velox::functions { template @@ -40,7 +51,7 @@ using JsonVector = std::vector; class SIMDJsonExtractor { public: template - void extract( + bool extract( simdjson::ondemand::value& json, TConsumer& consumer, size_t tokenStartIndex = 0); @@ -50,7 +61,8 @@ class SIMDJsonExtractor { return tokens_.empty(); } - simdjson::ondemand::document parse(const simdjson::padded_string& json); + simdjson::simdjson_result parse( + const simdjson::padded_string& json); private: // Use this method to get an instance of SIMDJsonExtractor given a JSON path. @@ -79,18 +91,18 @@ class SIMDJsonExtractor { TConsumer&& consumer); }; -void extractObject( +bool extractObject( simdjson::ondemand::value& jsonObj, const std::string& key, std::optional& ret); -void extractArray( +bool extractArray( simdjson::ondemand::value& jsonValue, const std::string& index, std::optional& ret); template -void SIMDJsonExtractor::extract( +bool SIMDJsonExtractor::extract( simdjson::ondemand::value& json, TConsumer& consumer, size_t tokenStartIndex) { @@ -102,35 +114,43 @@ void SIMDJsonExtractor::extract( tokenIndex++) { auto& token = tokens_[tokenIndex]; if (input.type() == simdjson::ondemand::json_type::object) { - extractObject(input, token, result); + if (!extractObject(input, token, result)) { + return false; + } } else if (input.type() == simdjson::ondemand::json_type::array) { if (token == "*") { for (auto child : input.get_array()) { if (tokenIndex == tokens_.size() - 1) { // If this is the last token in the path, consume each element in // the array. - consumer(child.value()); + if (!consumer(child.value())) { + return false; + } } else { // If not, then recursively call the extract function on each // element in the array. - extract(child.value(), consumer, tokenIndex + 1); + if (!extract(child.value(), consumer, tokenIndex + 1)) { + return false; + } } } - return; + return true; } else { - extractArray(input, token, result); + if (!extractArray(input, token, result)) { + return false; + } } } if (!result) { - return; + return true; } input = result.value(); result.reset(); } - consumer(input); + return consumer(input); } } // namespace detail @@ -158,30 +178,20 @@ bool simdJsonExtract( const velox::StringView& json, const velox::StringView& path, TConsumer&& consumer) { - try { - // If extractor fails to parse the path, this will throw a VeloxUserError, - // and we want to let this exception bubble up to the client. We only catch - // JSON parsing failures (in which cases we return folly::none instead of - // throw). - auto& extractor = detail::SIMDJsonExtractor::getInstance(path); - simdjson::padded_string paddedJson(json.data(), json.size()); - simdjson::ondemand::document jsonDoc = extractor.parse(paddedJson); - - if (extractor.isRootOnlyPath()) { - // If the path is just to return the original object, call consumer on the - // document. Note, we cannot convert this to a value as this is not - // supported if the object is a scalar. - consumer(jsonDoc); - } else { - auto value = jsonDoc.get_value().value(); - extractor.extract(value, std::forward(consumer)); - } - } catch (const simdjson::simdjson_error&) { - // simdjson might throw a conversion error while parsing the input JSON. - return false; + // If extractor fails to parse the path, this will throw a VeloxUserError, and + // we want to let this exception bubble up to the client. + auto& extractor = detail::SIMDJsonExtractor::getInstance(path); + simdjson::padded_string paddedJson(json.data(), json.size()); + SIMDJSON_ASSIGN_OR_RAISE(auto jsonDoc, extractor.parse(paddedJson)); + + if (extractor.isRootOnlyPath()) { + // If the path is just to return the original object, call consumer on the + // document. Note, we cannot convert this to a value as this is not + // supported if the object is a scalar. + return consumer(jsonDoc); } - - return true; + SIMDJSON_ASSIGN_OR_RAISE(auto value, jsonDoc.get_value()); + return extractor.extract(value, std::forward(consumer)); } template diff --git a/velox/functions/prestosql/json/tests/SIMDJsonExtractorTest.cpp b/velox/functions/prestosql/json/tests/SIMDJsonExtractorTest.cpp index 783bdcebba19..0cb955fcb309 100644 --- a/velox/functions/prestosql/json/tests/SIMDJsonExtractorTest.cpp +++ b/velox/functions/prestosql/json/tests/SIMDJsonExtractorTest.cpp @@ -47,7 +47,9 @@ class SIMDJsonExtractorTest : public testing::Test { const std::optional>& expected) { std::vector res; auto consumer = [&res](auto& v) { - res.push_back(std::string(simdjson::to_json_string(v).value())); + SIMDJSON_ASSIGN_OR_RAISE(auto jsonStr, simdjson::to_json_string(v)); + res.emplace_back(jsonStr); + return true; }; EXPECT_TRUE(simdJsonExtract(json, path, consumer)) @@ -78,26 +80,32 @@ class SIMDJsonExtractorTest : public testing::Test { // We expect a single value, if consumer gets called multiple times, // e.g. the path contains [*], return null. actual = std::nullopt; - return; + return true; } resultPopulated = true; - switch (v.type()) { - case simdjson::ondemand::json_type::boolean: - actual = v.get_bool().value() ? "true" : "false"; + SIMDJSON_ASSIGN_OR_RAISE(auto vtype, v.type()); + switch (vtype) { + case simdjson::ondemand::json_type::boolean: { + SIMDJSON_ASSIGN_OR_RAISE(bool vbool, v.get_bool()); + actual = vbool ? "true" : "false"; break; - case simdjson::ondemand::json_type::string: - actual = v.get_string().value(); + } + case simdjson::ondemand::json_type::string: { + SIMDJSON_ASSIGN_OR_RAISE(actual, v.get_string()); break; + } case simdjson::ondemand::json_type::object: case simdjson::ondemand::json_type::array: case simdjson::ondemand::json_type::null: // Do nothing. break; - default: - actual = simdjson::to_json_string(v).value(); + default: { + SIMDJSON_ASSIGN_OR_RAISE(actual, simdjson::to_json_string(v)); + } } + return true; }; EXPECT_TRUE(simdJsonExtract(json, path, consumer)) @@ -455,7 +463,8 @@ TEST_F(SIMDJsonExtractorTest, reextractJsonTest) { std::string extract; std::string ret; auto consumer = [&ret](auto& v) { - ret = simdjson::to_json_string(v).value(); + SIMDJSON_ASSIGN_OR_RAISE(ret, simdjson::to_json_string(v)); + return true; }; simdJsonExtract(json, "$", consumer); @@ -500,7 +509,8 @@ TEST_F(SIMDJsonExtractorTest, jsonMultipleExtractsTest) { std::string extract2; std::string ret; auto consumer = [&ret](auto& v) { - ret = simdjson::to_json_string(v).value(); + SIMDJSON_ASSIGN_OR_RAISE(ret, simdjson::to_json_string(v)); + return true; }; simdJsonExtract(json, "$.store", consumer); @@ -513,7 +523,7 @@ TEST_F(SIMDJsonExtractorTest, jsonMultipleExtractsTest) { TEST_F(SIMDJsonExtractorTest, invalidJson) { // No-op consumer. - auto consumer = [](auto& /* unused */) {}; + auto consumer = [](auto& /* unused */) { return true; }; // Object key is invalid. std::string json = "{\"foo: \"bar\"}";