From cfbc583e1a5d19c3b141b87f5080e58ae96bd8bd Mon Sep 17 00:00:00 2001 From: FourierTransformer Date: Sat, 3 Aug 2024 19:38:23 -0500 Subject: [PATCH] updated to simdjson 3.10.0 --- README.md | 4 +- ....rockspec => lua-simdjson-0.0.4-1.rockspec | 4 +- src/luasimdjson.cpp | 2 +- src/simdjson.cpp | 46 +- src/simdjson.h | 648 ++++++++++++++++-- 5 files changed, 611 insertions(+), 93 deletions(-) rename lua-simdjson-0.0.3-1.rockspec => lua-simdjson-0.0.4-1.rockspec (96%) diff --git a/README.md b/README.md index 460c26f..e759ea4 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ A basic Lua binding to [simdjson](https://simdjson.org). The simdjson library is an incredibly fast JSON parser that uses SIMD instructions and fancy algorithms to parse JSON very quickly. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, 5.3, and 5.4 on linux/osx/windows. It has a general parsing mode and a lazy mode that uses a JSON pointer. -Current simdjson version: 3.9.5 +Current simdjson version: 3.10.0 ## Installation If all the requirements are met, lua-simdjson can be install via luarocks with: @@ -87,7 +87,7 @@ local fileResponse = simdjson.openFile("jsonexamples/twitter.json") print(fileResponse:atPointer("/statuses/0/id")) --using a JSON pointer ``` -Starting with version 0.5.0, the `atPointer` method is JSON pointer compliant. The previous pointer implementation is considered deprecated, but is still available with the `at` method. +Starting with version 0.2.0, the `atPointer` method is JSON pointer compliant. The previous pointer implementation is considered deprecated, but is still available with the `at` method. The `open` and `parse` codeblocks should print out the same values. It's worth noting that the JSON pointer indexes from 0. diff --git a/lua-simdjson-0.0.3-1.rockspec b/lua-simdjson-0.0.4-1.rockspec similarity index 96% rename from lua-simdjson-0.0.3-1.rockspec rename to lua-simdjson-0.0.4-1.rockspec index 5b934d0..1bc65a8 100644 --- a/lua-simdjson-0.0.3-1.rockspec +++ b/lua-simdjson-0.0.4-1.rockspec @@ -1,8 +1,8 @@ package="lua-simdjson" -version="0.0.3-1" +version="0.0.4-1" source = { url = "git://github.com/FourierTransformer/lua-simdjson", - tag = "0.0.3" + tag = "0.0.4" } description = { summary = "This is a simple Lua binding for simdjson", diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index dc4893c..d415e34 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -15,7 +15,7 @@ #include "luasimdjson.h" #define LUA_SIMDJSON_NAME "simdjson" -#define LUA_SIMDJSON_VERSION "0.0.3" +#define LUA_SIMDJSON_VERSION "0.0.4" using namespace simdjson; diff --git a/src/simdjson.cpp b/src/simdjson.cpp index 375b9b7..b5dd724 100644 --- a/src/simdjson.cpp +++ b/src/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2024-07-04 16:26:22 -0400. Do not edit! */ +/* auto-generated on 2024-08-01 09:31:50 -0400. Do not edit! */ /* including simdjson.cpp: */ /* begin file simdjson.cpp */ #define SIMDJSON_SRC_SIMDJSON_CPP @@ -40,6 +40,16 @@ #endif #endif +// C++ 23 +#if !defined(SIMDJSON_CPLUSPLUS23) && (SIMDJSON_CPLUSPLUS >= 202302L) +#define SIMDJSON_CPLUSPLUS23 1 +#endif + +// C++ 20 +#if !defined(SIMDJSON_CPLUSPLUS20) && (SIMDJSON_CPLUSPLUS >= 202002L) +#define SIMDJSON_CPLUSPLUS20 1 +#endif + // C++ 17 #if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) #define SIMDJSON_CPLUSPLUS17 1 @@ -5954,7 +5964,7 @@ class dom_parser_implementation { * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -5971,7 +5981,7 @@ class dom_parser_implementation { * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -12482,7 +12492,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -13357,7 +13367,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -18701,7 +18711,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -19576,7 +19586,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -24913,7 +24923,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -25788,7 +25798,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -31396,7 +31406,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -32271,7 +32281,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -38453,7 +38463,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -39328,7 +39338,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -44477,7 +44487,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -45352,7 +45362,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -50492,7 +50502,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -51367,7 +51377,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -54568,7 +54578,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ diff --git a/src/simdjson.h b/src/simdjson.h index 0912825..ddb6f2e 100644 --- a/src/simdjson.h +++ b/src/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-07-04 16:26:22 -0400. Do not edit! */ +/* auto-generated on 2024-08-01 09:31:50 -0400. Do not edit! */ /* including simdjson.h: */ /* begin file simdjson.h */ #ifndef SIMDJSON_H @@ -60,6 +60,16 @@ #endif #endif +// C++ 23 +#if !defined(SIMDJSON_CPLUSPLUS23) && (SIMDJSON_CPLUSPLUS >= 202302L) +#define SIMDJSON_CPLUSPLUS23 1 +#endif + +// C++ 20 +#if !defined(SIMDJSON_CPLUSPLUS20) && (SIMDJSON_CPLUSPLUS >= 202002L) +#define SIMDJSON_CPLUSPLUS20 1 +#endif + // C++ 17 #if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) #define SIMDJSON_CPLUSPLUS17 1 @@ -2356,7 +2366,7 @@ namespace std { #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION "3.9.5" +#define SIMDJSON_VERSION "3.10.0" namespace simdjson { enum { @@ -2367,11 +2377,11 @@ enum { /** * The minor version (major.MINOR.revision) of simdjson being used. */ - SIMDJSON_VERSION_MINOR = 9, + SIMDJSON_VERSION_MINOR = 10, /** * The revision (major.minor.REVISION) of simdjson being used. */ - SIMDJSON_VERSION_REVISION = 5 + SIMDJSON_VERSION_REVISION = 0 }; } // namespace simdjson @@ -3099,7 +3109,7 @@ class dom_parser_implementation { * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -3116,7 +3126,7 @@ class dom_parser_implementation { * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -4279,6 +4289,11 @@ class array { */ inline simdjson_result at(size_t index) const noexcept; + /** + * Implicitly convert object to element + */ + inline operator element() const noexcept; + private: simdjson_inline array(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; @@ -6160,6 +6175,11 @@ class object { */ inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + /** + * Implicitly convert object to element + */ + inline operator element() const noexcept; + private: simdjson_inline object(const internal::tape_ref &tape) noexcept; @@ -6771,6 +6791,10 @@ inline simdjson_result array::at(size_t index) const noexcept { return INDEX_OUT_OF_BOUNDS; } +inline array::operator element() const noexcept { + return element(tape); +} + // // array::iterator inline implementation // @@ -6979,6 +7003,10 @@ inline simdjson_result object::at_key_case_insensitive(std::string_view return NO_SUCH_FIELD; } +inline object::operator element() const noexcept { + return element(tape); +} + // // object::iterator inline implementation // @@ -33659,8 +33687,8 @@ namespace arm64 { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -33973,6 +34001,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -35620,21 +35659,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -35668,6 +35723,8 @@ struct simdjson_result : public arm64::implementation_si simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -35842,8 +35899,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -37895,6 +37952,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -37962,6 +38027,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -38329,11 +38400,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -39220,6 +39303,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -44065,8 +44153,8 @@ namespace fallback { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -44379,6 +44467,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -46026,21 +46125,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -46074,6 +46189,8 @@ struct simdjson_result : public fallback::implementat simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -46248,8 +46365,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -48301,6 +48418,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -48368,6 +48493,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -48735,11 +48866,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -49626,6 +49769,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -54963,8 +55111,8 @@ namespace haswell { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -55277,6 +55425,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -56924,21 +57083,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -56972,6 +57147,8 @@ struct simdjson_result : public haswell::implementatio simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -57146,8 +57323,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -59199,6 +59376,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -59266,6 +59451,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -59633,11 +59824,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -60524,6 +60727,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -65860,8 +66068,8 @@ namespace icelake { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -66174,6 +66382,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -67821,21 +68040,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -67869,6 +68104,8 @@ struct simdjson_result : public icelake::implementatio simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -68043,8 +68280,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -70096,6 +70333,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -70163,6 +70408,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -70530,11 +70781,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -71421,6 +71684,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -76872,8 +77140,8 @@ namespace ppc64 { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -77186,6 +77454,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -78833,21 +79112,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -78881,6 +79176,8 @@ struct simdjson_result : public ppc64::implementation_si simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -79055,8 +79352,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -81108,6 +81405,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -81175,6 +81480,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -81542,11 +81853,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -82433,6 +82756,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -88207,8 +88535,8 @@ namespace westmere { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -88521,6 +88849,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -90168,21 +90507,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -90216,6 +90571,8 @@ struct simdjson_result : public westmere::implementat simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -90390,8 +90747,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -92443,6 +92800,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -92510,6 +92875,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -92877,11 +93248,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -93768,6 +94151,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -99013,8 +99401,8 @@ namespace lsx { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -99327,6 +99715,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -100974,21 +101373,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -101022,6 +101437,8 @@ struct simdjson_result : public lsx::implementation_simdjs simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -101196,8 +101613,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -103249,6 +103666,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -103316,6 +103741,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -103683,11 +104114,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -104574,6 +105017,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -109832,8 +110280,8 @@ namespace lasx { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -110146,6 +110594,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -111793,21 +112252,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -111841,6 +112316,8 @@ struct simdjson_result : public lasx::implementation_simd simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -112015,8 +112492,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -114068,6 +114545,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -114135,6 +114620,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -114502,11 +114993,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -115393,6 +115896,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; }