From cce09b717e2ac363396f38909126b21fa6a8e580 Mon Sep 17 00:00:00 2001 From: Scott Determan Date: Tue, 5 Mar 2024 15:23:27 -0500 Subject: [PATCH] Fast base58 codec: (#4327) This algorithm is about an order of magnitude faster than the existing algorithm (about 10x faster for encoding and about 15x faster for decoding - including the double hash for the checksum). The algorithms use gcc's int128 (fast MS version will have to wait, in the meantime MS falls back to the slow code). --- Builds/CMake/RippledCore.cmake | 3 + src/ripple/protocol/impl/b58_utils.h | 192 +++++++++ src/ripple/protocol/impl/token_errors.h | 101 +++++ src/ripple/protocol/impl/tokens.cpp | 502 +++++++++++++++++++++++- src/ripple/protocol/tokens.h | 78 +++- src/test/basics/base58_test.cpp | 439 +++++++++++++++++++++ 6 files changed, 1296 insertions(+), 19 deletions(-) create mode 100644 src/ripple/protocol/impl/b58_utils.h create mode 100644 src/ripple/protocol/impl/token_errors.h create mode 100644 src/test/basics/base58_test.cpp diff --git a/Builds/CMake/RippledCore.cmake b/Builds/CMake/RippledCore.cmake index 1dabbe83bd6..efe44f820df 100644 --- a/Builds/CMake/RippledCore.cmake +++ b/Builds/CMake/RippledCore.cmake @@ -305,8 +305,10 @@ install ( DESTINATION include/ripple/protocol) install ( FILES + src/ripple/protocol/impl/b58_utils.h src/ripple/protocol/impl/STVar.h src/ripple/protocol/impl/secp256k1.h + src/ripple/protocol/impl/token_errors.h DESTINATION include/ripple/protocol/impl) install ( FILES @@ -887,6 +889,7 @@ if (tests) src/test/basics/StringUtilities_test.cpp src/test/basics/TaggedCache_test.cpp src/test/basics/XRPAmount_test.cpp + src/test/basics/base58_test.cpp src/test/basics/base64_test.cpp src/test/basics/base_uint_test.cpp src/test/basics/contract_test.cpp diff --git a/src/ripple/protocol/impl/b58_utils.h b/src/ripple/protocol/impl/b58_utils.h new file mode 100644 index 00000000000..c3bb0c03750 --- /dev/null +++ b/src/ripple/protocol/impl/b58_utils.h @@ -0,0 +1,192 @@ 
+//------------------------------------------------------------------------------ +/* + This file is part of rippled: https://github.com/ripple/rippled + Copyright (c) 2022 Ripple Labs Inc. + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef RIPPLE_PROTOCOL_B58_UTILS_H_INCLUDED +#define RIPPLE_PROTOCOL_B58_UTILS_H_INCLUDED + +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace ripple { + +template +using Result = boost::outcome_v2::result; + +#ifndef _MSC_VER +namespace b58_fast { +namespace detail { + +// This optimizes to what hand written asm would do (single divide) +[[nodiscard]] inline std::tuple +div_rem(std::uint64_t a, std::uint64_t b) +{ + return {a / b, a % b}; +} + +// This optimizes to what hand written asm would do (single multiply) +[[nodiscard]] inline std::tuple +carrying_mul(std::uint64_t a, std::uint64_t b, std::uint64_t carry) +{ + unsigned __int128 const x = a; + unsigned __int128 const y = b; + unsigned __int128 const c = x * y + carry; + return {c & 0xffff'ffff'ffff'ffff, c >> 64}; +} + +[[nodiscard]] inline std::tuple +carrying_add(std::uint64_t a, std::uint64_t b) +{ + unsigned __int128 const x = a; + unsigned __int128 const y = b; + unsigned 
__int128 const c = x + y; + return {c & 0xffff'ffff'ffff'ffff, c >> 64}; +} + +// Add a u64 to a "big uint" value inplace. +// The bigint value is stored with the smallest coefficients first +// (i.e a[0] is the 2^0 coefficient, a[n] is the 2^(64*n) coefficient) +// panics if overflows (this is a specialized adder for b58 decoding. +// it should never overflow). +inline void +inplace_bigint_add(std::span a, std::uint64_t b) +{ + if (a.size() <= 1) + { + ripple::LogicError("Input span too small for inplace_bigint_add"); + } + + std::uint64_t carry; + std::tie(a[0], carry) = carrying_add(a[0], b); + + for (auto& v : a.subspan(1)) + { + if (!carry) + { + return; + } + std::tie(v, carry) = carrying_add(v, 1); + } + if (carry) + { + LogicError("Overflow in inplace_bigint_add"); + } +} + +inline void +inplace_bigint_mul(std::span a, std::uint64_t b) +{ + if (a.empty()) + { + LogicError("Empty span passed to inplace_bigint_mul"); + } + + auto const last_index = a.size() - 1; + if (a[last_index] != 0) + { + LogicError("Non-zero element in inplace_bigint_mul last index"); + } + + std::uint64_t carry = 0; + for (auto& coeff : a.subspan(0, last_index)) + { + std::tie(coeff, carry) = carrying_mul(coeff, b, carry); + } + a[last_index] = carry; +} +// divide a "big uint" value inplace and return the mod +// numerator is stored so smallest coefficients come first +[[nodiscard]] inline std::uint64_t +inplace_bigint_div_rem(std::span numerator, std::uint64_t divisor) +{ + if (numerator.size() == 0) + { + // should never happen, but if it does then it seems natural to define + // the a null set of numbers to be zero, so the remainder is also zero. 
+ assert(0); + return 0; + } + + auto to_u128 = [](std::uint64_t high, + std::uint64_t low) -> unsigned __int128 { + unsigned __int128 const high128 = high; + unsigned __int128 const low128 = low; + return ((high128 << 64) | low128); + }; + auto div_rem_64 = + [](unsigned __int128 num, + std::uint64_t denom) -> std::tuple { + unsigned __int128 const denom128 = denom; + unsigned __int128 const d = num / denom128; + unsigned __int128 const r = num - (denom128 * d); + assert(d >> 64 == 0); + assert(r >> 64 == 0); + return {static_cast(d), static_cast(r)}; + }; + + std::uint64_t prev_rem = 0; + int const last_index = numerator.size() - 1; + std::tie(numerator[last_index], prev_rem) = + div_rem(numerator[last_index], divisor); + for (int i = last_index - 1; i >= 0; --i) + { + unsigned __int128 const cur_num = to_u128(prev_rem, numerator[i]); + std::tie(numerator[i], prev_rem) = div_rem_64(cur_num, divisor); + } + return prev_rem; +} + +// convert from base 58^10 to base 58 +// put largest coeffs first +// the `_be` suffix stands for "big endian" +[[nodiscard]] inline std::array +b58_10_to_b58_be(std::uint64_t input) +{ + constexpr std::uint64_t B_58_10 = 430804206899405824; // 58^10; + if (input >= B_58_10) + { + LogicError("Input to b58_10_to_b58_be equals or exceeds 58^10."); + } + + constexpr std::size_t resultSize = 10; + std::array result{}; + int i = 0; + while (input > 0) + { + std::uint64_t rem; + std::tie(input, rem) = div_rem(input, 58); + result[resultSize - 1 - i] = rem; + i += 1; + } + + return result; +} +} // namespace detail +} // namespace b58_fast +#endif + +} // namespace ripple +#endif // RIPPLE_PROTOCOL_B58_UTILS_H_INCLUDED diff --git a/src/ripple/protocol/impl/token_errors.h b/src/ripple/protocol/impl/token_errors.h new file mode 100644 index 00000000000..59b09974149 --- /dev/null +++ b/src/ripple/protocol/impl/token_errors.h @@ -0,0 +1,101 @@ +//------------------------------------------------------------------------------ +/* + This file is part 
of rippled: https://github.com/ripple/rippled + Copyright (c) 2022 Ripple Labs Inc. + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef RIPPLE_PROTOCOL_TOKEN_ERRORS_H_INCLUDED +#define RIPPLE_PROTOCOL_TOKEN_ERRORS_H_INCLUDED + +#include + +namespace ripple { +enum class TokenCodecErrc { + success = 0, + inputTooLarge, + inputTooSmall, + badB58Character, + outputTooSmall, + mismatchedTokenType, + mismatchedChecksum, + invalidEncodingChar, + unknown, +}; +} + +namespace std { +template <> +struct is_error_code_enum : true_type +{ +}; +} // namespace std + +namespace ripple { +namespace detail { +class TokenCodecErrcCategory : public std::error_category +{ +public: + // Return a short descriptive name for the category + virtual const char* + name() const noexcept override final + { + return "TokenCodecError"; + } + // Return what each enum means in text + virtual std::string + message(int c) const override final + { + switch (static_cast(c)) + { + case TokenCodecErrc::success: + return "conversion successful"; + case TokenCodecErrc::inputTooLarge: + return "input too large"; + case TokenCodecErrc::inputTooSmall: + return "input too small"; + case TokenCodecErrc::badB58Character: + return "bad base 58 character"; + 
case TokenCodecErrc::outputTooSmall: + return "output too small"; + case TokenCodecErrc::mismatchedTokenType: + return "mismatched token type"; + case TokenCodecErrc::mismatchedChecksum: + return "mismatched checksum"; + case TokenCodecErrc::invalidEncodingChar: + return "invalid encoding char"; + case TokenCodecErrc::unknown: + return "unknown"; + default: + return "unknown"; + } + } +}; +} // namespace detail + +inline const ripple::detail::TokenCodecErrcCategory& +TokenCodecErrcCategory() +{ + static ripple::detail::TokenCodecErrcCategory c; + return c; +} + +inline std::error_code +make_error_code(ripple::TokenCodecErrc e) +{ + return {static_cast(e), TokenCodecErrcCategory()}; +} +} // namespace ripple +#endif // TOKEN_ERRORS_H_ diff --git a/src/ripple/protocol/impl/tokens.cpp b/src/ripple/protocol/impl/tokens.cpp index 816d49e40df..8445eec38ca 100644 --- a/src/ripple/protocol/impl/tokens.cpp +++ b/src/ripple/protocol/impl/tokens.cpp @@ -16,11 +16,25 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ //============================================================================== +// +/* The base58 encoding & decoding routines in the b58_ref namespace are taken + * from Bitcoin but have been modified from the original. + * + * Copyright (c) 2014 The Bitcoin Core developers + * Distributed under the MIT software license, see the accompanying + * file COPYING or http://www.opensource.org/licenses/mit-license.php. + */ + +#include #include #include -#include +#include + #include +#include +#include + #include #include #include @@ -28,6 +42,97 @@ #include #include +/* +Converting between bases is straight forward. First, some background: + +Given the coefficients C[m], ... ,C[0] and base B, those coefficients represent +the number C[m]*B^m + ... + C[0]*B^0; The following pseudo-code converts the +coefficients to the (infinite precision) integer N: + +``` +N = 0; +i = m ;; N.B. 
m is the index of the largest coefficient +while (i>=0) + N = N + C[i]*B^i + i = i - 1 +``` + +For example, in base 10, the number 437 represents the integer 4*10^2 + 3*10^1 + +7*10^0. In base 16, 437 is the same as 4*16^2 + 3*16^1 + 7*16^0. + +To find the coefficients that represent the integer N in base B, we start by +computing the lowest order coefficients and work up to the highest order +coefficients. The following pseudo-code converts the (infinite precision) +integer N to the correct coefficients: + +``` +i = 0 +while(N) + C[i] = N mod B + N = floor(N/B) + i = i + 1 +``` + +For example, to find the coefficients of the integer 437 in base 10: + +C[0] is 437 mod 10; C[0] = 7; +N is floor(437/10); N = 43; +C[1] is 43 mod 10; C[1] = 3; +N is floor(43/10); N = 4; +C[2] is 4 mod 10; C[2] = 4; +N is floor(4/10); N = 0; +Since N is 0, the algorithm stops. + + +To convert between a number represented with coefficients from base B1 to that +same number represented with coefficients from base B2, we can use the algorithm +that converts coefficients from base B1 to an integer, and then use the +algorithm that converts a number to coefficients from base B2. + +There is a useful shortcut that can be used if one of the bases is a power of +the other base. If B1 == B2^G, then each coefficient from base B1 can be +converted to base B2 independently to create a group of "G" B2 coefficients. +These coefficients can be simply concatenated together. Since 16 == 2^4, this +property is what makes base 16 useful when dealing with binary numbers. For +example, consider converting the base 16 number "93" to binary. The base 16 +coefficient 9 is represented in base 2 with the coefficients 1,0,0,1. The base +16 coefficient 3 is represented in base 2 with the coefficients 0,0,1,1. To get +the final answer, just concatenate those two independent conversions together. +The base 16 number "93" is the binary number "10010011". 
+ +The original (now reference) algorithm to convert from base 58 to a binary +number used the + +``` +N = 0; +for i in m to 0 inclusive + N = N + C[i]*B^i +``` + +algorithm. + +However, the algorithm above is pseudo-code. In particular, the variable "N" is +an infinite precision integer in that pseudo-code. Real computers do +computations on registers, and these registers have limited length. Modern +computers use 64-bit general purpose registers, and can multiply two 64-bit +numbers and obtain a 128-bit result (in two registers). + +The original algorithm in essence converted from base 58 to base 256 (base +2^8). The new, faster algorithm converts from base 58 to base 58^10 (this is +fast using the shortcut described above), then from base 58^10 to base 2^64 +(this is slow, and requires multi-precision arithmetic), and then from base 2^64 +to base 2^8 (this is fast, using the shortcut described above). Base 58^10 is +chosen because it is the largest power of 58 that will fit into a 64-bit +register. + +While it may seem counter-intuitive that converting from base 58 -> base 58^10 +-> base 2^64 -> base 2^8 is faster than directly converting from base 58 -> base +2^8, it is actually 10x-15x faster. The reason for the speed increase is that two of +the conversions are trivial (converting between bases where one base is a power +of another base), and doing the multi-precision computations with larger +coefficient sizes greatly speeds up the multi-precision computations. 
+*/ + namespace ripple { static constexpr char const* alphabetForward = @@ -86,16 +191,31 @@ checksum(void* out, void const* message, std::size_t size) std::memcpy(out, h.data(), 4); } +[[nodiscard]] std::string +encodeBase58Token(TokenType type, void const* token, std::size_t size) +{ +#ifndef _MSC_VER + return b58_fast::encodeBase58Token(type, token, size); +#else + return b58_ref::encodeBase58Token(type, token, size); +#endif +} + +[[nodiscard]] std::string +decodeBase58Token(std::string const& s, TokenType type) +{ +#ifndef _MSC_VER + return b58_fast::decodeBase58Token(s, type); +#else + return b58_ref::decodeBase58Token(s, type); +#endif +} + +namespace b58_ref { + namespace detail { -/* The base58 encoding & decoding routines in this namespace are taken from - * Bitcoin but have been modified from the original. - * - * Copyright (c) 2014 The Bitcoin Core developers - * Distributed under the MIT software license, see the accompanying - * file COPYING or http://www.opensource.org/licenses/mit-license.php. - */ -static std::string +std::string encodeBase58( void const* message, std::size_t size, @@ -146,7 +266,7 @@ encodeBase58( return str; } -static std::string +std::string decodeBase58(std::string const& s) { auto psz = reinterpret_cast(s.c_str()); @@ -241,5 +361,367 @@ decodeBase58Token(std::string const& s, TokenType type) // Skip the leading type byte and the trailing checksum. 
return ret.substr(1, ret.size() - 1 - guard.size()); } +} // namespace b58_ref + +#ifndef _MSC_VER +// The algorithms use gcc's int128 (fast MS version will have to wait, in the +// meantime MS falls back to the slower reference implementation) +namespace b58_fast { +namespace detail { +// Note: both the input and output will be BIG ENDIAN +B58Result> +b256_to_b58_be(std::span input, std::span out) +{ + // Max valid input is 38 bytes: + // (33 bytes for nodepublic + 1 byte token + 4 bytes checksum) + if (input.size() > 38) + { + return Unexpected(TokenCodecErrc::inputTooLarge); + }; + + auto count_leading_zeros = + [](std::span const& col) -> std::size_t { + std::size_t count = 0; + for (auto const& c : col) + { + if (c != 0) + { + return count; + } + count += 1; + } + return count; + }; + + auto const input_zeros = count_leading_zeros(input); + input = input.subspan(input_zeros); + + // Allocate enough base 2^64 coeff for encoding 38 bytes + // log(2^(38*8),2^64)) ~= 4.75. So 5 coeff are enough + std::array base_2_64_coeff_buf{}; + std::span const base_2_64_coeff = + [&]() -> std::span { + // convert input from big endian to native u64, lowest coeff first + std::size_t num_coeff = 0; + for (int i = 0; i < base_2_64_coeff_buf.size(); ++i) + { + if (i * 8 >= input.size()) + { + break; + } + auto const src_i_end = input.size() - i * 8; + if (src_i_end >= 8) + { + std::memcpy( + &base_2_64_coeff_buf[num_coeff], &input[src_i_end - 8], 8); + boost::endian::big_to_native_inplace( + base_2_64_coeff_buf[num_coeff]); + } + else + { + std::uint64_t be = 0; + for (int bi = 0; bi < src_i_end; ++bi) + { + be <<= 8; + be |= input[bi]; + } + base_2_64_coeff_buf[num_coeff] = be; + }; + num_coeff += 1; + } + return std::span(base_2_64_coeff_buf.data(), num_coeff); + }(); + + // Allocate enough base 58^10 coeff for encoding 38 bytes + // log(2^(38*8),58^10)) ~= 5.18. 
So 6 coeff are enough + std::array base_58_10_coeff{}; + constexpr std::uint64_t B_58_10 = 430804206899405824; // 58^10; + std::size_t num_58_10_coeffs = 0; + std::size_t cur_2_64_end = base_2_64_coeff.size(); + // compute the base 58^10 coeffs + while (cur_2_64_end > 0) + { + base_58_10_coeff[num_58_10_coeffs] = + ripple::b58_fast::detail::inplace_bigint_div_rem( + base_2_64_coeff.subspan(0, cur_2_64_end), B_58_10); + num_58_10_coeffs += 1; + if (base_2_64_coeff[cur_2_64_end - 1] == 0) + { + cur_2_64_end -= 1; + } + } + + // Translate the result into the alphabet + // Put all the zeros at the beginning, then all the values from the output + std::fill( + out.begin(), out.begin() + input_zeros, ::ripple::alphabetForward[0]); + + // iterate through the base 58^10 coeff + // convert to base 58 big endian then + // convert to alphabet big endian + bool skip_zeros = true; + auto out_index = input_zeros; + for (int i = num_58_10_coeffs - 1; i >= 0; --i) + { + if (skip_zeros && base_58_10_coeff[i] == 0) + { + continue; + } + std::array const b58_be = + ripple::b58_fast::detail::b58_10_to_b58_be(base_58_10_coeff[i]); + std::size_t to_skip = 0; + std::span b58_be_s{b58_be.data(), b58_be.size()}; + if (skip_zeros) + { + to_skip = count_leading_zeros(b58_be_s); + skip_zeros = false; + if (out.size() < (i + 1) * 10 - to_skip) + { + return Unexpected(TokenCodecErrc::outputTooSmall); + } + } + for (auto b58_coeff : b58_be_s.subspan(to_skip)) + { + out[out_index] = ::ripple::alphabetForward[b58_coeff]; + out_index += 1; + } + } + + return out.subspan(0, out_index); +} + +// Note the input is BIG ENDIAN (some fn in this module use little endian) +B58Result> +b58_to_b256_be(std::string_view input, std::span out) +{ + // Convert from b58 to b 58^10 + + // Max encoded value is 38 bytes + // log(2^(38*8),58) ~= 51.9 + if (input.size() > 52) + { + return Unexpected(TokenCodecErrc::inputTooLarge); + }; + if (out.size() < 8) + { + return Unexpected(TokenCodecErrc::outputTooSmall); + } + 
+ auto count_leading_zeros = [&](auto const& col) -> std::size_t { + std::size_t count = 0; + for (auto const& c : col) + { + if (c != ::ripple::alphabetForward[0]) + { + return count; + } + count += 1; + } + return count; + }; + + auto const input_zeros = count_leading_zeros(input); + + // Allocate enough base 58^10 coeff for encoding 38 bytes + // (33 bytes for nodepublic + 1 byte token + 4 bytes checksum) + // log(2^(38*8),58^10)) ~= 5.18. So 6 coeff are enough + std::array b_58_10_coeff{}; + auto [num_full_coeffs, partial_coeff_len] = + ripple::b58_fast::detail::div_rem(input.size(), 10); + auto const num_partial_coeffs = partial_coeff_len ? 1 : 0; + auto const num_b_58_10_coeffs = num_full_coeffs + num_partial_coeffs; + assert(num_b_58_10_coeffs <= b_58_10_coeff.size()); + for (auto c : input.substr(0, partial_coeff_len)) + { + auto cur_val = ::ripple::alphabetReverse[c]; + if (cur_val < 0) + { + return Unexpected(TokenCodecErrc::invalidEncodingChar); + } + b_58_10_coeff[0] *= 58; + b_58_10_coeff[0] += cur_val; + } + for (int i = 0; i < 10; ++i) + { + for (int j = 0; j < num_full_coeffs; ++j) + { + auto c = input[partial_coeff_len + j * 10 + i]; + auto cur_val = ::ripple::alphabetReverse[c]; + if (cur_val < 0) + { + return Unexpected(TokenCodecErrc::invalidEncodingChar); + } + b_58_10_coeff[num_partial_coeffs + j] *= 58; + b_58_10_coeff[num_partial_coeffs + j] += cur_val; + } + } + + constexpr std::uint64_t B_58_10 = 430804206899405824; // 58^10; + + // log(2^(38*8),2^64) ~= 4.75) + std::array result{}; + result[0] = b_58_10_coeff[0]; + std::size_t cur_result_size = 1; + for (int i = 1; i < num_b_58_10_coeffs; ++i) + { + std::uint64_t const c = b_58_10_coeff[i]; + ripple::b58_fast::detail::inplace_bigint_mul( + std::span(&result[0], cur_result_size + 1), B_58_10); + ripple::b58_fast::detail::inplace_bigint_add( + std::span(&result[0], cur_result_size + 1), c); + if (result[cur_result_size] != 0) + { + cur_result_size += 1; + } + } + std::fill(out.begin(), 
out.begin() + input_zeros, 0); + auto cur_out_i = input_zeros; + // Don't write leading zeros to the output for the most significant + // coeff + { + std::uint64_t const c = result[cur_result_size - 1]; + auto skip_zero = true; + // start and end of output range + for (int i = 0; i < 8; ++i) + { + std::uint8_t const b = (c >> (8 * (7 - i))) & 0xff; + if (skip_zero) + { + if (b == 0) + { + continue; + } + skip_zero = false; + } + out[cur_out_i] = b; + cur_out_i += 1; + } + } + if ((cur_out_i + 8 * (cur_result_size - 1)) > out.size()) + { + return Unexpected(TokenCodecErrc::outputTooSmall); + } + + for (int i = cur_result_size - 2; i >= 0; --i) + { + auto c = result[i]; + boost::endian::native_to_big_inplace(c); + memcpy(&out[cur_out_i], &c, 8); + cur_out_i += 8; + } + + return out.subspan(0, cur_out_i); +} +} // namespace detail + +B58Result> +encodeBase58Token( + TokenType token_type, + std::span input, + std::span out) +{ + constexpr std::size_t tmpBufSize = 128; + std::array buf; + if (input.size() > tmpBufSize - 5) + { + return Unexpected(TokenCodecErrc::inputTooLarge); + } + if (input.size() == 0) + { + return Unexpected(TokenCodecErrc::inputTooSmall); + } + // + buf[0] = static_cast(token_type); + // buf[1..=input.len()] = input; + memcpy(&buf[1], input.data(), input.size()); + size_t const checksum_i = input.size() + 1; + // buf[checksum_i..checksum_i + 4] = checksum + checksum(buf.data() + checksum_i, buf.data(), checksum_i); + std::span b58Span(buf.data(), input.size() + 5); + return detail::b256_to_b58_be(b58Span, out); +} +// Convert from base 58 to base 256, largest coefficients first +// The input is encoded in XPRL format, with the token in the first +// byte and the checksum in the last four bytes. +// The decoded base 256 value does not include the token type or checksum. +// It is an error if the token type or checksum does not match. 
+B58Result> +decodeBase58Token( + TokenType type, + std::string_view s, + std::span outBuf) +{ + std::array tmpBuf; + auto const decodeResult = + detail::b58_to_b256_be(s, std::span(tmpBuf.data(), tmpBuf.size())); + + if (!decodeResult) + return decodeResult; + + auto const ret = decodeResult.value(); + + // Reject zero length tokens + if (ret.size() < 6) + return Unexpected(TokenCodecErrc::inputTooSmall); + + // The type must match. + if (type != static_cast(static_cast(ret[0]))) + return Unexpected(TokenCodecErrc::mismatchedTokenType); + + // And the checksum must as well. + std::array guard; + checksum(guard.data(), ret.data(), ret.size() - guard.size()); + if (!std::equal(guard.rbegin(), guard.rend(), ret.rbegin())) + { + return Unexpected(TokenCodecErrc::mismatchedChecksum); + } + + std::size_t const outSize = ret.size() - 1 - guard.size(); + if (outBuf.size() < outSize) + return Unexpected(TokenCodecErrc::outputTooSmall); + // Skip the leading type byte and the trailing checksum. + std::copy(ret.begin() + 1, ret.begin() + outSize + 1, outBuf.begin()); + return outBuf.subspan(0, outSize); +} + +[[nodiscard]] std::string +encodeBase58Token(TokenType type, void const* token, std::size_t size) +{ + std::string sr; + // The largest object encoded as base58 is 33 bytes; This will be encoded in + // at most ceil(log(2^256,58)) bytes, or 46 bytes. 128 is plenty (and + // there's not real benefit making it smaller). Note that 46 bytes may be + // encoded in more than 46 base58 chars. 
Since decode uses 64 as the + // over-allocation, this function uses 128 (again, over-allocation assuming + // 2 base 58 char per byte) + sr.resize(128); + std::span outSp( + reinterpret_cast(sr.data()), sr.size()); + std::span inSp( + reinterpret_cast(token), size); + auto r = b58_fast::encodeBase58Token(type, inSp, outSp); + if (!r) + return {}; + sr.resize(r.value().size()); + return sr; +} + +[[nodiscard]] std::string +decodeBase58Token(std::string const& s, TokenType type) +{ + std::string sr; + // The largest object encoded as base58 is 33 bytes; 64 is plenty (and + // there's no benefit making it smaller) + sr.resize(64); + std::span outSp( + reinterpret_cast(sr.data()), sr.size()); + auto r = b58_fast::decodeBase58Token(type, s, outSp); + if (!r) + return {}; + sr.resize(r.value().size()); + return sr; +} +} // namespace b58_fast +#endif // _MSC_VER } // namespace ripple diff --git a/src/ripple/protocol/tokens.h b/src/ripple/protocol/tokens.h index 0afb4509f41..f51c3f96f95 100644 --- a/src/ripple/protocol/tokens.h +++ b/src/ripple/protocol/tokens.h @@ -20,12 +20,21 @@ #ifndef RIPPLE_PROTOCOL_TOKENS_H_INCLUDED #define RIPPLE_PROTOCOL_TOKENS_H_INCLUDED +#include +#include +#include + #include #include +#include #include +#include namespace ripple { +template +using B58Result = Expected; + enum class TokenType : std::uint8_t { None = 1, // unused NodePublic = 28, @@ -38,11 +47,11 @@ enum class TokenType : std::uint8_t { }; template -std::optional +[[nodiscard]] std::optional parseBase58(std::string const& s); template -std::optional +[[nodiscard]] std::optional parseBase58(TokenType type, std::string const& s); /** Encode data in Base58Check format using XRPL alphabet @@ -56,20 +65,71 @@ parseBase58(TokenType type, std::string const& s); @return the encoded token. 
*/ -std::string +[[nodiscard]] std::string encodeBase58Token(TokenType type, void const* token, std::size_t size); -/** Decode a token of given type encoded using Base58Check and the XRPL alphabet +[[nodiscard]] std::string +decodeBase58Token(std::string const& s, TokenType type); - @param s The encoded token - @param type The type expected for this token. +namespace b58_ref { +// The reference version does not use gcc extensions (int128 in particular) +[[nodiscard]] std::string +encodeBase58Token(TokenType type, void const* token, std::size_t size); + +[[nodiscard]] std::string +decodeBase58Token(std::string const& s, TokenType type); + +namespace detail { +// Expose detail functions for unit tests only +std::string +encodeBase58( + void const* message, + std::size_t size, + void* temp, + std::size_t temp_size); - @return If the encoded token decodes correctly, the token data without - the type or checksum. And empty string otherwise. -*/ std::string +decodeBase58(std::string const& s); +} // namespace detail +} // namespace b58_ref + +#ifndef _MSC_VER +namespace b58_fast { +// Use the fast version (10-15x faster) is using gcc extensions (int128 in +// particular) +[[nodiscard]] B58Result> +encodeBase58Token( + TokenType token_type, + std::span input, + std::span out); + +[[nodiscard]] B58Result> +decodeBase58Token( + TokenType type, + std::string_view s, + std::span outBuf); + +// This interface matches the old interface, but requires additional allocation +[[nodiscard]] std::string +encodeBase58Token(TokenType type, void const* token, std::size_t size); + +// This interface matches the old interface, but requires additional allocation +[[nodiscard]] std::string decodeBase58Token(std::string const& s, TokenType type); +namespace detail { +// Expose detail functions for unit tests only +B58Result> +b256_to_b58_be( + std::span input, + std::span out); + +B58Result> +b58_to_b256_be(std::string_view input, std::span out); +} // namespace detail + +} // namespace 
b58_fast +#endif // _MSC_VER } // namespace ripple #endif diff --git a/src/test/basics/base58_test.cpp b/src/test/basics/base58_test.cpp new file mode 100644 index 00000000000..6f3d495d7a9 --- /dev/null +++ b/src/test/basics/base58_test.cpp @@ -0,0 +1,439 @@ +//------------------------------------------------------------------------------ +/* + This file is part of rippled: https://github.com/ripple/rippled + Copyright (c) 2022 Ripple Labs Inc. + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+*/ +//============================================================================== + +#ifndef _MSC_VER + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace ripple { +namespace test { +namespace { + +[[nodiscard]] inline auto +randEngine() -> std::mt19937& +{ + static std::mt19937 r = [] { + std::random_device rd; + return std::mt19937{rd()}; + }(); + return r; +} + +constexpr int numTokenTypeIndexes = 9; + +[[nodiscard]] inline auto +tokenTypeAndSize(int i) -> std::tuple +{ + assert(i < numTokenTypeIndexes); + + switch (i) + { + using enum ripple::TokenType; + case 0: + return {None, 20}; + case 1: + return {NodePublic, 32}; + case 2: + return {NodePublic, 33}; + case 3: + return {NodePrivate, 32}; + case 4: + return {AccountID, 20}; + case 5: + return {AccountPublic, 32}; + case 6: + return {AccountPublic, 33}; + case 7: + return {AccountSecret, 32}; + case 8: + return {FamilySeed, 16}; + default: + throw std::invalid_argument( + "Invalid token selection passed to tokenTypeAndSize() " + "in " __FILE__); + } +} + +[[nodiscard]] inline auto +randomTokenTypeAndSize() -> std::tuple +{ + using namespace ripple; + auto& rng = randEngine(); + std::uniform_int_distribution<> d(0, 8); + return tokenTypeAndSize(d(rng)); +} + +// Return the token type and subspan of `d` to use as test data. 
+[[nodiscard]] inline auto +randomB256TestData(std::span d) + -> std::tuple> +{ + auto& rng = randEngine(); + std::uniform_int_distribution dist(0, 255); + auto [tokType, tokSize] = randomTokenTypeAndSize(); + std::generate(d.begin(), d.begin() + tokSize, [&] { return dist(rng); }); + return {tokType, d.subspan(0, tokSize)}; +} + +inline void +printAsChar(std::span a, std::span b) +{ + auto asString = [](std::span s) { + std::string r; + r.resize(s.size()); + std::copy(s.begin(), s.end(), r.begin()); + return r; + }; + auto sa = asString(a); + auto sb = asString(b); + std::cerr << "\n\n" << sa << "\n" << sb << "\n"; +} + +inline void +printAsInt(std::span a, std::span b) +{ + auto asString = [](std::span s) -> std::string { + std::stringstream sstr; + for (auto i : s) + { + sstr << std::setw(3) << int(i) << ','; + } + return sstr.str(); + }; + auto sa = asString(a); + auto sb = asString(b); + std::cerr << "\n\n" << sa << "\n" << sb << "\n"; +} + +} // namespace + +namespace multiprecision_utils { + +boost::multiprecision::checked_uint512_t +toBoostMP(std::span in) +{ + boost::multiprecision::checked_uint512_t mbp = 0; + for (auto i = in.rbegin(); i != in.rend(); ++i) + { + mbp <<= 64; + mbp += *i; + } + return mbp; +} + +std::vector +randomBigInt(std::uint8_t minSize = 1, std::uint8_t maxSize = 5) +{ + auto eng = randEngine(); + std::uniform_int_distribution numCoeffDist(minSize, maxSize); + std::uniform_int_distribution dist; + auto const numCoeff = numCoeffDist(eng); + std::vector coeffs; + coeffs.reserve(numCoeff); + for (int i = 0; i < numCoeff; ++i) + { + coeffs.push_back(dist(eng)); + } + return coeffs; +} +} // namespace multiprecision_utils + +class base58_test : public beast::unit_test::suite +{ + void + testMultiprecision() + { + testcase("b58_multiprecision"); + + using namespace boost::multiprecision; + + constexpr std::size_t iters = 100000; + auto eng = randEngine(); + std::uniform_int_distribution dist; + for (int i = 0; i < iters; ++i) + { + 
std::uint64_t const d = dist(eng); + if (!d) + continue; + auto bigInt = multiprecision_utils::randomBigInt(); + auto const boostBigInt = multiprecision_utils::toBoostMP( + std::span(bigInt.data(), bigInt.size())); + + auto const refDiv = boostBigInt / d; + auto const refMod = boostBigInt % d; + + auto const mod = b58_fast::detail::inplace_bigint_div_rem( + std::span(bigInt.data(), bigInt.size()), d); + auto const foundDiv = multiprecision_utils::toBoostMP(bigInt); + BEAST_EXPECT(refMod.convert_to() == mod); + BEAST_EXPECT(foundDiv == refDiv); + } + for (int i = 0; i < iters; ++i) + { + std::uint64_t const d = dist(eng); + auto bigInt = multiprecision_utils::randomBigInt(/*minSize*/ 2); + if (bigInt[bigInt.size() - 1] == + std::numeric_limits::max()) + { + bigInt[bigInt.size() - 1] -= 1; // Prevent overflow + } + auto const boostBigInt = multiprecision_utils::toBoostMP( + std::span(bigInt.data(), bigInt.size())); + + auto const refAdd = boostBigInt + d; + + b58_fast::detail::inplace_bigint_add( + std::span(bigInt.data(), bigInt.size()), d); + auto const foundAdd = multiprecision_utils::toBoostMP(bigInt); + BEAST_EXPECT(refAdd == foundAdd); + } + for (int i = 0; i < iters; ++i) + { + std::uint64_t const d = dist(eng); + auto bigInt = multiprecision_utils::randomBigInt(/* minSize */ 2); + // inplace mul requires the most significant coeff to be zero to + // hold the result. 
+ bigInt[bigInt.size() - 1] = 0; + auto const boostBigInt = multiprecision_utils::toBoostMP( + std::span(bigInt.data(), bigInt.size())); + + auto const refMul = boostBigInt * d; + + b58_fast::detail::inplace_bigint_mul( + std::span(bigInt.data(), bigInt.size()), d); + auto const foundMul = multiprecision_utils::toBoostMP(bigInt); + BEAST_EXPECT(refMul == foundMul); + } + } + + void + testFastMatchesRef() + { + testcase("fast_matches_ref"); + auto testRawEncode = [&](std::span const& b256Data) { + std::array b58ResultBuf[2]; + std::array, 2> b58Result; + + std::array b256ResultBuf[2]; + std::array, 2> b256Result; + for (int i = 0; i < 2; ++i) + { + std::span const outBuf{b58ResultBuf[i]}; + if (i == 0) + { + auto const r = ripple::b58_fast::detail::b256_to_b58_be( + b256Data, outBuf); + BEAST_EXPECT(r); + b58Result[i] = r.value(); + } + else + { + std::array tmpBuf; + std::string const s = ripple::b58_ref::detail::encodeBase58( + b256Data.data(), + b256Data.size(), + tmpBuf.data(), + tmpBuf.size()); + BEAST_EXPECT(s.size()); + b58Result[i] = outBuf.subspan(0, s.size()); + std::copy(s.begin(), s.end(), b58Result[i].begin()); + } + } + if (BEAST_EXPECT(b58Result[0].size() == b58Result[1].size())) + { + if (!BEAST_EXPECT( + memcmp( + b58Result[0].data(), + b58Result[1].data(), + b58Result[0].size()) == 0)) + { + printAsChar(b58Result[0], b58Result[1]); + } + } + + for (int i = 0; i < 2; ++i) + { + std::span const outBuf{ + b256ResultBuf[i].data(), b256ResultBuf[i].size()}; + if (i == 0) + { + std::string const in( + b58Result[i].data(), + b58Result[i].data() + b58Result[i].size()); + auto const r = + ripple::b58_fast::detail::b58_to_b256_be(in, outBuf); + BEAST_EXPECT(r); + b256Result[i] = r.value(); + } + else + { + std::string const st( + b58Result[i].begin(), b58Result[i].end()); + std::string const s = + ripple::b58_ref::detail::decodeBase58(st); + BEAST_EXPECT(s.size()); + b256Result[i] = outBuf.subspan(0, s.size()); + std::copy(s.begin(), s.end(), 
b256Result[i].begin()); + } + } + + if (BEAST_EXPECT(b256Result[0].size() == b256Result[1].size())) + { + if (!BEAST_EXPECT( + memcmp( + b256Result[0].data(), + b256Result[1].data(), + b256Result[0].size()) == 0)) + { + printAsInt(b256Result[0], b256Result[1]); + } + } + }; + + auto testTokenEncode = [&](ripple::TokenType const tokType, + std::span const& b256Data) { + std::array b58ResultBuf[2]; + std::array, 2> b58Result; + + std::array b256ResultBuf[2]; + std::array, 2> b256Result; + for (int i = 0; i < 2; ++i) + { + std::span const outBuf{ + b58ResultBuf[i].data(), b58ResultBuf[i].size()}; + if (i == 0) + { + auto const r = ripple::b58_fast::encodeBase58Token( + tokType, b256Data, outBuf); + BEAST_EXPECT(r); + b58Result[i] = r.value(); + } + else + { + std::string const s = ripple::b58_ref::encodeBase58Token( + tokType, b256Data.data(), b256Data.size()); + BEAST_EXPECT(s.size()); + b58Result[i] = outBuf.subspan(0, s.size()); + std::copy(s.begin(), s.end(), b58Result[i].begin()); + } + } + if (BEAST_EXPECT(b58Result[0].size() == b58Result[1].size())) + { + if (!BEAST_EXPECT( + memcmp( + b58Result[0].data(), + b58Result[1].data(), + b58Result[0].size()) == 0)) + { + printAsChar(b58Result[0], b58Result[1]); + } + } + + for (int i = 0; i < 2; ++i) + { + std::span const outBuf{ + b256ResultBuf[i].data(), b256ResultBuf[i].size()}; + if (i == 0) + { + std::string const in( + b58Result[i].data(), + b58Result[i].data() + b58Result[i].size()); + auto const r = ripple::b58_fast::decodeBase58Token( + tokType, in, outBuf); + BEAST_EXPECT(r); + b256Result[i] = r.value(); + } + else + { + std::string const st( + b58Result[i].begin(), b58Result[i].end()); + std::string const s = + ripple::b58_ref::decodeBase58Token(st, tokType); + BEAST_EXPECT(s.size()); + b256Result[i] = outBuf.subspan(0, s.size()); + std::copy(s.begin(), s.end(), b256Result[i].begin()); + } + } + + if (BEAST_EXPECT(b256Result[0].size() == b256Result[1].size())) + { + if (!BEAST_EXPECT( + memcmp( + 
b256Result[0].data(), + b256Result[1].data(), + b256Result[0].size()) == 0)) + { + printAsInt(b256Result[0], b256Result[1]); + } + } + }; + + auto testIt = [&](ripple::TokenType const tokType, + std::span const& b256Data) { + testRawEncode(b256Data); + testTokenEncode(tokType, b256Data); + }; + + // test every token type with data where every byte is the same and the + // bytes range from 0-255 + for (int i = 0; i < numTokenTypeIndexes; ++i) + { + std::array b256DataBuf; + auto const [tokType, tokSize] = tokenTypeAndSize(i); + for (int d = 0; d <= 255; ++d) + { + memset(b256DataBuf.data(), d, tokSize); + testIt(tokType, std::span(b256DataBuf.data(), tokSize)); + } + } + + // test with random data + constexpr std::size_t iters = 100000; + for (int i = 0; i < iters; ++i) + { + std::array b256DataBuf; + auto const [tokType, b256Data] = randomB256TestData(b256DataBuf); + testIt(tokType, b256Data); + } + } + + void + run() override + { + testMultiprecision(); + testFastMatchesRef(); + } +}; + +BEAST_DEFINE_TESTSUITE(base58, ripple_basics, ripple); + +} // namespace test +} // namespace ripple +#endif // _MSC_VER