From c5455ce0d39b1efe15f7e6d60917eb5434ed2a70 Mon Sep 17 00:00:00 2001 From: Quinton Miller Date: Sat, 21 Dec 2024 19:20:09 +0800 Subject: [PATCH] Implement `fast_float` for `String#to_f` (#15195) This is a source port of https://github.com/fastfloat/fast_float, which is both locale-independent and platform-independent, meaning the special float values will work on MSYS2's MINGW64 environment too, as we are not calling `LibC.strtod` anymore. Additionally, non-ASCII whitespace characters are now stripped, just like `#to_i`. **The current implementation doesn't accept hexfloats.** This implementation brings a roughly 3x speedup, without any additional allocations. --- spec/manual/string_to_f32_spec.cr | 27 + spec/manual/string_to_f_supplemental_spec.cr | 103 +++ spec/std/string_spec.cr | 4 + spec/support/number.cr | 32 + src/float/fast_float.cr | 75 ++ src/float/fast_float/ascii_number.cr | 270 +++++++ src/float/fast_float/bigint.cr | 577 +++++++++++++++ src/float/fast_float/decimal_to_binary.cr | 177 +++++ src/float/fast_float/digit_comparison.cr | 399 +++++++++++ src/float/fast_float/fast_table.cr | 695 +++++++++++++++++++ src/float/fast_float/float_common.cr | 294 ++++++++ src/float/fast_float/parse_number.cr | 197 ++++++ src/lib_c/x86_64-windows-msvc/c/stdlib.cr | 4 +- src/string.cr | 67 +- 14 files changed, 2859 insertions(+), 62 deletions(-) create mode 100644 spec/manual/string_to_f32_spec.cr create mode 100644 spec/manual/string_to_f_supplemental_spec.cr create mode 100644 src/float/fast_float.cr create mode 100644 src/float/fast_float/ascii_number.cr create mode 100644 src/float/fast_float/bigint.cr create mode 100644 src/float/fast_float/decimal_to_binary.cr create mode 100644 src/float/fast_float/digit_comparison.cr create mode 100644 src/float/fast_float/fast_table.cr create mode 100644 src/float/fast_float/float_common.cr create mode 100644 src/float/fast_float/parse_number.cr diff --git a/spec/manual/string_to_f32_spec.cr 
b/spec/manual/string_to_f32_spec.cr
new file mode 100644
index 000000000000..6d0940b1190c
--- /dev/null
+++ b/spec/manual/string_to_f32_spec.cr
@@ -0,0 +1,27 @@
+require "spec"
+
+# Exhaustively checks that for all 4294967296 possible `Float32` values,
+# `to_s.to_f32` returns the original number. Splits the floats into 4096 bins
+# for better progress tracking. Also useful as a sort of benchmark.
+#
+# This was originally added when `String#to_f` moved from `LibC.strtod` to
+# `fast_float`, but is applicable to any other implementation as well.
+describe "x.to_s.to_f32 == x" do
+  (0_u32..0xFFF_u32).each do |i|
+    it "%03x00000..%03xfffff" % {i, i} do
+      0x100000.times do |j|
+        bits = i << 20 | j
+        float = bits.unsafe_as(Float32)
+        str = float.to_s
+        val = str.to_f32?.should_not be_nil
+
+        if float.nan?
+          val.nan?.should be_true
+        else
+          val.should eq(float)
+          Math.copysign(1, val).should eq(Math.copysign(1, float))
+        end
+      end
+    end
+  end
+end
diff --git a/spec/manual/string_to_f_supplemental_spec.cr b/spec/manual/string_to_f_supplemental_spec.cr
new file mode 100644
index 000000000000..1b016e22c86a
--- /dev/null
+++ b/spec/manual/string_to_f_supplemental_spec.cr
@@ -0,0 +1,111 @@
+# Runs the fast_float supplemental test suite:
+# https://github.com/fastfloat/supplemental_test_files
+#
+# Supplemental data files for testing floating parsing (credit: Nigel Tao for
+# the data)
+#
+# LICENSE file (Apache 2): https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/LICENSE
+#
+# Due to the sheer volume of the test cases (5.2+ million test cases across
+# 270+ MB of text) these specs are not vendored into the Crystal repository.
+
+require "spec"
+require "http/client"
+require "../support/number"
+require "wait_group"
+
+# these specs permit underflow and overflow to return 0 and infinity
+# respectively (when `ret.ec == Errno::ERANGE`), so we have to use
+# `Float::FastFloat` directly
+def fast_float_to_f32(str)
+  value = uninitialized Float32
+  start = str.to_unsafe
+  finish = start + str.bytesize
+  options = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
+
+  ret = Float::FastFloat::BinaryFormat_Float32.new.from_chars_advanced(start, finish, pointerof(value), options)
+  {Errno::NONE, Errno::ERANGE}.should contain(ret.ec)
+  value
+end
+
+# `Float64` counterpart of `fast_float_to_f32`
+def fast_float_to_f64(str)
+  value = uninitialized Float64
+  start = str.to_unsafe
+  finish = start + str.bytesize
+  options = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
+
+  ret = Float::FastFloat::BinaryFormat_Float64.new.from_chars_advanced(start, finish, pointerof(value), options)
+  {Errno::NONE, Errno::ERANGE}.should contain(ret.ec)
+  value
+end
+
+RAW_BASE_URL = "https://raw.githubusercontent.com/fastfloat/supplemental_test_files/7cc512a7c60361ebe1baf54991d7905efdc62aa0/data/" # @1.0.0
+
+TEST_SUITES = %w(
+  freetype-2-7.txt
+  google-double-conversion.txt
+  google-wuffs.txt
+  ibm-fpgen.txt
+  lemire-fast-double-parser.txt
+  lemire-fast-float.txt
+  more-test-cases.txt
+  remyoudompheng-fptest-0.txt
+  remyoudompheng-fptest-1.txt
+  remyoudompheng-fptest-2.txt
+  remyoudompheng-fptest-3.txt
+  tencent-rapidjson.txt
+  ulfjack-ryu.txt
+)
+
+# maps each suite file name to its test cases; every input line has 4
+# space-separated fields: f16 bits, f32 bits, f64 bits (all hex), input string
+test_suite_cache = {} of String => Array({UInt32, UInt64, String})
+puts "Fetching #{TEST_SUITES.size} test suites"
+WaitGroup.wait do |wg|
+  TEST_SUITES.each do |suite|
+    wg.spawn do
+      url = RAW_BASE_URL + suite
+
+      cache = HTTP::Client.get(url) do |res|
+        res.body_io.each_line.map do |line|
+          args = line.split(' ')
+          raise "BUG: should have 4 args" unless args.size == 4
+
+          # f16_bits = args[0].to_u16(16)
+          f32_bits = args[1].to_u32(16)
+          f64_bits = args[2].to_u64(16)
+          str = args[3]
+
+          {f32_bits, f64_bits, str}
+        end.to_a
+      end
+
+      puts "#{cache.size} test cases cached from #{url}"
+      test_suite_cache[suite] = cache
+    end
+  end
+end
+puts "There are a total of #{test_suite_cache.sum(&.last.size)} test cases"
+
+describe String do
+  describe "#to_f" do
+    test_suite_cache.each do |suite, cache|
+      describe suite do
+        each_hardware_rounding_mode do |mode, mode_name|
+          it mode_name do
+            # `it` only registers the example; its body runs later, after
+            # `each_hardware_rounding_mode` has already restored the previous
+            # mode, so the mode must be re-applied inside the example itself
+            with_hardware_rounding_mode(mode) do
+              cache.each do |f32_bits, f64_bits, str|
+                fast_float_to_f32(str).unsafe_as(UInt32).should eq(f32_bits)
+                fast_float_to_f64(str).unsafe_as(UInt64).should eq(f64_bits)
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr
index 2bbc63f7e18e..72e05adab458 100644
--- a/spec/std/string_spec.cr
+++ b/spec/std/string_spec.cr
@@ -482,6 +482,7 @@ describe "String" do
     it { "1Y2P0IJ32E8E7".to_i64(36).should eq(9223372036854775807) }
   end
 
+  # more specs are available in `spec/manual/string_to_f_supplemental_spec.cr`
   it "does to_f" do
     expect_raises(ArgumentError) { "".to_f }
     "".to_f?.should be_nil
@@ -503,6 +504,7 @@ describe "String" do
     " 1234.56 ".to_f?(whitespace: false).should be_nil
     expect_raises(ArgumentError) { " 1234.56foo".to_f }
     " 1234.56foo".to_f?.should be_nil
+    "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f.should eq(1234.56_f64)
     "123.45 x".to_f64(strict: false).should eq(123.45_f64)
     expect_raises(ArgumentError) { "x1.2".to_f64 }
     "x1.2".to_f64?.should be_nil
@@ -547,6 +549,7 @@ describe "String" do
     " 1234.56 ".to_f32?(whitespace: false).should be_nil
     expect_raises(ArgumentError) { " 1234.56foo".to_f32 }
     " 1234.56foo".to_f32?.should be_nil
+    "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f32.should eq(1234.56_f32)
     "123.45 x".to_f32(strict: false).should eq(123.45_f32)
     expect_raises(ArgumentError) { "x1.2".to_f32 }
     "x1.2".to_f32?.should be_nil
@@ -590,6 +593,7 @@ describe "String" do
     " 1234.56 ".to_f64?(whitespace: false).should be_nil
expect_raises(ArgumentError) { " 1234.56foo".to_f64 } " 1234.56foo".to_f64?.should be_nil + "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f64.should eq(1234.56_f64) "123.45 x".to_f64(strict: false).should eq(123.45_f64) expect_raises(ArgumentError) { "x1.2".to_f64 } "x1.2".to_f64?.should be_nil diff --git a/spec/support/number.cr b/spec/support/number.cr index 4ec22f9dcf87..404d2bd32438 100644 --- a/spec/support/number.cr +++ b/spec/support/number.cr @@ -94,3 +94,35 @@ macro hexfloat(str) ::Float64.parse_hexfloat({{ str }}) {% end %} end + +# See also: https://github.com/crystal-lang/crystal/issues/15192 +lib LibC + {% if flag?(:win32) %} + FE_TONEAREST = 0x00000000 + FE_DOWNWARD = 0x00000100 + FE_UPWARD = 0x00000200 + FE_TOWARDZERO = 0x00000300 + {% else %} + FE_TONEAREST = 0x00000000 + FE_DOWNWARD = 0x00000400 + FE_UPWARD = 0x00000800 + FE_TOWARDZERO = 0x00000C00 + {% end %} + + fun fegetround : Int + fun fesetround(round : Int) : Int +end + +def with_hardware_rounding_mode(mode, &) + old_mode = LibC.fegetround + LibC.fesetround(mode) + yield ensure LibC.fesetround(old_mode) +end + +def each_hardware_rounding_mode(&) + {% for mode in %w(FE_TONEAREST FE_DOWNWARD FE_UPWARD FE_TOWARDZERO) %} + with_hardware_rounding_mode(LibC::{{ mode.id }}) do + yield LibC::{{ mode.id }}, {{ mode }} + end + {% end %} +end diff --git a/src/float/fast_float.cr b/src/float/fast_float.cr new file mode 100644 index 000000000000..010476db4bca --- /dev/null +++ b/src/float/fast_float.cr @@ -0,0 +1,75 @@ +struct Float + # :nodoc: + # Source port of the floating-point part of fast_float for C++: + # https://github.com/fastfloat/fast_float + # + # fast_float implements the C++17 `std::from_chars`, which accepts a subset of + # the C `strtod` / `strtof`'s string format: + # + # - a leading plus sign is disallowed, but both fast_float and this port + # accept it; + # - the exponent may be required or disallowed, depending on the format + # argument (this port always allows both); + 
# - hexfloats are not enabled by default, and fast_float doesn't implement it; + # (https://github.com/fastfloat/fast_float/issues/124) + # - hexfloats cannot start with `0x` or `0X`. + # + # The following is their license: + # + # Licensed under either of Apache License, Version 2.0 or MIT license or + # BOOST license. + # + # Unless you explicitly state otherwise, any contribution intentionally + # submitted for inclusion in this repository by you, as defined in the + # Apache-2.0 license, shall be triple licensed as above, without any + # additional terms or conditions. + # + # Main differences from the original fast_float: + # + # - Only `UC == UInt8` is implemented and tested, not the other wide chars; + # - No explicit SIMD (the original mainly uses this for wide char strings). + # + # The following compile-time configuration is assumed: + # + # - #define FASTFLOAT_ALLOWS_LEADING_PLUS + # - #define FLT_EVAL_METHOD 0 + module FastFloat + # Current revision: https://github.com/fastfloat/fast_float/tree/v6.1.6 + + def self.to_f64?(str : String, whitespace : Bool, strict : Bool) : Float64? + value = uninitialized Float64 + start = str.to_unsafe + finish = start + str.bytesize + options = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general) + + if whitespace + start += str.calc_excess_left + finish -= str.calc_excess_right + end + + ret = BinaryFormat_Float64.new.from_chars_advanced(start, finish, pointerof(value), options) + if ret.ec == Errno::NONE && (!strict || ret.ptr == finish) + value + end + end + + def self.to_f32?(str : String, whitespace : Bool, strict : Bool) : Float32? 
+ value = uninitialized Float32 + start = str.to_unsafe + finish = start + str.bytesize + options = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general) + + if whitespace + start += str.calc_excess_left + finish -= str.calc_excess_right + end + + ret = BinaryFormat_Float32.new.from_chars_advanced(start, finish, pointerof(value), options) + if ret.ec == Errno::NONE && (!strict || ret.ptr == finish) + value + end + end + end +end + +require "./fast_float/parse_number" diff --git a/src/float/fast_float/ascii_number.cr b/src/float/fast_float/ascii_number.cr new file mode 100644 index 000000000000..1c4b43ea4b7d --- /dev/null +++ b/src/float/fast_float/ascii_number.cr @@ -0,0 +1,270 @@ +require "./float_common" + +module Float::FastFloat + # Next function can be micro-optimized, but compilers are entirely able to + # optimize it well. + def self.is_integer?(c : UC) : Bool forall UC + !(c > '9'.ord || c < '0'.ord) + end + + # Read 8 UC into a u64. Truncates UC if not char. + def self.read8_to_u64(chars : UC*) : UInt64 forall UC + val = uninitialized UInt64 + chars.as(UInt8*).copy_to(pointerof(val).as(UInt8*), sizeof(UInt64)) + {% if IO::ByteFormat::SystemEndian == IO::ByteFormat::BigEndian %} + val.byte_swap + {% else %} + val + {% end %} + end + + # credit @aqrit + def self.parse_eight_digits_unrolled(val : UInt64) : UInt32 + mask = 0x000000FF000000FF_u64 + mul1 = 0x000F424000000064_u64 # 100 + (1000000ULL << 32) + mul2 = 0x0000271000000001_u64 # 1 + (10000ULL << 32) + val &-= 0x3030303030303030 + val = (val &* 10) &+ val.unsafe_shr(8) # val = (val * 2561) >> 8 + val = (((val & mask) &* mul1) &+ ((val.unsafe_shr(16) & mask) &* mul2)).unsafe_shr(32) + val.to_u32! + end + + # Call this if chars are definitely 8 digits. 
+ def self.parse_eight_digits_unrolled(chars : UC*) : UInt32 forall UC + parse_eight_digits_unrolled(read8_to_u64(chars)) + end + + # credit @aqrit + def self.is_made_of_eight_digits_fast?(val : UInt64) : Bool + ((val &+ 0x4646464646464646_u64) | (val &- 0x3030303030303030_u64)) & 0x8080808080808080_u64 == 0 + end + + # NOTE(crystal): returns {p, i} + def self.loop_parse_if_eight_digits(p : UInt8*, pend : UInt8*, i : UInt64) : {UInt8*, UInt64} + # optimizes better than parse_if_eight_digits_unrolled() for UC = char. + while pend - p >= 8 && is_made_of_eight_digits_fast?(read8_to_u64(p)) + i = i &* 100000000 &+ parse_eight_digits_unrolled(read8_to_u64(p)) # in rare cases, this will overflow, but that's ok + p += 8 + end + {p, i} + end + + enum ParseError + NoError + + # [JSON-only] The minus sign must be followed by an integer. + MissingIntegerAfterSign + + # A sign must be followed by an integer or dot. + MissingIntegerOrDotAfterSign + + # [JSON-only] The integer part must not have leading zeros. + LeadingZerosInIntegerPart + + # [JSON-only] The integer part must have at least one digit. + NoDigitsInIntegerPart + + # [JSON-only] If there is a decimal point, there must be digits in the + # fractional part. + NoDigitsInFractionalPart + + # The mantissa must have at least one digit. + NoDigitsInMantissa + + # Scientific notation requires an exponential part. 
+ MissingExponentialPart + end + + struct ParsedNumberStringT(UC) + property exponent : Int64 = 0 + property mantissa : UInt64 = 0 + property lastmatch : UC* = Pointer(UC).null + property negative : Bool = false + property valid : Bool = false + property too_many_digits : Bool = false + # contains the range of the significant digits + property integer : Slice(UC) = Slice(UC).empty # non-nullable + property fraction : Slice(UC) = Slice(UC).empty # nullable + property error : ParseError = :no_error + end + + alias ByteSpan = ::Bytes + alias ParsedNumberString = ParsedNumberStringT(UInt8) + + def self.report_parse_error(p : UC*, error : ParseError) : ParsedNumberStringT(UC) forall UC + answer = ParsedNumberStringT(UC).new + answer.valid = false + answer.lastmatch = p + answer.error = error + answer + end + + # Assuming that you use no more than 19 digits, this will parse an ASCII + # string. + def self.parse_number_string(p : UC*, pend : UC*, options : ParseOptionsT(UC)) : ParsedNumberStringT(UC) forall UC + fmt = options.format + decimal_point = options.decimal_point + + answer = ParsedNumberStringT(UInt8).new + answer.valid = false + answer.too_many_digits = false + answer.negative = p.value === '-' + + if p.value === '-' || (!fmt.json_fmt? && p.value === '+') + p += 1 + if p == pend + return report_parse_error(p, :missing_integer_or_dot_after_sign) + end + if fmt.json_fmt? + if !is_integer?(p.value) # a sign must be followed by an integer + return report_parse_error(p, :missing_integer_after_sign) + end + else + if !is_integer?(p.value) && p.value != decimal_point # a sign must be followed by an integer or the dot + return report_parse_error(p, :missing_integer_or_dot_after_sign) + end + end + end + start_digits = p + + i = 0_u64 # an unsigned int avoids signed overflows (which are bad) + + while p != pend && is_integer?(p.value) + # a multiplication by 10 is cheaper than an arbitrary integer multiplication + i = i &* 10 &+ (p.value &- '0'.ord).to_u64! 
# might overflow, we will handle the overflow later + p += 1 + end + end_of_integer_part = p + digit_count = (end_of_integer_part - start_digits).to_i32! + answer.integer = Slice.new(start_digits, digit_count) + if fmt.json_fmt? + # at least 1 digit in integer part, without leading zeros + if digit_count == 0 + return report_parse_error(p, :no_digits_in_integer_part) + end + if start_digits[0] === '0' && digit_count > 1 + return report_parse_error(p, :leading_zeros_in_integer_part) + end + end + + exponent = 0_i64 + has_decimal_point = p != pend && p.value == decimal_point + if has_decimal_point + p += 1 + before = p + # can occur at most twice without overflowing, but let it occur more, since + # for integers with many digits, digit parsing is the primary bottleneck. + p, i = loop_parse_if_eight_digits(p, pend, i) + + while p != pend && is_integer?(p.value) + digit = (p.value &- '0'.ord).to_u8! + p += 1 + i = i &* 10 &+ digit # in rare cases, this will overflow, but that's ok + end + exponent = before - p + answer.fraction = Slice.new(before, (p - before).to_i32!) + digit_count &-= exponent + end + if fmt.json_fmt? + # at least 1 digit in fractional part + if has_decimal_point && exponent == 0 + return report_parse_error(p, :no_digits_in_fractional_part) + end + elsif digit_count == 0 # we must have encountered at least one integer! + return report_parse_error(p, :no_digits_in_mantissa) + end + exp_number = 0_i64 # explicit exponential part + if (fmt.scientific? && p != pend && p.value.unsafe_chr.in?('e', 'E')) || + (fmt.fortran_fmt? && p != pend && p.value.unsafe_chr.in?('+', '-', 'd', 'D')) + location_of_e = p + if p.value.unsafe_chr.in?('e', 'E', 'd', 'D') + p += 1 + end + neg_exp = false + if p != pend && p.value === '-' + neg_exp = true + p += 1 + elsif p != pend && p.value === '+' # '+' on exponent is allowed by C++17 20.19.3.(7.1) + p += 1 + end + if p == pend || !is_integer?(p.value) + if !fmt.fixed? 
+ # The exponential part is invalid for scientific notation, so it must + # be a trailing token for fixed notation. However, fixed notation is + # disabled, so report a scientific notation error. + return report_parse_error(p, :missing_exponential_part) + end + # Otherwise, we will be ignoring the 'e'. + p = location_of_e + else + while p != pend && is_integer?(p.value) + digit = (p.value &- '0'.ord).to_u8! + if exp_number < 0x10000000 + exp_number = exp_number &* 10 &+ digit + end + p += 1 + end + if neg_exp + exp_number = 0_i64 &- exp_number + end + exponent &+= exp_number + end + else + # If it is scientific and not fixed, we have to bail out. + if fmt.scientific? && !fmt.fixed? + return report_parse_error(p, :missing_exponential_part) + end + end + answer.lastmatch = p + answer.valid = true + + # If we frequently had to deal with long strings of digits, + # we could extend our code by using a 128-bit integer instead + # of a 64-bit integer. However, this is uncommon. + # + # We can deal with up to 19 digits. + if digit_count > 19 # this is uncommon + # It is possible that the integer had an overflow. + # We have to handle the case where we have 0.0000somenumber. + # We need to be mindful of the case where we only have zeroes... + # E.g., 0.000000000...000. + start = start_digits + while start != pend && (start.value === '0' || start.value == decimal_point) + if start.value === '0' + digit_count &-= 1 + end + start += 1 + end + + if digit_count > 19 + answer.too_many_digits = true + # Let us start again, this time, avoiding overflows. + # We don't need to check if is_integer, since we use the + # pre-tokenized spans from above. + i = 0_u64 + p = answer.integer.to_unsafe + int_end = p + answer.integer.size + minimal_nineteen_digit_integer = 1000000000000000000_u64 + while i < minimal_nineteen_digit_integer && p != int_end + i = i &* 10 &+ (p.value &- '0'.ord).to_u64! 
+ p += 1 + end + if i >= minimal_nineteen_digit_integer # We have a big integers + exponent = (end_of_integer_part - p) &+ exp_number + else # We have a value with a fractional component. + p = answer.fraction.to_unsafe + frac_end = p + answer.fraction.size + while i < minimal_nineteen_digit_integer && p != frac_end + i = i &* 10 &+ (p.value &- '0'.ord).to_u64! + p += 1 + end + exponent = (answer.fraction.to_unsafe - p) &+ exp_number + end + # We have now corrected both exponent and i, to a truncated value + end + end + answer.exponent = exponent + answer.mantissa = i + answer + end +end diff --git a/src/float/fast_float/bigint.cr b/src/float/fast_float/bigint.cr new file mode 100644 index 000000000000..14b0bb2d0549 --- /dev/null +++ b/src/float/fast_float/bigint.cr @@ -0,0 +1,577 @@ +require "./float_common" + +module Float::FastFloat + # the limb width: we want efficient multiplication of double the bits in + # limb, or for 64-bit limbs, at least 64-bit multiplication where we can + # extract the high and low parts efficiently. this is every 64-bit + # architecture except for sparc, which emulates 128-bit multiplication. + # we might have platforms where `CHAR_BIT` is not 8, so let's avoid + # doing `8 * sizeof(limb)`. + {% if flag?(:bits64) %} + alias Limb = UInt64 + LIMB_BITS = 64 + {% else %} + alias Limb = UInt32 + LIMB_BITS = 32 + {% end %} + + alias LimbSpan = Slice(Limb) + + # number of bits in a bigint. this needs to be at least the number + # of bits required to store the largest bigint, which is + # `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or + # ~3600 bits, so we round to 4000. + BIGINT_BITS = 4000 + {% begin %} + BIGINT_LIMBS = {{ BIGINT_BITS // LIMB_BITS }} + {% end %} + + # vector-like type that is allocated on the stack. the entire + # buffer is pre-allocated, and only the length changes. + # NOTE(crystal): Deviates a lot from the original implementation to reuse + # `Indexable` as much as possible. 
Contrast with `Crystal::SmallDeque` and + # `Crystal::Tracing::BufferIO` + struct Stackvec(Size) + include Indexable::Mutable(Limb) + + @data = uninitialized Limb[Size] + + # we never need more than 150 limbs + @length = 0_u16 + + def unsafe_fetch(index : Int) : Limb + @data.to_unsafe[index] + end + + def unsafe_put(index : Int, value : Limb) : Limb + @data.to_unsafe[index] = value + end + + def size : Int32 + @length.to_i32! + end + + def to_unsafe : Limb* + @data.to_unsafe + end + + def to_slice : LimbSpan + LimbSpan.new(@data.to_unsafe, @length) + end + + def initialize + end + + # create stack vector from existing limb span. + def initialize(s : LimbSpan) + try_extend(s) + end + + # index from the end of the container + def rindex(index : Int) : Limb + rindex = @length &- index &- 1 + @data.to_unsafe[rindex] + end + + # set the length, without bounds checking. + def size=(@length : UInt16) : UInt16 + length + end + + def capacity : Int32 + Size.to_i32! + end + + # append item to vector, without bounds checking. + def push_unchecked(value : Limb) : Nil + @data.to_unsafe[@length] = value + @length &+= 1 + end + + # append item to vector, returning if item was added + def try_push(value : Limb) : Bool + if size < capacity + push_unchecked(value) + true + else + false + end + end + + # add items to the vector, from a span, without bounds checking + def extend_unchecked(s : LimbSpan) : Nil + ptr = @data.to_unsafe + @length + s.to_unsafe.copy_to(ptr, s.size) + @length &+= s.size + end + + # try to add items to the vector, returning if items were added + def try_extend(s : LimbSpan) : Bool + if size &+ s.size <= capacity + extend_unchecked(s) + true + else + false + end + end + + # resize the vector, without bounds checking + # if the new size is longer than the vector, assign value to each + # appended item. 
+ def resize_unchecked(new_len : UInt16, value : Limb) : Nil + if new_len > @length + count = new_len &- @length + first = @data.to_unsafe + @length + count.times { |i| first[i] = value } + @length = new_len + else + @length = new_len + end + end + + # try to resize the vector, returning if the vector was resized. + def try_resize(new_len : UInt16, value : Limb) : Bool + if new_len > capacity + false + else + resize_unchecked(new_len, value) + true + end + end + + # check if any limbs are non-zero after the given index. + # this needs to be done in reverse order, since the index + # is relative to the most significant limbs. + def nonzero?(index : Int) : Bool + while index < size + if rindex(index) != 0 + return true + end + index &+= 1 + end + false + end + + # normalize the big integer, so most-significant zero limbs are removed. + def normalize : Nil + while @length > 0 && rindex(0) == 0 + @length &-= 1 + end + end + end + + # NOTE(crystal): returns also *truncated* by value (ditto below) + def self.empty_hi64 : {UInt64, Bool} + truncated = false + {0_u64, truncated} + end + + def self.uint64_hi64(r0 : UInt64) : {UInt64, Bool} + truncated = false + shl = r0.leading_zeros_count + {r0.unsafe_shl(shl), truncated} + end + + def self.uint64_hi64(r0 : UInt64, r1 : UInt64) : {UInt64, Bool} + shl = r0.leading_zeros_count + if shl == 0 + truncated = r1 != 0 + {r0, truncated} + else + shr = 64 &- shl + truncated = r1.unsafe_shl(shl) != 0 + {r0.unsafe_shl(shl) | r1.unsafe_shr(shr), truncated} + end + end + + def self.uint32_hi64(r0 : UInt32) : {UInt64, Bool} + uint64_hi64(r0.to_u64!) + end + + def self.uint32_hi64(r0 : UInt32, r1 : UInt32) : {UInt64, Bool} + x0 = r0.to_u64! + x1 = r1.to_u64! + uint64_hi64(x0.unsafe_shl(32) | x1) + end + + def self.uint32_hi64(r0 : UInt32, r1 : UInt32, r2 : UInt32) : {UInt64, Bool} + x0 = r0.to_u64! + x1 = r1.to_u64! + x2 = r2.to_u64! + uint64_hi64(x0, x1.unsafe_shl(32) | x2) + end + + # add two small integers, checking for overflow. 
+ # we want an efficient operation. + # NOTE(crystal): returns also *overflow* by value + def self.scalar_add(x : Limb, y : Limb) : {Limb, Bool} + z = x &+ y + overflow = z < x + {z, overflow} + end + + # multiply two small integers, getting both the high and low bits. + # NOTE(crystal): passes *carry* in and out by value + def self.scalar_mul(x : Limb, y : Limb, carry : Limb) : {Limb, Limb} + {% if Limb == UInt64 %} + z = x.to_u128! &* y.to_u128! &+ carry + carry = z.unsafe_shr(LIMB_BITS).to_u64! + {z.to_u64!, carry} + {% else %} + z = x.to_u64! &* y.to_u64! &+ carry + carry = z.unsafe_shr(LIMB_BITS).to_u32! + {z.to_u32!, carry} + {% end %} + end + + # add scalar value to bigint starting from offset. + # used in grade school multiplication + def self.small_add_from(vec : Stackvec(Size)*, y : Limb, start : Int) : Bool forall Size + index = start + carry = y + + while carry != 0 && index < vec.value.size + x, overflow = scalar_add(vec.value.unsafe_fetch(index), carry) + vec.value.unsafe_put(index, x) + carry = Limb.new!(overflow ? 1 : 0) + index &+= 1 + end + if carry != 0 + fastfloat_try vec.value.try_push(carry) + end + true + end + + # add scalar value to bigint. + def self.small_add(vec : Stackvec(Size)*, y : Limb) : Bool forall Size + small_add_from(vec, y, 0) + end + + # multiply bigint by scalar value. + def self.small_mul(vec : Stackvec(Size)*, y : Limb) : Bool forall Size + carry = Limb.zero + i = 0 + while i < vec.value.size + xi = vec.value.unsafe_fetch(i) + z, carry = scalar_mul(xi, y, carry) + vec.value.unsafe_put(i, z) + i &+= 1 + end + if carry != 0 + fastfloat_try vec.value.try_push(carry) + end + true + end + + # add bigint to bigint starting from index. + # used in grade school multiplication + def self.large_add_from(x : Stackvec(Size)*, y : LimbSpan, start : Int) : Bool forall Size + # the effective x buffer is from `xstart..x.len()`, so exit early + # if we can't get that current range. 
+ if x.value.size < start || y.size > x.value.size &- start + fastfloat_try x.value.try_resize((y.size &+ start).to_u16!, 0) + end + + carry = false + index = 0 + while index < y.size + xi = x.value.unsafe_fetch(index &+ start) + yi = y.unsafe_fetch(index) + c2 = false + xi, c1 = scalar_add(xi, yi) + if carry + xi, c2 = scalar_add(xi, 1) + end + x.value.unsafe_put(index &+ start, xi) + carry = c1 || c2 + index &+= 1 + end + + # handle overflow + if carry + fastfloat_try small_add_from(x, 1, y.size &+ start) + end + true + end + + # add bigint to bigint. + def self.large_add_from(x : Stackvec(Size)*, y : LimbSpan) : Bool forall Size + large_add_from(x, y, 0) + end + + # grade-school multiplication algorithm + def self.long_mul(x : Stackvec(Size)*, y : LimbSpan) : Bool forall Size + xs = x.value.to_slice + z = Stackvec(Size).new(xs) + zs = z.to_slice + + if y.size != 0 + y0 = y.unsafe_fetch(0) + fastfloat_try small_mul(x, y0) + (1...y.size).each do |index| + yi = y.unsafe_fetch(index) + zi = Stackvec(Size).new + if yi != 0 + # re-use the same buffer throughout + zi.size = 0 + fastfloat_try zi.try_extend(zs) + fastfloat_try small_mul(pointerof(zi), yi) + zis = zi.to_slice + fastfloat_try large_add_from(x, zis, index) + end + end + end + + x.value.normalize + true + end + + # grade-school multiplication algorithm + def self.large_mul(x : Stackvec(Size)*, y : LimbSpan) : Bool forall Size + if y.size == 1 + fastfloat_try small_mul(x, y.unsafe_fetch(0)) + else + fastfloat_try long_mul(x, y) + end + true + end + + module Pow5Tables + LARGE_STEP = 135_u32 + + SMALL_POWER_OF_5 = [ + 1_u64, + 5_u64, + 25_u64, + 125_u64, + 625_u64, + 3125_u64, + 15625_u64, + 78125_u64, + 390625_u64, + 1953125_u64, + 9765625_u64, + 48828125_u64, + 244140625_u64, + 1220703125_u64, + 6103515625_u64, + 30517578125_u64, + 152587890625_u64, + 762939453125_u64, + 3814697265625_u64, + 19073486328125_u64, + 95367431640625_u64, + 476837158203125_u64, + 2384185791015625_u64, + 11920928955078125_u64, + 
59604644775390625_u64, + 298023223876953125_u64, + 1490116119384765625_u64, + 7450580596923828125_u64, + ] + + {% if Limb == UInt64 %} + LARGE_POWER_OF_5 = Slice[ + 1414648277510068013_u64, 9180637584431281687_u64, 4539964771860779200_u64, + 10482974169319127550_u64, 198276706040285095_u64, + ] + {% else %} + LARGE_POWER_OF_5 = Slice[ + 4279965485_u32, 329373468_u32, 4020270615_u32, 2137533757_u32, 4287402176_u32, + 1057042919_u32, 1071430142_u32, 2440757623_u32, 381945767_u32, 46164893_u32, + ] + {% end %} + end + + # big integer type. implements a small subset of big integer + # arithmetic, using simple algorithms since asymptotically + # faster algorithms are slower for a small number of limbs. + # all operations assume the big-integer is normalized. + # NOTE(crystal): contrast with ::BigInt + struct Bigint + # storage of the limbs, in little-endian order. + @vec = Stackvec(BIGINT_LIMBS).new + + def initialize + end + + def initialize(value : UInt64) + {% if Limb == UInt64 %} + @vec.push_unchecked(value) + {% else %} + @vec.push_unchecked(value.to_u32!) + @vec.push_unchecked(value.unsafe_shr(32).to_u32!) + {% end %} + @vec.normalize + end + + # get the high 64 bits from the vector, and if bits were truncated. + # this is to get the significant digits for the float. + # NOTE(crystal): returns also *truncated* by value + def hi64 : {UInt64, Bool} + {% if Limb == UInt64 %} + if @vec.empty? + FastFloat.empty_hi64 + elsif @vec.size == 1 + FastFloat.uint64_hi64(@vec.rindex(0)) + else + result, truncated = FastFloat.uint64_hi64(@vec.rindex(0), @vec.rindex(1)) + truncated ||= @vec.nonzero?(2) + {result, truncated} + end + {% else %} + if @vec.empty? 
+ FastFloat.empty_hi64 + elsif @vec.size == 1 + FastFloat.uint32_hi64(@vec.rindex(0)) + elsif @vec.size == 2 + FastFloat.uint32_hi64(@vec.rindex(0), @vec.rindex(1)) + else + result, truncated = FastFloat.uint32_hi64(@vec.rindex(0), @vec.rindex(1), @vec.rindex(2)) + truncated ||= @vec.nonzero?(3) + {result, truncated} + end + {% end %} + end + + # compare two big integers, returning their ordering. + # assumes both are normalized. if the return value is + # negative, other is larger, if the return value is + # positive, this is larger, otherwise they are equal. + # the limbs are stored in little-endian order, so we + # must compare the limbs in reverse order. + def compare(other : Bigint*) : Int32 + if @vec.size > other.value.@vec.size + 1 + elsif @vec.size < other.value.@vec.size + -1 + else + index = @vec.size + while index > 0 + xi = @vec.unsafe_fetch(index &- 1) + yi = other.value.@vec.unsafe_fetch(index &- 1) + if xi > yi + return 1 + elsif xi < yi + return -1 + end + index &-= 1 + end + 0 + end + end + + # shift left each limb n bits, carrying over to the new limb + # returns true if we were able to shift all the digits. + def shl_bits(n : Int) : Bool + # Internally, for each item, we shift left by n, and add the previous + # right shifted limb-bits. + # For example, we transform (for u8) shifted left 2, to: + # b10100100 b01000010 + # b10 b10010001 b00001000 + shl = n + shr = LIMB_BITS &- n + prev = Limb.zero + index = 0 + while index < @vec.size + xi = @vec.unsafe_fetch(index) + @vec.unsafe_put(index, xi.unsafe_shl(shl) | prev.unsafe_shr(shr)) + prev = xi + index &+= 1 + end + + carry = prev.unsafe_shr(shr) + if carry != 0 + return @vec.try_push(carry) + end + true + end + + # move the limbs left by `n` limbs. + def shl_limbs(n : Int) : Bool + if n &+ @vec.size > @vec.capacity + false + elsif !@vec.empty? 
+ # move limbs + dst = @vec.to_unsafe + n + src = @vec.to_unsafe + src.move_to(dst, @vec.size) + # fill in empty limbs + first = @vec.to_unsafe + n.times { |i| first[i] = 0 } + @vec.size = (@vec.size &+ n).to_u16! + true + else + true + end + end + + # move the limbs left by `n` bits. + def shl(n : Int) : Bool + rem = n.unsafe_mod(LIMB_BITS) + div = n.unsafe_div(LIMB_BITS) + if rem != 0 + FastFloat.fastfloat_try shl_bits(rem) + end + if div != 0 + FastFloat.fastfloat_try shl_limbs(div) + end + true + end + + # get the number of leading zeros in the bigint. + def ctlz : Int32 + if @vec.empty? + 0 + else + @vec.rindex(0).leading_zeros_count.to_i32! + end + end + + # get the number of bits in the bigint. + def bit_length : Int32 + lz = ctlz + (LIMB_BITS &* @vec.size &- lz).to_i32! + end + + def mul(y : Limb) : Bool + FastFloat.small_mul(pointerof(@vec), y) + end + + def add(y : Limb) : Bool + FastFloat.small_add(pointerof(@vec), y) + end + + # multiply as if by 2 raised to a power. + def pow2(exp : UInt32) : Bool + shl(exp) + end + + # multiply as if by 5 raised to a power. + def pow5(exp : UInt32) : Bool + # multiply by a power of 5 + large = Pow5Tables::LARGE_POWER_OF_5 + while exp >= Pow5Tables::LARGE_STEP + FastFloat.fastfloat_try FastFloat.large_mul(pointerof(@vec), large) + exp &-= Pow5Tables::LARGE_STEP + end + small_step = {{ Limb == UInt64 ? 27_u32 : 13_u32 }} + max_native = {{ Limb == UInt64 ? 7450580596923828125_u64 : 1220703125_u32 }} + while exp >= small_step + FastFloat.fastfloat_try FastFloat.small_mul(pointerof(@vec), max_native) + exp &-= small_step + end + if exp != 0 + FastFloat.fastfloat_try FastFloat.small_mul(pointerof(@vec), Limb.new!(Pow5Tables::SMALL_POWER_OF_5.unsafe_fetch(exp))) + end + + true + end + + # multiply as if by 10 raised to a power. 
+    def pow10(exp : UInt32) : Bool
+      FastFloat.fastfloat_try pow5(exp)
+      pow2(exp) # 10 ** exp == 5 ** exp * 2 ** exp
+    end
+  end
+end
diff --git a/src/float/fast_float/decimal_to_binary.cr b/src/float/fast_float/decimal_to_binary.cr
new file mode 100644
index 000000000000..eea77c44c6be
--- /dev/null
+++ b/src/float/fast_float/decimal_to_binary.cr
@@ -0,0 +1,177 @@
+require "./float_common"
+require "./fast_table"
+
+module Float::FastFloat
+  # This will compute or rather approximate w * 5**q and return a pair of 64-bit
+  # words approximating the result, with the "high" part corresponding to the
+  # most significant bits and the low part corresponding to the least significant
+  # bits.
+  def self.compute_product_approximation(q : Int64, w : UInt64, bit_precision : Int) : Value128
+    power_of_five_128 = Powers::POWER_OF_FIVE_128.to_unsafe
+
+    index = 2 &* (q &- Powers::SMALLEST_POWER_OF_FIVE) # two consecutive UInt64 table words are read per power of five
+    # For small values of q, e.g., q in [0,27], the answer is always exact
+    # because the line `value128 firstproduct = full_multiplication(w,
+    # power_of_five_128[index]);` gives the exact answer.
+    firstproduct = w.to_u128! &* power_of_five_128[index]
+
+    precision_mask = bit_precision < 64 ? 0xFFFFFFFFFFFFFFFF_u64.unsafe_shr(bit_precision) : 0xFFFFFFFFFFFFFFFF_u64
+    if firstproduct.unsafe_shr(64).bits_set?(precision_mask) # could further guard with (lower + w < lower)
+      # regarding the second product, we only need secondproduct.high, but our
+      # expectation is that the compiler will optimize this extra work away if
+      # needed.
+      secondproduct = w.to_u128! &* power_of_five_128[index &+ 1]
+      firstproduct &+= secondproduct.unsafe_shr(64)
+    end
+    Value128.new(firstproduct)
+  end
+
+  module Detail
+    # For q in (0,350), we have that
+    #  f = (((152170 + 65536) * q ) >> 16);
+    # is equal to
+    #   floor(p) + q
+    # where
+    #   p = log(5**q)/log(2) = q * log(5)/log(2)
+    #
+    # For negative values of q in (-400,0), we have that
+    #  f = (((152170 + 65536) * q ) >> 16);
+    # is equal to
+    #   -ceil(p) + q
+    # where
+    #   p = log(5**-q)/log(2) = -q * log(5)/log(2)
+    def self.power(q : Int32) : Int32
+      ((152170 &+ 65536) &* q).unsafe_shr(16) &+ 63
+    end
+  end
+
+  module BinaryFormat(T, EquivUint)
+    # create an adjusted mantissa, biased by the invalid power2
+    # for significant digits already multiplied by 10 ** q.
+    def compute_error_scaled(q : Int64, w : UInt64, lz : Int) : AdjustedMantissa
+      hilz = w.unsafe_shr(63).to_i32! ^ 1 # 1 when the top bit of w is clear, 0 when it is set
+      bias = mantissa_explicit_bits &- minimum_exponent
+
+      AdjustedMantissa.new(
+        mantissa: w.unsafe_shl(hilz),
+        power2: Detail.power(q.to_i32!) &+ bias &- hilz &- lz &- 62 &+ INVALID_AM_BIAS,
+      )
+    end
+
+    # w * 10 ** q, without rounding the representation up.
+    # the power2 in the exponent will be adjusted by INVALID_AM_BIAS.
+    def compute_error(q : Int64, w : UInt64) : AdjustedMantissa
+      lz = w.leading_zeros_count.to_i32!
+      w = w.unsafe_shl(lz)
+      product = FastFloat.compute_product_approximation(q, w, mantissa_explicit_bits &+ 3)
+      compute_error_scaled(q, product.high, lz)
+    end
+
+    # w * 10 ** q
+    # The returned value should be a valid ieee64 number that simply needs to be
+    # packed. However, in some very rare cases, the computation will fail. In such
+    # cases, we return an adjusted_mantissa with a negative power of 2: the caller
+    # should recompute in such cases.
+    def compute_float(q : Int64, w : UInt64) : AdjustedMantissa
+      if w == 0 || q < smallest_power_of_ten
+        # result should be zero
+        return AdjustedMantissa.new(
+          power2: 0,
+          mantissa: 0,
+        )
+      end
+      if q > largest_power_of_ten
+        # we want to get infinity:
+        return AdjustedMantissa.new(
+          power2: infinite_power,
+          mantissa: 0,
+        )
+      end
+      # At this point in time q is in [powers::smallest_power_of_five,
+      # powers::largest_power_of_five].
+
+      # We want the most significant bit of w to be 1. Shift if needed.
+      lz = w.leading_zeros_count
+      w = w.unsafe_shl(lz)
+
+      # The required precision is binary::mantissa_explicit_bits() + 3 because
+      # 1. We need the implicit bit
+      # 2. We need an extra bit for rounding purposes
+      # 3. We might lose a bit due to the "upperbit" routine (result too small,
+      # requiring a shift)
+
+      product = FastFloat.compute_product_approximation(q, w, mantissa_explicit_bits &+ 3)
+      # The computed 'product' is always sufficient.
+      # Mathematical proof:
+      # Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to
+      # appear) See script/mushtak_lemire.py
+
+      # The "compute_product_approximation" function can be slightly slower than a
+      # branchless approach: value128 product = compute_product(q, w); but in
+      # practice, we can win big with the compute_product_approximation if its
+      # additional branch is easily predicted. Which is best is data specific.
+      upperbit = product.high.unsafe_shr(63).to_i32!
+      shift = upperbit &+ 64 &- mantissa_explicit_bits &- 3
+
+      mantissa = product.high.unsafe_shr(shift)
+
+      power2 = (Detail.power(q.to_i32!) &+ upperbit &- lz &- minimum_exponent).to_i32!
+      if power2 <= 0 # we have a subnormal?
+        # Here we have that answer.power2 <= 0 so -answer.power2 >= 0
+        if 1 &- power2 >= 64 # if we have more than 64 bits below the minimum exponent, you have a zero for sure.
+          # result should be zero
+          return AdjustedMantissa.new(
+            power2: 0,
+            mantissa: 0,
+          )
+        end
+        # next line is safe because -answer.power2 + 1 < 64
+        mantissa = mantissa.unsafe_shr(1 &- power2)
+        # Thankfully, we can't have both "round-to-even" and subnormals because
+        # "round-to-even" only occurs for powers close to 0.
+        mantissa &+= mantissa & 1
+        mantissa = mantissa.unsafe_shr(1)
+        # There is a weird scenario where we don't have a subnormal but just.
+        # Suppose we start with 2.2250738585072013e-308, we end up
+        # with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
+        # whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round
+        # up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer
+        # subnormal, but we can only know this after rounding.
+        # So we only declare a subnormal if we are smaller than the threshold.
+        power2 = mantissa < 1_u64.unsafe_shl(mantissa_explicit_bits) ? 0 : 1
+        return AdjustedMantissa.new(power2: power2, mantissa: mantissa)
+      end
+
+      # usually, we round *up*, but if we fall right in between and we have an
+      # even basis, we need to round down
+      # We are only concerned with the cases where 5**q fits in single 64-bit word.
+      if product.low <= 1 && q >= min_exponent_round_to_even && q <= max_exponent_round_to_even && mantissa & 3 == 1
+        # we may fall between two floats!
+        # To be in-between two floats we need that in doing
+        # answer.mantissa = product.high >> (upperbit + 64 -
+        # binary::mantissa_explicit_bits() - 3);
+        # ... we dropped out only zeroes. But if this happened, then we can go
+        # back!!!
+        if mantissa.unsafe_shl(shift) == product.high
+          mantissa &= ~1_u64 # flip it so that we do not round up
+        end
+      end
+
+      mantissa &+= mantissa & 1 # round up
+      mantissa = mantissa.unsafe_shr(1)
+      if mantissa >= 2_u64.unsafe_shl(mantissa_explicit_bits)
+        mantissa = 1_u64.unsafe_shl(mantissa_explicit_bits)
+        power2 &+= 1 # undo previous addition
+      end
+
+      mantissa &= ~(1_u64.unsafe_shl(mantissa_explicit_bits))
+      if power2 >= infinite_power # infinity
+        return AdjustedMantissa.new(
+          power2: infinite_power,
+          mantissa: 0,
+        )
+      end
+      AdjustedMantissa.new(power2: power2, mantissa: mantissa)
+    end
+  end
+end
diff --git a/src/float/fast_float/digit_comparison.cr b/src/float/fast_float/digit_comparison.cr
new file mode 100644
index 000000000000..2da4c455bac4
--- /dev/null
+++ b/src/float/fast_float/digit_comparison.cr
@@ -0,0 +1,399 @@
+require "./float_common"
+require "./bigint"
+require "./ascii_number"
+
+module Float::FastFloat
+  # 1e0 to 1e19, indexed by the decimal exponent
+  POWERS_OF_TEN_UINT64 = [
+    1_u64,
+    10_u64,
+    100_u64,
+    1000_u64,
+    10000_u64,
+    100000_u64,
+    1000000_u64,
+    10000000_u64,
+    100000000_u64,
+    1000000000_u64,
+    10000000000_u64,
+    100000000000_u64,
+    1000000000000_u64,
+    10000000000000_u64,
+    100000000000000_u64,
+    1000000000000000_u64,
+    10000000000000000_u64,
+    100000000000000000_u64,
+    1000000000000000000_u64,
+    10000000000000000000_u64,
+  ]
+
+  # calculate the exponent, in scientific notation, of the number.
+  # this algorithm is not even close to optimized, but it has no practical
+  # effect on performance: in order to have a faster algorithm, we'd need
+  # to slow down performance for faster algorithms, and this is still fast.
+  def self.scientific_exponent(num : ParsedNumberStringT(UC)) : Int32 forall UC
+    mantissa = num.mantissa
+    exponent = num.exponent.to_i32!
+    while mantissa >= 10000
+      mantissa = mantissa.unsafe_div(10000)
+      exponent &+= 4
+    end
+    while mantissa >= 100
+      mantissa = mantissa.unsafe_div(100)
+      exponent &+= 2
+    end
+    while mantissa >= 10
+      mantissa = mantissa.unsafe_div(10)
+      exponent &+= 1
+    end
+    exponent
+  end
+
+  module BinaryFormat(T, EquivUint)
+    # this converts a native floating-point number to an extended-precision float.
+    def to_extended(value : T) : AdjustedMantissa
+      exponent_mask = self.exponent_mask
+      mantissa_mask = self.mantissa_mask
+      hidden_bit_mask = self.hidden_bit_mask
+
+      bias = mantissa_explicit_bits &- minimum_exponent
+      bits = value.unsafe_as(EquivUint)
+      if bits & exponent_mask == 0
+        # denormal: no hidden bit is added
+        power2 = 1 &- bias
+        mantissa = bits & mantissa_mask
+      else
+        # normal: extract the exponent field and restore the hidden bit
+        power2 = (bits & exponent_mask).unsafe_shr(mantissa_explicit_bits).to_i32!
+        power2 &-= bias
+        mantissa = (bits & mantissa_mask) | hidden_bit_mask
+      end
+
+      AdjustedMantissa.new(power2: power2, mantissa: mantissa.to_u64!)
+    end
+
+    # get the extended precision value of the halfway point between b and b+u.
+    # we are given a native float that represents b, so we need to adjust it
+    # halfway between b and b+u.
+    def to_extended_halfway(value : T) : AdjustedMantissa
+      am = to_extended(value)
+      am.mantissa = am.mantissa.unsafe_shl(1)
+      am.mantissa &+= 1
+      am.power2 &-= 1
+      am
+    end
+
+    # round an extended-precision float to the nearest machine float.
+    # NOTE(crystal): passes *am* in and out by value; the block receives *am* and the shift to apply
+    def round(am : AdjustedMantissa, & : AdjustedMantissa, Int32 -> AdjustedMantissa) : AdjustedMantissa
+      mantissa_shift = 64 &- mantissa_explicit_bits &- 1
+      if 0 &- am.power2 >= mantissa_shift
+        # have a denormal float
+        shift = 1 &- am.power2
+        am = yield am, {shift, 64}.min
+        # check for round-up: if rounding-nearest carried us to the hidden bit.
+        am.power2 = am.mantissa < 1_u64.unsafe_shl(mantissa_explicit_bits) ? 0 : 1
+        return am
+      end
+
+      # have a normal float, use the default shift.
+      am = yield am, mantissa_shift
+
+      # check for carry
+      if am.mantissa >= 2_u64.unsafe_shl(mantissa_explicit_bits)
+        am.mantissa = 1_u64.unsafe_shl(mantissa_explicit_bits)
+        am.power2 &+= 1
+      end
+
+      # check for infinite: we could have carried to an infinite power
+      am.mantissa &= ~(1_u64.unsafe_shl(mantissa_explicit_bits))
+      if am.power2 >= infinite_power
+        am.power2 = infinite_power
+        am.mantissa = 0
+      end
+
+      am
+    end
+
+    # NOTE(crystal): passes *am* in and out by value; the block receives (is_odd, is_halfway, is_above) and returns whether to round up
+    def round_nearest_tie_even(am : AdjustedMantissa, shift : Int32, & : Bool, Bool, Bool -> Bool) : AdjustedMantissa
+      mask = shift == 64 ? UInt64::MAX : 1_u64.unsafe_shl(shift) &- 1 # selects the bits that are about to be shifted out
+      halfway = shift == 0 ? 0_u64 : 1_u64.unsafe_shl(shift &- 1) # value of the shifted-out bits at an exact tie
+      truncated_bits = am.mantissa & mask
+      is_above = truncated_bits > halfway
+      is_halfway = truncated_bits == halfway
+
+      # shift digits into position
+      if shift == 64
+        am.mantissa = 0
+      else
+        am.mantissa = am.mantissa.unsafe_shr(shift)
+      end
+      am.power2 &+= shift
+
+      is_odd = am.mantissa.bits_set?(1) # lowest kept bit
+      am.mantissa &+= (yield is_odd, is_halfway, is_above) ? 1 : 0
+      am
+    end
+
+    # NOTE(crystal): passes *am* in and out by value; drops the low *shift* bits without rounding
+    def round_down(am : AdjustedMantissa, shift : Int32) : AdjustedMantissa
+      if shift == 64
+        am.mantissa = 0
+      else
+        am.mantissa = am.mantissa.unsafe_shr(shift)
+      end
+      am.power2 &+= shift
+      am
+    end
+
+    # NOTE(crystal): returns the new *first* by value, advanced past any leading '0' characters
+    def skip_zeros(first : UC*, last : UC*) : UC* forall UC
+      int_cmp_len = FastFloat.int_cmp_len(UC)
+      int_cmp_zeros = FastFloat.int_cmp_zeros(UC)
+
+      val = uninitialized UInt64
+      while last - first >= int_cmp_len
+        first.copy_to(pointerof(val).as(UC*), int_cmp_len) # load int_cmp_len characters into val for one bulk comparison
+        if val != int_cmp_zeros
+          break
+        end
+        first += int_cmp_len
+      end
+      while first != last
+        unless first.value === '0'
+          break
+        end
+        first += 1
+      end
+      first
+    end
+
+    # determine if any non-zero digits were truncated.
+    # all characters must be valid digits.
+    def is_truncated?(first : UC*, last : UC*) : Bool forall UC
+      int_cmp_len = FastFloat.int_cmp_len(UC)
+      int_cmp_zeros = FastFloat.int_cmp_zeros(UC)
+
+      # do 8-bit optimizations, can just compare to 8 literal 0s.
+
+      val = uninitialized UInt64
+      while last - first >= int_cmp_len
+        first.copy_to(pointerof(val).as(UC*), int_cmp_len)
+        if val != int_cmp_zeros
+          return true
+        end
+        first += int_cmp_len
+      end
+      while first != last
+        unless first.value === '0'
+          return true
+        end
+        first += 1
+      end
+      false
+    end
+
+    def is_truncated?(s : Slice(UC)) : Bool forall UC # slice overload: forwards to the pointer-pair version
+      is_truncated?(s.to_unsafe, s.to_unsafe + s.size)
+    end
+
+    macro parse_eight_digits(p, value, counter, count)
+      {{ value }} = {{ value }} &* 100000000 &+ FastFloat.parse_eight_digits_unrolled({{ p }})
+      {{ p }} += 8
+      {{ counter }} &+= 8
+      {{ count }} &+= 8
+    end
+
+    macro parse_one_digit(p, value, counter, count)
+      {{ value }} = {{ value }} &* 10 &+ {{ p }}.value &- '0'.ord
+      {{ p }} += 1
+      {{ counter }} &+= 1
+      {{ count }} &+= 1
+    end
+
+    macro add_native(big, power, value)
+      {{ big }}.value.mul({{ power }})
+      {{ big }}.value.add({{ value }})
+    end
+
+    macro round_up_bigint(big, count)
+      # need to round-up the digits, but need to avoid rounding
+      # ....9999 to ...10000, which could cause a false halfway point.
+      add_native({{ big }}, 10, 1)
+      {{ count }} &+= 1
+    end
+
+    # parse the significant digits into a big integer
+    # NOTE(crystal): returns the new *digits* by value
+    def parse_mantissa(result : Bigint*, num : ParsedNumberStringT(UC), max_digits : Int) : Int forall UC
+      # try to minimize the number of big integer and scalar multiplication.
+      # therefore, try to parse 8 digits at a time, and multiply by the largest
+      # scalar value (9 or 19 digits) for each step.
+      counter = 0
+      digits = 0
+      value = Limb.zero
+      step = {{ Limb == UInt64 ? 19 : 9 }}
+
+      # process all integer digits.
+      p = num.integer.to_unsafe
+      pend = p + num.integer.size
+      p = skip_zeros(p, pend)
+      # process all digits, in increments of step per loop
+      while p != pend
+        while pend - p >= 8 && step &- counter >= 8 && max_digits &- digits >= 8
+          parse_eight_digits(p, value, counter, digits)
+        end
+        while counter < step && p != pend && digits < max_digits
+          parse_one_digit(p, value, counter, digits)
+        end
+        if digits == max_digits
+          # add the temporary value, then check if we've truncated any digits
+          add_native(result, Limb.new!(POWERS_OF_TEN_UINT64.unsafe_fetch(counter)), value)
+          truncated = is_truncated?(p, pend)
+          unless num.fraction.empty?
+            truncated ||= is_truncated?(num.fraction)
+          end
+          if truncated
+            round_up_bigint(result, digits)
+          end
+          return digits
+        else
+          add_native(result, Limb.new!(POWERS_OF_TEN_UINT64.unsafe_fetch(counter)), value)
+          counter = 0
+          value = Limb.zero
+        end
+      end
+
+      # add our fraction digits, if they're available.
+      unless num.fraction.empty?
+        p = num.fraction.to_unsafe
+        pend = p + num.fraction.size
+        if digits == 0 # nothing was parsed from the integer part, so leading fraction zeros are skipped too
+          p = skip_zeros(p, pend)
+        end
+        # process all digits, in increments of step per loop
+        while p != pend
+          while pend - p >= 8 && step &- counter >= 8 && max_digits &- digits >= 8
+            parse_eight_digits(p, value, counter, digits)
+          end
+          while counter < step && p != pend && digits < max_digits
+            parse_one_digit(p, value, counter, digits)
+          end
+          if digits == max_digits
+            # add the temporary value, then check if we've truncated any digits
+            add_native(result, Limb.new!(POWERS_OF_TEN_UINT64.unsafe_fetch(counter)), value)
+            truncated = is_truncated?(p, pend)
+            if truncated
+              round_up_bigint(result, digits)
+            end
+            return digits
+          else
+            add_native(result, Limb.new!(POWERS_OF_TEN_UINT64.unsafe_fetch(counter)), value)
+            counter = 0
+            value = Limb.zero
+          end
+        end
+      end
+
+      if counter != 0 # flush the digits still accumulated in value
+        add_native(result, Limb.new!(POWERS_OF_TEN_UINT64.unsafe_fetch(counter)), value)
+      end
+
+      digits
+    end
+
+    def positive_digit_comp(bigmant : Bigint*, exponent : Int32) : AdjustedMantissa # scales the digits by 10 ** exponent, then rounds their high 64 bits
+      bigmant.value.pow10(exponent.to_u32!)
+      mantissa, truncated = bigmant.value.hi64
+      bias = mantissa_explicit_bits &- minimum_exponent
+      power2 = bigmant.value.bit_length &- 64 &+ bias
+      answer = AdjustedMantissa.new(power2: power2, mantissa: mantissa)
+
+      answer = round(answer) do |a, shift|
+        round_nearest_tie_even(a, shift) do |is_odd, is_halfway, is_above|
+          is_above || (is_halfway && truncated) || (is_odd && is_halfway)
+        end
+      end
+
+      answer
+    end
+
+    # the scaling here is quite simple: we have, for the real digits `m * 10^e`,
+    # and for the theoretical digits `n * 2^f`. Since `e` is always negative,
+    # to scale them identically, we do `n * 2^f * 5^-f`, so we now have `m * 2^e`.
+    # we then need to scale by `2^(f - e)`, and then the two significant digits
+    # are of the same magnitude.
+    def negative_digit_comp(bigmant : Bigint*, am : AdjustedMantissa, exponent : Int32) : AdjustedMantissa
+      real_digits = bigmant
+      real_exp = exponent
+
+      # get the value of `b`, rounded down, and get a bigint representation of b+h
+      am_b = round(am) do |a, shift|
+        round_down(a, shift)
+      end
+      b = to_float(false, am_b)
+      theor = to_extended_halfway(b)
+      theor_digits = Bigint.new(theor.mantissa)
+      theor_exp = theor.power2
+
+      # scale real digits and theor digits to be same power.
+      pow2_exp = theor_exp &- real_exp
+      pow5_exp = 0_u32 &- real_exp
+      if pow5_exp != 0
+        theor_digits.pow5(pow5_exp)
+      end
+      if pow2_exp > 0
+        theor_digits.pow2(pow2_exp.to_u32!)
+      elsif pow2_exp < 0
+        real_digits.value.pow2(0_u32 &- pow2_exp)
+      end
+
+      # compare digits, and use it to direct rounding
+      ord = real_digits.value.compare(pointerof(theor_digits))
+      answer = round(am) do |a, shift|
+        round_nearest_tie_even(a, shift) do |is_odd, _, _|
+          if ord > 0
+            true
+          elsif ord < 0
+            false
+          else
+            is_odd
+          end
+        end
+      end
+
+      answer
+    end
+
+    # parse the significant digits as a big integer to unambiguously round the
+    # significant digits. here, we are trying to determine how to round
+    # an extended float representation close to `b+h`, halfway between `b`
+    # (the float rounded-down) and `b+u`, the next positive float. this
+    # algorithm is always correct, and uses one of two approaches. when
+    # the exponent is positive relative to the significant digits (such as
+    # 1234), we create a big-integer representation, get the high 64-bits,
+    # determine if any lower bits are truncated, and use that to direct
+    # rounding. in case of a negative exponent relative to the significant
+    # digits (such as 1.2345), we create a theoretical representation of
+    # `b` as a big-integer type, scaled to the same binary exponent as
+    # the actual digits. we then compare the big integer representations
+    # of both, and use that to direct rounding.
+    def digit_comp(num : ParsedNumberStringT(UC), am : AdjustedMantissa) : AdjustedMantissa forall UC
+      # remove the invalid exponent bias
+      am.power2 &-= INVALID_AM_BIAS
+
+      sci_exp = FastFloat.scientific_exponent(num)
+      max_digits = self.max_digits
+      bigmant = Bigint.new
+      digits = parse_mantissa(pointerof(bigmant), num, max_digits)
+      # can't underflow, since digits is at most max_digits.
+      exponent = sci_exp &+ 1 &- digits
+      if exponent >= 0
+        positive_digit_comp(pointerof(bigmant), exponent)
+      else
+        negative_digit_comp(pointerof(bigmant), am, exponent)
+      end
+    end
+  end
+end
diff --git a/src/float/fast_float/fast_table.cr b/src/float/fast_float/fast_table.cr
new file mode 100644
index 000000000000..a2c2b2e9d1c9
--- /dev/null
+++ b/src/float/fast_float/fast_table.cr
@@ -0,0 +1,695 @@
+module Float::FastFloat
+  # When mapping numbers from decimal to binary,
+  # we go from w * 10^q to m * 2^p but we have
+  # 10^q = 5^q * 2^q, so effectively
+  # we are trying to match
+  # w * 2^q * 5^q to m * 2^p. Thus the powers of two
+  # are not a concern since they can be represented
+  # exactly using the binary notation, only the powers of five
+  # affect the binary significand.
+ + # The smallest non-zero float (binary64) is 2^-1074. + # We take as input numbers of the form w x 10^q where w < 2^64. + # We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + # However, we have that + # (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + # Thus it is possible for a number of the form w * 10^-342 where + # w is a 64-bit value to be a non-zero floating-point number. + # + # Any number of form w * 10^309 where w>= 1 is going to be + # infinite in binary64 so we never need to worry about powers + # of 5 greater than 308. + module Powers + SMALLEST_POWER_OF_FIVE = -342 + LARGEST_POWER_OF_FIVE = 308 + NUMBER_OF_ENTRIES = {{ 2 * (LARGEST_POWER_OF_FIVE - SMALLEST_POWER_OF_FIVE + 1) }} + + # TODO: this is needed to avoid generating lots of allocas + # in LLVM, which makes LLVM really slow. The compiler should + # try to avoid/reuse temporary allocas. + # Explanation: https://github.com/crystal-lang/crystal/issues/4516#issuecomment-306226171 + private def self.put(array, value) : Nil + array << value + end + + # Powers of five from 5^-342 all the way to 5^308 rounded toward one. 
+ # NOTE(crystal): this is very similar to + # `Float::Printer::Dragonbox::ImplInfo_Float64::CACHE`, except the endpoints + # are different and the rounding is in a different direction + POWER_OF_FIVE_128 = begin + array = Array(UInt64).new(NUMBER_OF_ENTRIES) + put(array, 0xeef453d6923bd65a_u64); put(array, 0x113faa2906a13b3f_u64) + put(array, 0x9558b4661b6565f8_u64); put(array, 0x4ac7ca59a424c507_u64) + put(array, 0xbaaee17fa23ebf76_u64); put(array, 0x5d79bcf00d2df649_u64) + put(array, 0xe95a99df8ace6f53_u64); put(array, 0xf4d82c2c107973dc_u64) + put(array, 0x91d8a02bb6c10594_u64); put(array, 0x79071b9b8a4be869_u64) + put(array, 0xb64ec836a47146f9_u64); put(array, 0x9748e2826cdee284_u64) + put(array, 0xe3e27a444d8d98b7_u64); put(array, 0xfd1b1b2308169b25_u64) + put(array, 0x8e6d8c6ab0787f72_u64); put(array, 0xfe30f0f5e50e20f7_u64) + put(array, 0xb208ef855c969f4f_u64); put(array, 0xbdbd2d335e51a935_u64) + put(array, 0xde8b2b66b3bc4723_u64); put(array, 0xad2c788035e61382_u64) + put(array, 0x8b16fb203055ac76_u64); put(array, 0x4c3bcb5021afcc31_u64) + put(array, 0xaddcb9e83c6b1793_u64); put(array, 0xdf4abe242a1bbf3d_u64) + put(array, 0xd953e8624b85dd78_u64); put(array, 0xd71d6dad34a2af0d_u64) + put(array, 0x87d4713d6f33aa6b_u64); put(array, 0x8672648c40e5ad68_u64) + put(array, 0xa9c98d8ccb009506_u64); put(array, 0x680efdaf511f18c2_u64) + put(array, 0xd43bf0effdc0ba48_u64); put(array, 0x212bd1b2566def2_u64) + put(array, 0x84a57695fe98746d_u64); put(array, 0x14bb630f7604b57_u64) + put(array, 0xa5ced43b7e3e9188_u64); put(array, 0x419ea3bd35385e2d_u64) + put(array, 0xcf42894a5dce35ea_u64); put(array, 0x52064cac828675b9_u64) + put(array, 0x818995ce7aa0e1b2_u64); put(array, 0x7343efebd1940993_u64) + put(array, 0xa1ebfb4219491a1f_u64); put(array, 0x1014ebe6c5f90bf8_u64) + put(array, 0xca66fa129f9b60a6_u64); put(array, 0xd41a26e077774ef6_u64) + put(array, 0xfd00b897478238d0_u64); put(array, 0x8920b098955522b4_u64) + put(array, 0x9e20735e8cb16382_u64); put(array, 
0x55b46e5f5d5535b0_u64) + put(array, 0xc5a890362fddbc62_u64); put(array, 0xeb2189f734aa831d_u64) + put(array, 0xf712b443bbd52b7b_u64); put(array, 0xa5e9ec7501d523e4_u64) + put(array, 0x9a6bb0aa55653b2d_u64); put(array, 0x47b233c92125366e_u64) + put(array, 0xc1069cd4eabe89f8_u64); put(array, 0x999ec0bb696e840a_u64) + put(array, 0xf148440a256e2c76_u64); put(array, 0xc00670ea43ca250d_u64) + put(array, 0x96cd2a865764dbca_u64); put(array, 0x380406926a5e5728_u64) + put(array, 0xbc807527ed3e12bc_u64); put(array, 0xc605083704f5ecf2_u64) + put(array, 0xeba09271e88d976b_u64); put(array, 0xf7864a44c633682e_u64) + put(array, 0x93445b8731587ea3_u64); put(array, 0x7ab3ee6afbe0211d_u64) + put(array, 0xb8157268fdae9e4c_u64); put(array, 0x5960ea05bad82964_u64) + put(array, 0xe61acf033d1a45df_u64); put(array, 0x6fb92487298e33bd_u64) + put(array, 0x8fd0c16206306bab_u64); put(array, 0xa5d3b6d479f8e056_u64) + put(array, 0xb3c4f1ba87bc8696_u64); put(array, 0x8f48a4899877186c_u64) + put(array, 0xe0b62e2929aba83c_u64); put(array, 0x331acdabfe94de87_u64) + put(array, 0x8c71dcd9ba0b4925_u64); put(array, 0x9ff0c08b7f1d0b14_u64) + put(array, 0xaf8e5410288e1b6f_u64); put(array, 0x7ecf0ae5ee44dd9_u64) + put(array, 0xdb71e91432b1a24a_u64); put(array, 0xc9e82cd9f69d6150_u64) + put(array, 0x892731ac9faf056e_u64); put(array, 0xbe311c083a225cd2_u64) + put(array, 0xab70fe17c79ac6ca_u64); put(array, 0x6dbd630a48aaf406_u64) + put(array, 0xd64d3d9db981787d_u64); put(array, 0x92cbbccdad5b108_u64) + put(array, 0x85f0468293f0eb4e_u64); put(array, 0x25bbf56008c58ea5_u64) + put(array, 0xa76c582338ed2621_u64); put(array, 0xaf2af2b80af6f24e_u64) + put(array, 0xd1476e2c07286faa_u64); put(array, 0x1af5af660db4aee1_u64) + put(array, 0x82cca4db847945ca_u64); put(array, 0x50d98d9fc890ed4d_u64) + put(array, 0xa37fce126597973c_u64); put(array, 0xe50ff107bab528a0_u64) + put(array, 0xcc5fc196fefd7d0c_u64); put(array, 0x1e53ed49a96272c8_u64) + put(array, 0xff77b1fcbebcdc4f_u64); put(array, 0x25e8e89c13bb0f7a_u64) + 
put(array, 0x9faacf3df73609b1_u64); put(array, 0x77b191618c54e9ac_u64) + put(array, 0xc795830d75038c1d_u64); put(array, 0xd59df5b9ef6a2417_u64) + put(array, 0xf97ae3d0d2446f25_u64); put(array, 0x4b0573286b44ad1d_u64) + put(array, 0x9becce62836ac577_u64); put(array, 0x4ee367f9430aec32_u64) + put(array, 0xc2e801fb244576d5_u64); put(array, 0x229c41f793cda73f_u64) + put(array, 0xf3a20279ed56d48a_u64); put(array, 0x6b43527578c1110f_u64) + put(array, 0x9845418c345644d6_u64); put(array, 0x830a13896b78aaa9_u64) + put(array, 0xbe5691ef416bd60c_u64); put(array, 0x23cc986bc656d553_u64) + put(array, 0xedec366b11c6cb8f_u64); put(array, 0x2cbfbe86b7ec8aa8_u64) + put(array, 0x94b3a202eb1c3f39_u64); put(array, 0x7bf7d71432f3d6a9_u64) + put(array, 0xb9e08a83a5e34f07_u64); put(array, 0xdaf5ccd93fb0cc53_u64) + put(array, 0xe858ad248f5c22c9_u64); put(array, 0xd1b3400f8f9cff68_u64) + put(array, 0x91376c36d99995be_u64); put(array, 0x23100809b9c21fa1_u64) + put(array, 0xb58547448ffffb2d_u64); put(array, 0xabd40a0c2832a78a_u64) + put(array, 0xe2e69915b3fff9f9_u64); put(array, 0x16c90c8f323f516c_u64) + put(array, 0x8dd01fad907ffc3b_u64); put(array, 0xae3da7d97f6792e3_u64) + put(array, 0xb1442798f49ffb4a_u64); put(array, 0x99cd11cfdf41779c_u64) + put(array, 0xdd95317f31c7fa1d_u64); put(array, 0x40405643d711d583_u64) + put(array, 0x8a7d3eef7f1cfc52_u64); put(array, 0x482835ea666b2572_u64) + put(array, 0xad1c8eab5ee43b66_u64); put(array, 0xda3243650005eecf_u64) + put(array, 0xd863b256369d4a40_u64); put(array, 0x90bed43e40076a82_u64) + put(array, 0x873e4f75e2224e68_u64); put(array, 0x5a7744a6e804a291_u64) + put(array, 0xa90de3535aaae202_u64); put(array, 0x711515d0a205cb36_u64) + put(array, 0xd3515c2831559a83_u64); put(array, 0xd5a5b44ca873e03_u64) + put(array, 0x8412d9991ed58091_u64); put(array, 0xe858790afe9486c2_u64) + put(array, 0xa5178fff668ae0b6_u64); put(array, 0x626e974dbe39a872_u64) + put(array, 0xce5d73ff402d98e3_u64); put(array, 0xfb0a3d212dc8128f_u64) + put(array, 
0x80fa687f881c7f8e_u64); put(array, 0x7ce66634bc9d0b99_u64) + put(array, 0xa139029f6a239f72_u64); put(array, 0x1c1fffc1ebc44e80_u64) + put(array, 0xc987434744ac874e_u64); put(array, 0xa327ffb266b56220_u64) + put(array, 0xfbe9141915d7a922_u64); put(array, 0x4bf1ff9f0062baa8_u64) + put(array, 0x9d71ac8fada6c9b5_u64); put(array, 0x6f773fc3603db4a9_u64) + put(array, 0xc4ce17b399107c22_u64); put(array, 0xcb550fb4384d21d3_u64) + put(array, 0xf6019da07f549b2b_u64); put(array, 0x7e2a53a146606a48_u64) + put(array, 0x99c102844f94e0fb_u64); put(array, 0x2eda7444cbfc426d_u64) + put(array, 0xc0314325637a1939_u64); put(array, 0xfa911155fefb5308_u64) + put(array, 0xf03d93eebc589f88_u64); put(array, 0x793555ab7eba27ca_u64) + put(array, 0x96267c7535b763b5_u64); put(array, 0x4bc1558b2f3458de_u64) + put(array, 0xbbb01b9283253ca2_u64); put(array, 0x9eb1aaedfb016f16_u64) + put(array, 0xea9c227723ee8bcb_u64); put(array, 0x465e15a979c1cadc_u64) + put(array, 0x92a1958a7675175f_u64); put(array, 0xbfacd89ec191ec9_u64) + put(array, 0xb749faed14125d36_u64); put(array, 0xcef980ec671f667b_u64) + put(array, 0xe51c79a85916f484_u64); put(array, 0x82b7e12780e7401a_u64) + put(array, 0x8f31cc0937ae58d2_u64); put(array, 0xd1b2ecb8b0908810_u64) + put(array, 0xb2fe3f0b8599ef07_u64); put(array, 0x861fa7e6dcb4aa15_u64) + put(array, 0xdfbdcece67006ac9_u64); put(array, 0x67a791e093e1d49a_u64) + put(array, 0x8bd6a141006042bd_u64); put(array, 0xe0c8bb2c5c6d24e0_u64) + put(array, 0xaecc49914078536d_u64); put(array, 0x58fae9f773886e18_u64) + put(array, 0xda7f5bf590966848_u64); put(array, 0xaf39a475506a899e_u64) + put(array, 0x888f99797a5e012d_u64); put(array, 0x6d8406c952429603_u64) + put(array, 0xaab37fd7d8f58178_u64); put(array, 0xc8e5087ba6d33b83_u64) + put(array, 0xd5605fcdcf32e1d6_u64); put(array, 0xfb1e4a9a90880a64_u64) + put(array, 0x855c3be0a17fcd26_u64); put(array, 0x5cf2eea09a55067f_u64) + put(array, 0xa6b34ad8c9dfc06f_u64); put(array, 0xf42faa48c0ea481e_u64) + put(array, 0xd0601d8efc57b08b_u64); 
put(array, 0xf13b94daf124da26_u64) + put(array, 0x823c12795db6ce57_u64); put(array, 0x76c53d08d6b70858_u64) + put(array, 0xa2cb1717b52481ed_u64); put(array, 0x54768c4b0c64ca6e_u64) + put(array, 0xcb7ddcdda26da268_u64); put(array, 0xa9942f5dcf7dfd09_u64) + put(array, 0xfe5d54150b090b02_u64); put(array, 0xd3f93b35435d7c4c_u64) + put(array, 0x9efa548d26e5a6e1_u64); put(array, 0xc47bc5014a1a6daf_u64) + put(array, 0xc6b8e9b0709f109a_u64); put(array, 0x359ab6419ca1091b_u64) + put(array, 0xf867241c8cc6d4c0_u64); put(array, 0xc30163d203c94b62_u64) + put(array, 0x9b407691d7fc44f8_u64); put(array, 0x79e0de63425dcf1d_u64) + put(array, 0xc21094364dfb5636_u64); put(array, 0x985915fc12f542e4_u64) + put(array, 0xf294b943e17a2bc4_u64); put(array, 0x3e6f5b7b17b2939d_u64) + put(array, 0x979cf3ca6cec5b5a_u64); put(array, 0xa705992ceecf9c42_u64) + put(array, 0xbd8430bd08277231_u64); put(array, 0x50c6ff782a838353_u64) + put(array, 0xece53cec4a314ebd_u64); put(array, 0xa4f8bf5635246428_u64) + put(array, 0x940f4613ae5ed136_u64); put(array, 0x871b7795e136be99_u64) + put(array, 0xb913179899f68584_u64); put(array, 0x28e2557b59846e3f_u64) + put(array, 0xe757dd7ec07426e5_u64); put(array, 0x331aeada2fe589cf_u64) + put(array, 0x9096ea6f3848984f_u64); put(array, 0x3ff0d2c85def7621_u64) + put(array, 0xb4bca50b065abe63_u64); put(array, 0xfed077a756b53a9_u64) + put(array, 0xe1ebce4dc7f16dfb_u64); put(array, 0xd3e8495912c62894_u64) + put(array, 0x8d3360f09cf6e4bd_u64); put(array, 0x64712dd7abbbd95c_u64) + put(array, 0xb080392cc4349dec_u64); put(array, 0xbd8d794d96aacfb3_u64) + put(array, 0xdca04777f541c567_u64); put(array, 0xecf0d7a0fc5583a0_u64) + put(array, 0x89e42caaf9491b60_u64); put(array, 0xf41686c49db57244_u64) + put(array, 0xac5d37d5b79b6239_u64); put(array, 0x311c2875c522ced5_u64) + put(array, 0xd77485cb25823ac7_u64); put(array, 0x7d633293366b828b_u64) + put(array, 0x86a8d39ef77164bc_u64); put(array, 0xae5dff9c02033197_u64) + put(array, 0xa8530886b54dbdeb_u64); put(array, 
0xd9f57f830283fdfc_u64) + put(array, 0xd267caa862a12d66_u64); put(array, 0xd072df63c324fd7b_u64) + put(array, 0x8380dea93da4bc60_u64); put(array, 0x4247cb9e59f71e6d_u64) + put(array, 0xa46116538d0deb78_u64); put(array, 0x52d9be85f074e608_u64) + put(array, 0xcd795be870516656_u64); put(array, 0x67902e276c921f8b_u64) + put(array, 0x806bd9714632dff6_u64); put(array, 0xba1cd8a3db53b6_u64) + put(array, 0xa086cfcd97bf97f3_u64); put(array, 0x80e8a40eccd228a4_u64) + put(array, 0xc8a883c0fdaf7df0_u64); put(array, 0x6122cd128006b2cd_u64) + put(array, 0xfad2a4b13d1b5d6c_u64); put(array, 0x796b805720085f81_u64) + put(array, 0x9cc3a6eec6311a63_u64); put(array, 0xcbe3303674053bb0_u64) + put(array, 0xc3f490aa77bd60fc_u64); put(array, 0xbedbfc4411068a9c_u64) + put(array, 0xf4f1b4d515acb93b_u64); put(array, 0xee92fb5515482d44_u64) + put(array, 0x991711052d8bf3c5_u64); put(array, 0x751bdd152d4d1c4a_u64) + put(array, 0xbf5cd54678eef0b6_u64); put(array, 0xd262d45a78a0635d_u64) + put(array, 0xef340a98172aace4_u64); put(array, 0x86fb897116c87c34_u64) + put(array, 0x9580869f0e7aac0e_u64); put(array, 0xd45d35e6ae3d4da0_u64) + put(array, 0xbae0a846d2195712_u64); put(array, 0x8974836059cca109_u64) + put(array, 0xe998d258869facd7_u64); put(array, 0x2bd1a438703fc94b_u64) + put(array, 0x91ff83775423cc06_u64); put(array, 0x7b6306a34627ddcf_u64) + put(array, 0xb67f6455292cbf08_u64); put(array, 0x1a3bc84c17b1d542_u64) + put(array, 0xe41f3d6a7377eeca_u64); put(array, 0x20caba5f1d9e4a93_u64) + put(array, 0x8e938662882af53e_u64); put(array, 0x547eb47b7282ee9c_u64) + put(array, 0xb23867fb2a35b28d_u64); put(array, 0xe99e619a4f23aa43_u64) + put(array, 0xdec681f9f4c31f31_u64); put(array, 0x6405fa00e2ec94d4_u64) + put(array, 0x8b3c113c38f9f37e_u64); put(array, 0xde83bc408dd3dd04_u64) + put(array, 0xae0b158b4738705e_u64); put(array, 0x9624ab50b148d445_u64) + put(array, 0xd98ddaee19068c76_u64); put(array, 0x3badd624dd9b0957_u64) + put(array, 0x87f8a8d4cfa417c9_u64); put(array, 0xe54ca5d70a80e5d6_u64) + 
put(array, 0xa9f6d30a038d1dbc_u64); put(array, 0x5e9fcf4ccd211f4c_u64) + put(array, 0xd47487cc8470652b_u64); put(array, 0x7647c3200069671f_u64) + put(array, 0x84c8d4dfd2c63f3b_u64); put(array, 0x29ecd9f40041e073_u64) + put(array, 0xa5fb0a17c777cf09_u64); put(array, 0xf468107100525890_u64) + put(array, 0xcf79cc9db955c2cc_u64); put(array, 0x7182148d4066eeb4_u64) + put(array, 0x81ac1fe293d599bf_u64); put(array, 0xc6f14cd848405530_u64) + put(array, 0xa21727db38cb002f_u64); put(array, 0xb8ada00e5a506a7c_u64) + put(array, 0xca9cf1d206fdc03b_u64); put(array, 0xa6d90811f0e4851c_u64) + put(array, 0xfd442e4688bd304a_u64); put(array, 0x908f4a166d1da663_u64) + put(array, 0x9e4a9cec15763e2e_u64); put(array, 0x9a598e4e043287fe_u64) + put(array, 0xc5dd44271ad3cdba_u64); put(array, 0x40eff1e1853f29fd_u64) + put(array, 0xf7549530e188c128_u64); put(array, 0xd12bee59e68ef47c_u64) + put(array, 0x9a94dd3e8cf578b9_u64); put(array, 0x82bb74f8301958ce_u64) + put(array, 0xc13a148e3032d6e7_u64); put(array, 0xe36a52363c1faf01_u64) + put(array, 0xf18899b1bc3f8ca1_u64); put(array, 0xdc44e6c3cb279ac1_u64) + put(array, 0x96f5600f15a7b7e5_u64); put(array, 0x29ab103a5ef8c0b9_u64) + put(array, 0xbcb2b812db11a5de_u64); put(array, 0x7415d448f6b6f0e7_u64) + put(array, 0xebdf661791d60f56_u64); put(array, 0x111b495b3464ad21_u64) + put(array, 0x936b9fcebb25c995_u64); put(array, 0xcab10dd900beec34_u64) + put(array, 0xb84687c269ef3bfb_u64); put(array, 0x3d5d514f40eea742_u64) + put(array, 0xe65829b3046b0afa_u64); put(array, 0xcb4a5a3112a5112_u64) + put(array, 0x8ff71a0fe2c2e6dc_u64); put(array, 0x47f0e785eaba72ab_u64) + put(array, 0xb3f4e093db73a093_u64); put(array, 0x59ed216765690f56_u64) + put(array, 0xe0f218b8d25088b8_u64); put(array, 0x306869c13ec3532c_u64) + put(array, 0x8c974f7383725573_u64); put(array, 0x1e414218c73a13fb_u64) + put(array, 0xafbd2350644eeacf_u64); put(array, 0xe5d1929ef90898fa_u64) + put(array, 0xdbac6c247d62a583_u64); put(array, 0xdf45f746b74abf39_u64) + put(array, 
0x894bc396ce5da772_u64); put(array, 0x6b8bba8c328eb783_u64) + put(array, 0xab9eb47c81f5114f_u64); put(array, 0x66ea92f3f326564_u64) + put(array, 0xd686619ba27255a2_u64); put(array, 0xc80a537b0efefebd_u64) + put(array, 0x8613fd0145877585_u64); put(array, 0xbd06742ce95f5f36_u64) + put(array, 0xa798fc4196e952e7_u64); put(array, 0x2c48113823b73704_u64) + put(array, 0xd17f3b51fca3a7a0_u64); put(array, 0xf75a15862ca504c5_u64) + put(array, 0x82ef85133de648c4_u64); put(array, 0x9a984d73dbe722fb_u64) + put(array, 0xa3ab66580d5fdaf5_u64); put(array, 0xc13e60d0d2e0ebba_u64) + put(array, 0xcc963fee10b7d1b3_u64); put(array, 0x318df905079926a8_u64) + put(array, 0xffbbcfe994e5c61f_u64); put(array, 0xfdf17746497f7052_u64) + put(array, 0x9fd561f1fd0f9bd3_u64); put(array, 0xfeb6ea8bedefa633_u64) + put(array, 0xc7caba6e7c5382c8_u64); put(array, 0xfe64a52ee96b8fc0_u64) + put(array, 0xf9bd690a1b68637b_u64); put(array, 0x3dfdce7aa3c673b0_u64) + put(array, 0x9c1661a651213e2d_u64); put(array, 0x6bea10ca65c084e_u64) + put(array, 0xc31bfa0fe5698db8_u64); put(array, 0x486e494fcff30a62_u64) + put(array, 0xf3e2f893dec3f126_u64); put(array, 0x5a89dba3c3efccfa_u64) + put(array, 0x986ddb5c6b3a76b7_u64); put(array, 0xf89629465a75e01c_u64) + put(array, 0xbe89523386091465_u64); put(array, 0xf6bbb397f1135823_u64) + put(array, 0xee2ba6c0678b597f_u64); put(array, 0x746aa07ded582e2c_u64) + put(array, 0x94db483840b717ef_u64); put(array, 0xa8c2a44eb4571cdc_u64) + put(array, 0xba121a4650e4ddeb_u64); put(array, 0x92f34d62616ce413_u64) + put(array, 0xe896a0d7e51e1566_u64); put(array, 0x77b020baf9c81d17_u64) + put(array, 0x915e2486ef32cd60_u64); put(array, 0xace1474dc1d122e_u64) + put(array, 0xb5b5ada8aaff80b8_u64); put(array, 0xd819992132456ba_u64) + put(array, 0xe3231912d5bf60e6_u64); put(array, 0x10e1fff697ed6c69_u64) + put(array, 0x8df5efabc5979c8f_u64); put(array, 0xca8d3ffa1ef463c1_u64) + put(array, 0xb1736b96b6fd83b3_u64); put(array, 0xbd308ff8a6b17cb2_u64) + put(array, 0xddd0467c64bce4a0_u64); 
put(array, 0xac7cb3f6d05ddbde_u64) + put(array, 0x8aa22c0dbef60ee4_u64); put(array, 0x6bcdf07a423aa96b_u64) + put(array, 0xad4ab7112eb3929d_u64); put(array, 0x86c16c98d2c953c6_u64) + put(array, 0xd89d64d57a607744_u64); put(array, 0xe871c7bf077ba8b7_u64) + put(array, 0x87625f056c7c4a8b_u64); put(array, 0x11471cd764ad4972_u64) + put(array, 0xa93af6c6c79b5d2d_u64); put(array, 0xd598e40d3dd89bcf_u64) + put(array, 0xd389b47879823479_u64); put(array, 0x4aff1d108d4ec2c3_u64) + put(array, 0x843610cb4bf160cb_u64); put(array, 0xcedf722a585139ba_u64) + put(array, 0xa54394fe1eedb8fe_u64); put(array, 0xc2974eb4ee658828_u64) + put(array, 0xce947a3da6a9273e_u64); put(array, 0x733d226229feea32_u64) + put(array, 0x811ccc668829b887_u64); put(array, 0x806357d5a3f525f_u64) + put(array, 0xa163ff802a3426a8_u64); put(array, 0xca07c2dcb0cf26f7_u64) + put(array, 0xc9bcff6034c13052_u64); put(array, 0xfc89b393dd02f0b5_u64) + put(array, 0xfc2c3f3841f17c67_u64); put(array, 0xbbac2078d443ace2_u64) + put(array, 0x9d9ba7832936edc0_u64); put(array, 0xd54b944b84aa4c0d_u64) + put(array, 0xc5029163f384a931_u64); put(array, 0xa9e795e65d4df11_u64) + put(array, 0xf64335bcf065d37d_u64); put(array, 0x4d4617b5ff4a16d5_u64) + put(array, 0x99ea0196163fa42e_u64); put(array, 0x504bced1bf8e4e45_u64) + put(array, 0xc06481fb9bcf8d39_u64); put(array, 0xe45ec2862f71e1d6_u64) + put(array, 0xf07da27a82c37088_u64); put(array, 0x5d767327bb4e5a4c_u64) + put(array, 0x964e858c91ba2655_u64); put(array, 0x3a6a07f8d510f86f_u64) + put(array, 0xbbe226efb628afea_u64); put(array, 0x890489f70a55368b_u64) + put(array, 0xeadab0aba3b2dbe5_u64); put(array, 0x2b45ac74ccea842e_u64) + put(array, 0x92c8ae6b464fc96f_u64); put(array, 0x3b0b8bc90012929d_u64) + put(array, 0xb77ada0617e3bbcb_u64); put(array, 0x9ce6ebb40173744_u64) + put(array, 0xe55990879ddcaabd_u64); put(array, 0xcc420a6a101d0515_u64) + put(array, 0x8f57fa54c2a9eab6_u64); put(array, 0x9fa946824a12232d_u64) + put(array, 0xb32df8e9f3546564_u64); put(array, 
0x47939822dc96abf9_u64) + put(array, 0xdff9772470297ebd_u64); put(array, 0x59787e2b93bc56f7_u64) + put(array, 0x8bfbea76c619ef36_u64); put(array, 0x57eb4edb3c55b65a_u64) + put(array, 0xaefae51477a06b03_u64); put(array, 0xede622920b6b23f1_u64) + put(array, 0xdab99e59958885c4_u64); put(array, 0xe95fab368e45eced_u64) + put(array, 0x88b402f7fd75539b_u64); put(array, 0x11dbcb0218ebb414_u64) + put(array, 0xaae103b5fcd2a881_u64); put(array, 0xd652bdc29f26a119_u64) + put(array, 0xd59944a37c0752a2_u64); put(array, 0x4be76d3346f0495f_u64) + put(array, 0x857fcae62d8493a5_u64); put(array, 0x6f70a4400c562ddb_u64) + put(array, 0xa6dfbd9fb8e5b88e_u64); put(array, 0xcb4ccd500f6bb952_u64) + put(array, 0xd097ad07a71f26b2_u64); put(array, 0x7e2000a41346a7a7_u64) + put(array, 0x825ecc24c873782f_u64); put(array, 0x8ed400668c0c28c8_u64) + put(array, 0xa2f67f2dfa90563b_u64); put(array, 0x728900802f0f32fa_u64) + put(array, 0xcbb41ef979346bca_u64); put(array, 0x4f2b40a03ad2ffb9_u64) + put(array, 0xfea126b7d78186bc_u64); put(array, 0xe2f610c84987bfa8_u64) + put(array, 0x9f24b832e6b0f436_u64); put(array, 0xdd9ca7d2df4d7c9_u64) + put(array, 0xc6ede63fa05d3143_u64); put(array, 0x91503d1c79720dbb_u64) + put(array, 0xf8a95fcf88747d94_u64); put(array, 0x75a44c6397ce912a_u64) + put(array, 0x9b69dbe1b548ce7c_u64); put(array, 0xc986afbe3ee11aba_u64) + put(array, 0xc24452da229b021b_u64); put(array, 0xfbe85badce996168_u64) + put(array, 0xf2d56790ab41c2a2_u64); put(array, 0xfae27299423fb9c3_u64) + put(array, 0x97c560ba6b0919a5_u64); put(array, 0xdccd879fc967d41a_u64) + put(array, 0xbdb6b8e905cb600f_u64); put(array, 0x5400e987bbc1c920_u64) + put(array, 0xed246723473e3813_u64); put(array, 0x290123e9aab23b68_u64) + put(array, 0x9436c0760c86e30b_u64); put(array, 0xf9a0b6720aaf6521_u64) + put(array, 0xb94470938fa89bce_u64); put(array, 0xf808e40e8d5b3e69_u64) + put(array, 0xe7958cb87392c2c2_u64); put(array, 0xb60b1d1230b20e04_u64) + put(array, 0x90bd77f3483bb9b9_u64); put(array, 0xb1c6f22b5e6f48c2_u64) + 
put(array, 0xb4ecd5f01a4aa828_u64); put(array, 0x1e38aeb6360b1af3_u64) + put(array, 0xe2280b6c20dd5232_u64); put(array, 0x25c6da63c38de1b0_u64) + put(array, 0x8d590723948a535f_u64); put(array, 0x579c487e5a38ad0e_u64) + put(array, 0xb0af48ec79ace837_u64); put(array, 0x2d835a9df0c6d851_u64) + put(array, 0xdcdb1b2798182244_u64); put(array, 0xf8e431456cf88e65_u64) + put(array, 0x8a08f0f8bf0f156b_u64); put(array, 0x1b8e9ecb641b58ff_u64) + put(array, 0xac8b2d36eed2dac5_u64); put(array, 0xe272467e3d222f3f_u64) + put(array, 0xd7adf884aa879177_u64); put(array, 0x5b0ed81dcc6abb0f_u64) + put(array, 0x86ccbb52ea94baea_u64); put(array, 0x98e947129fc2b4e9_u64) + put(array, 0xa87fea27a539e9a5_u64); put(array, 0x3f2398d747b36224_u64) + put(array, 0xd29fe4b18e88640e_u64); put(array, 0x8eec7f0d19a03aad_u64) + put(array, 0x83a3eeeef9153e89_u64); put(array, 0x1953cf68300424ac_u64) + put(array, 0xa48ceaaab75a8e2b_u64); put(array, 0x5fa8c3423c052dd7_u64) + put(array, 0xcdb02555653131b6_u64); put(array, 0x3792f412cb06794d_u64) + put(array, 0x808e17555f3ebf11_u64); put(array, 0xe2bbd88bbee40bd0_u64) + put(array, 0xa0b19d2ab70e6ed6_u64); put(array, 0x5b6aceaeae9d0ec4_u64) + put(array, 0xc8de047564d20a8b_u64); put(array, 0xf245825a5a445275_u64) + put(array, 0xfb158592be068d2e_u64); put(array, 0xeed6e2f0f0d56712_u64) + put(array, 0x9ced737bb6c4183d_u64); put(array, 0x55464dd69685606b_u64) + put(array, 0xc428d05aa4751e4c_u64); put(array, 0xaa97e14c3c26b886_u64) + put(array, 0xf53304714d9265df_u64); put(array, 0xd53dd99f4b3066a8_u64) + put(array, 0x993fe2c6d07b7fab_u64); put(array, 0xe546a8038efe4029_u64) + put(array, 0xbf8fdb78849a5f96_u64); put(array, 0xde98520472bdd033_u64) + put(array, 0xef73d256a5c0f77c_u64); put(array, 0x963e66858f6d4440_u64) + put(array, 0x95a8637627989aad_u64); put(array, 0xdde7001379a44aa8_u64) + put(array, 0xbb127c53b17ec159_u64); put(array, 0x5560c018580d5d52_u64) + put(array, 0xe9d71b689dde71af_u64); put(array, 0xaab8f01e6e10b4a6_u64) + put(array, 
0x9226712162ab070d_u64); put(array, 0xcab3961304ca70e8_u64) + put(array, 0xb6b00d69bb55c8d1_u64); put(array, 0x3d607b97c5fd0d22_u64) + put(array, 0xe45c10c42a2b3b05_u64); put(array, 0x8cb89a7db77c506a_u64) + put(array, 0x8eb98a7a9a5b04e3_u64); put(array, 0x77f3608e92adb242_u64) + put(array, 0xb267ed1940f1c61c_u64); put(array, 0x55f038b237591ed3_u64) + put(array, 0xdf01e85f912e37a3_u64); put(array, 0x6b6c46dec52f6688_u64) + put(array, 0x8b61313bbabce2c6_u64); put(array, 0x2323ac4b3b3da015_u64) + put(array, 0xae397d8aa96c1b77_u64); put(array, 0xabec975e0a0d081a_u64) + put(array, 0xd9c7dced53c72255_u64); put(array, 0x96e7bd358c904a21_u64) + put(array, 0x881cea14545c7575_u64); put(array, 0x7e50d64177da2e54_u64) + put(array, 0xaa242499697392d2_u64); put(array, 0xdde50bd1d5d0b9e9_u64) + put(array, 0xd4ad2dbfc3d07787_u64); put(array, 0x955e4ec64b44e864_u64) + put(array, 0x84ec3c97da624ab4_u64); put(array, 0xbd5af13bef0b113e_u64) + put(array, 0xa6274bbdd0fadd61_u64); put(array, 0xecb1ad8aeacdd58e_u64) + put(array, 0xcfb11ead453994ba_u64); put(array, 0x67de18eda5814af2_u64) + put(array, 0x81ceb32c4b43fcf4_u64); put(array, 0x80eacf948770ced7_u64) + put(array, 0xa2425ff75e14fc31_u64); put(array, 0xa1258379a94d028d_u64) + put(array, 0xcad2f7f5359a3b3e_u64); put(array, 0x96ee45813a04330_u64) + put(array, 0xfd87b5f28300ca0d_u64); put(array, 0x8bca9d6e188853fc_u64) + put(array, 0x9e74d1b791e07e48_u64); put(array, 0x775ea264cf55347e_u64) + put(array, 0xc612062576589dda_u64); put(array, 0x95364afe032a819e_u64) + put(array, 0xf79687aed3eec551_u64); put(array, 0x3a83ddbd83f52205_u64) + put(array, 0x9abe14cd44753b52_u64); put(array, 0xc4926a9672793543_u64) + put(array, 0xc16d9a0095928a27_u64); put(array, 0x75b7053c0f178294_u64) + put(array, 0xf1c90080baf72cb1_u64); put(array, 0x5324c68b12dd6339_u64) + put(array, 0x971da05074da7bee_u64); put(array, 0xd3f6fc16ebca5e04_u64) + put(array, 0xbce5086492111aea_u64); put(array, 0x88f4bb1ca6bcf585_u64) + put(array, 0xec1e4a7db69561a5_u64); 
put(array, 0x2b31e9e3d06c32e6_u64) + put(array, 0x9392ee8e921d5d07_u64); put(array, 0x3aff322e62439fd0_u64) + put(array, 0xb877aa3236a4b449_u64); put(array, 0x9befeb9fad487c3_u64) + put(array, 0xe69594bec44de15b_u64); put(array, 0x4c2ebe687989a9b4_u64) + put(array, 0x901d7cf73ab0acd9_u64); put(array, 0xf9d37014bf60a11_u64) + put(array, 0xb424dc35095cd80f_u64); put(array, 0x538484c19ef38c95_u64) + put(array, 0xe12e13424bb40e13_u64); put(array, 0x2865a5f206b06fba_u64) + put(array, 0x8cbccc096f5088cb_u64); put(array, 0xf93f87b7442e45d4_u64) + put(array, 0xafebff0bcb24aafe_u64); put(array, 0xf78f69a51539d749_u64) + put(array, 0xdbe6fecebdedd5be_u64); put(array, 0xb573440e5a884d1c_u64) + put(array, 0x89705f4136b4a597_u64); put(array, 0x31680a88f8953031_u64) + put(array, 0xabcc77118461cefc_u64); put(array, 0xfdc20d2b36ba7c3e_u64) + put(array, 0xd6bf94d5e57a42bc_u64); put(array, 0x3d32907604691b4d_u64) + put(array, 0x8637bd05af6c69b5_u64); put(array, 0xa63f9a49c2c1b110_u64) + put(array, 0xa7c5ac471b478423_u64); put(array, 0xfcf80dc33721d54_u64) + put(array, 0xd1b71758e219652b_u64); put(array, 0xd3c36113404ea4a9_u64) + put(array, 0x83126e978d4fdf3b_u64); put(array, 0x645a1cac083126ea_u64) + put(array, 0xa3d70a3d70a3d70a_u64); put(array, 0x3d70a3d70a3d70a4_u64) + put(array, 0xcccccccccccccccc_u64); put(array, 0xcccccccccccccccd_u64) + put(array, 0x8000000000000000_u64); put(array, 0x0_u64) + put(array, 0xa000000000000000_u64); put(array, 0x0_u64) + put(array, 0xc800000000000000_u64); put(array, 0x0_u64) + put(array, 0xfa00000000000000_u64); put(array, 0x0_u64) + put(array, 0x9c40000000000000_u64); put(array, 0x0_u64) + put(array, 0xc350000000000000_u64); put(array, 0x0_u64) + put(array, 0xf424000000000000_u64); put(array, 0x0_u64) + put(array, 0x9896800000000000_u64); put(array, 0x0_u64) + put(array, 0xbebc200000000000_u64); put(array, 0x0_u64) + put(array, 0xee6b280000000000_u64); put(array, 0x0_u64) + put(array, 0x9502f90000000000_u64); put(array, 0x0_u64) + put(array, 
0xba43b74000000000_u64); put(array, 0x0_u64) + put(array, 0xe8d4a51000000000_u64); put(array, 0x0_u64) + put(array, 0x9184e72a00000000_u64); put(array, 0x0_u64) + put(array, 0xb5e620f480000000_u64); put(array, 0x0_u64) + put(array, 0xe35fa931a0000000_u64); put(array, 0x0_u64) + put(array, 0x8e1bc9bf04000000_u64); put(array, 0x0_u64) + put(array, 0xb1a2bc2ec5000000_u64); put(array, 0x0_u64) + put(array, 0xde0b6b3a76400000_u64); put(array, 0x0_u64) + put(array, 0x8ac7230489e80000_u64); put(array, 0x0_u64) + put(array, 0xad78ebc5ac620000_u64); put(array, 0x0_u64) + put(array, 0xd8d726b7177a8000_u64); put(array, 0x0_u64) + put(array, 0x878678326eac9000_u64); put(array, 0x0_u64) + put(array, 0xa968163f0a57b400_u64); put(array, 0x0_u64) + put(array, 0xd3c21bcecceda100_u64); put(array, 0x0_u64) + put(array, 0x84595161401484a0_u64); put(array, 0x0_u64) + put(array, 0xa56fa5b99019a5c8_u64); put(array, 0x0_u64) + put(array, 0xcecb8f27f4200f3a_u64); put(array, 0x0_u64) + put(array, 0x813f3978f8940984_u64); put(array, 0x4000000000000000_u64) + put(array, 0xa18f07d736b90be5_u64); put(array, 0x5000000000000000_u64) + put(array, 0xc9f2c9cd04674ede_u64); put(array, 0xa400000000000000_u64) + put(array, 0xfc6f7c4045812296_u64); put(array, 0x4d00000000000000_u64) + put(array, 0x9dc5ada82b70b59d_u64); put(array, 0xf020000000000000_u64) + put(array, 0xc5371912364ce305_u64); put(array, 0x6c28000000000000_u64) + put(array, 0xf684df56c3e01bc6_u64); put(array, 0xc732000000000000_u64) + put(array, 0x9a130b963a6c115c_u64); put(array, 0x3c7f400000000000_u64) + put(array, 0xc097ce7bc90715b3_u64); put(array, 0x4b9f100000000000_u64) + put(array, 0xf0bdc21abb48db20_u64); put(array, 0x1e86d40000000000_u64) + put(array, 0x96769950b50d88f4_u64); put(array, 0x1314448000000000_u64) + put(array, 0xbc143fa4e250eb31_u64); put(array, 0x17d955a000000000_u64) + put(array, 0xeb194f8e1ae525fd_u64); put(array, 0x5dcfab0800000000_u64) + put(array, 0x92efd1b8d0cf37be_u64); put(array, 0x5aa1cae500000000_u64) + 
put(array, 0xb7abc627050305ad_u64); put(array, 0xf14a3d9e40000000_u64) + put(array, 0xe596b7b0c643c719_u64); put(array, 0x6d9ccd05d0000000_u64) + put(array, 0x8f7e32ce7bea5c6f_u64); put(array, 0xe4820023a2000000_u64) + put(array, 0xb35dbf821ae4f38b_u64); put(array, 0xdda2802c8a800000_u64) + put(array, 0xe0352f62a19e306e_u64); put(array, 0xd50b2037ad200000_u64) + put(array, 0x8c213d9da502de45_u64); put(array, 0x4526f422cc340000_u64) + put(array, 0xaf298d050e4395d6_u64); put(array, 0x9670b12b7f410000_u64) + put(array, 0xdaf3f04651d47b4c_u64); put(array, 0x3c0cdd765f114000_u64) + put(array, 0x88d8762bf324cd0f_u64); put(array, 0xa5880a69fb6ac800_u64) + put(array, 0xab0e93b6efee0053_u64); put(array, 0x8eea0d047a457a00_u64) + put(array, 0xd5d238a4abe98068_u64); put(array, 0x72a4904598d6d880_u64) + put(array, 0x85a36366eb71f041_u64); put(array, 0x47a6da2b7f864750_u64) + put(array, 0xa70c3c40a64e6c51_u64); put(array, 0x999090b65f67d924_u64) + put(array, 0xd0cf4b50cfe20765_u64); put(array, 0xfff4b4e3f741cf6d_u64) + put(array, 0x82818f1281ed449f_u64); put(array, 0xbff8f10e7a8921a4_u64) + put(array, 0xa321f2d7226895c7_u64); put(array, 0xaff72d52192b6a0d_u64) + put(array, 0xcbea6f8ceb02bb39_u64); put(array, 0x9bf4f8a69f764490_u64) + put(array, 0xfee50b7025c36a08_u64); put(array, 0x2f236d04753d5b4_u64) + put(array, 0x9f4f2726179a2245_u64); put(array, 0x1d762422c946590_u64) + put(array, 0xc722f0ef9d80aad6_u64); put(array, 0x424d3ad2b7b97ef5_u64) + put(array, 0xf8ebad2b84e0d58b_u64); put(array, 0xd2e0898765a7deb2_u64) + put(array, 0x9b934c3b330c8577_u64); put(array, 0x63cc55f49f88eb2f_u64) + put(array, 0xc2781f49ffcfa6d5_u64); put(array, 0x3cbf6b71c76b25fb_u64) + put(array, 0xf316271c7fc3908a_u64); put(array, 0x8bef464e3945ef7a_u64) + put(array, 0x97edd871cfda3a56_u64); put(array, 0x97758bf0e3cbb5ac_u64) + put(array, 0xbde94e8e43d0c8ec_u64); put(array, 0x3d52eeed1cbea317_u64) + put(array, 0xed63a231d4c4fb27_u64); put(array, 0x4ca7aaa863ee4bdd_u64) + put(array, 
0x945e455f24fb1cf8_u64); put(array, 0x8fe8caa93e74ef6a_u64) + put(array, 0xb975d6b6ee39e436_u64); put(array, 0xb3e2fd538e122b44_u64) + put(array, 0xe7d34c64a9c85d44_u64); put(array, 0x60dbbca87196b616_u64) + put(array, 0x90e40fbeea1d3a4a_u64); put(array, 0xbc8955e946fe31cd_u64) + put(array, 0xb51d13aea4a488dd_u64); put(array, 0x6babab6398bdbe41_u64) + put(array, 0xe264589a4dcdab14_u64); put(array, 0xc696963c7eed2dd1_u64) + put(array, 0x8d7eb76070a08aec_u64); put(array, 0xfc1e1de5cf543ca2_u64) + put(array, 0xb0de65388cc8ada8_u64); put(array, 0x3b25a55f43294bcb_u64) + put(array, 0xdd15fe86affad912_u64); put(array, 0x49ef0eb713f39ebe_u64) + put(array, 0x8a2dbf142dfcc7ab_u64); put(array, 0x6e3569326c784337_u64) + put(array, 0xacb92ed9397bf996_u64); put(array, 0x49c2c37f07965404_u64) + put(array, 0xd7e77a8f87daf7fb_u64); put(array, 0xdc33745ec97be906_u64) + put(array, 0x86f0ac99b4e8dafd_u64); put(array, 0x69a028bb3ded71a3_u64) + put(array, 0xa8acd7c0222311bc_u64); put(array, 0xc40832ea0d68ce0c_u64) + put(array, 0xd2d80db02aabd62b_u64); put(array, 0xf50a3fa490c30190_u64) + put(array, 0x83c7088e1aab65db_u64); put(array, 0x792667c6da79e0fa_u64) + put(array, 0xa4b8cab1a1563f52_u64); put(array, 0x577001b891185938_u64) + put(array, 0xcde6fd5e09abcf26_u64); put(array, 0xed4c0226b55e6f86_u64) + put(array, 0x80b05e5ac60b6178_u64); put(array, 0x544f8158315b05b4_u64) + put(array, 0xa0dc75f1778e39d6_u64); put(array, 0x696361ae3db1c721_u64) + put(array, 0xc913936dd571c84c_u64); put(array, 0x3bc3a19cd1e38e9_u64) + put(array, 0xfb5878494ace3a5f_u64); put(array, 0x4ab48a04065c723_u64) + put(array, 0x9d174b2dcec0e47b_u64); put(array, 0x62eb0d64283f9c76_u64) + put(array, 0xc45d1df942711d9a_u64); put(array, 0x3ba5d0bd324f8394_u64) + put(array, 0xf5746577930d6500_u64); put(array, 0xca8f44ec7ee36479_u64) + put(array, 0x9968bf6abbe85f20_u64); put(array, 0x7e998b13cf4e1ecb_u64) + put(array, 0xbfc2ef456ae276e8_u64); put(array, 0x9e3fedd8c321a67e_u64) + put(array, 0xefb3ab16c59b14a2_u64); 
put(array, 0xc5cfe94ef3ea101e_u64) + put(array, 0x95d04aee3b80ece5_u64); put(array, 0xbba1f1d158724a12_u64) + put(array, 0xbb445da9ca61281f_u64); put(array, 0x2a8a6e45ae8edc97_u64) + put(array, 0xea1575143cf97226_u64); put(array, 0xf52d09d71a3293bd_u64) + put(array, 0x924d692ca61be758_u64); put(array, 0x593c2626705f9c56_u64) + put(array, 0xb6e0c377cfa2e12e_u64); put(array, 0x6f8b2fb00c77836c_u64) + put(array, 0xe498f455c38b997a_u64); put(array, 0xb6dfb9c0f956447_u64) + put(array, 0x8edf98b59a373fec_u64); put(array, 0x4724bd4189bd5eac_u64) + put(array, 0xb2977ee300c50fe7_u64); put(array, 0x58edec91ec2cb657_u64) + put(array, 0xdf3d5e9bc0f653e1_u64); put(array, 0x2f2967b66737e3ed_u64) + put(array, 0x8b865b215899f46c_u64); put(array, 0xbd79e0d20082ee74_u64) + put(array, 0xae67f1e9aec07187_u64); put(array, 0xecd8590680a3aa11_u64) + put(array, 0xda01ee641a708de9_u64); put(array, 0xe80e6f4820cc9495_u64) + put(array, 0x884134fe908658b2_u64); put(array, 0x3109058d147fdcdd_u64) + put(array, 0xaa51823e34a7eede_u64); put(array, 0xbd4b46f0599fd415_u64) + put(array, 0xd4e5e2cdc1d1ea96_u64); put(array, 0x6c9e18ac7007c91a_u64) + put(array, 0x850fadc09923329e_u64); put(array, 0x3e2cf6bc604ddb0_u64) + put(array, 0xa6539930bf6bff45_u64); put(array, 0x84db8346b786151c_u64) + put(array, 0xcfe87f7cef46ff16_u64); put(array, 0xe612641865679a63_u64) + put(array, 0x81f14fae158c5f6e_u64); put(array, 0x4fcb7e8f3f60c07e_u64) + put(array, 0xa26da3999aef7749_u64); put(array, 0xe3be5e330f38f09d_u64) + put(array, 0xcb090c8001ab551c_u64); put(array, 0x5cadf5bfd3072cc5_u64) + put(array, 0xfdcb4fa002162a63_u64); put(array, 0x73d9732fc7c8f7f6_u64) + put(array, 0x9e9f11c4014dda7e_u64); put(array, 0x2867e7fddcdd9afa_u64) + put(array, 0xc646d63501a1511d_u64); put(array, 0xb281e1fd541501b8_u64) + put(array, 0xf7d88bc24209a565_u64); put(array, 0x1f225a7ca91a4226_u64) + put(array, 0x9ae757596946075f_u64); put(array, 0x3375788de9b06958_u64) + put(array, 0xc1a12d2fc3978937_u64); put(array, 
0x52d6b1641c83ae_u64) + put(array, 0xf209787bb47d6b84_u64); put(array, 0xc0678c5dbd23a49a_u64) + put(array, 0x9745eb4d50ce6332_u64); put(array, 0xf840b7ba963646e0_u64) + put(array, 0xbd176620a501fbff_u64); put(array, 0xb650e5a93bc3d898_u64) + put(array, 0xec5d3fa8ce427aff_u64); put(array, 0xa3e51f138ab4cebe_u64) + put(array, 0x93ba47c980e98cdf_u64); put(array, 0xc66f336c36b10137_u64) + put(array, 0xb8a8d9bbe123f017_u64); put(array, 0xb80b0047445d4184_u64) + put(array, 0xe6d3102ad96cec1d_u64); put(array, 0xa60dc059157491e5_u64) + put(array, 0x9043ea1ac7e41392_u64); put(array, 0x87c89837ad68db2f_u64) + put(array, 0xb454e4a179dd1877_u64); put(array, 0x29babe4598c311fb_u64) + put(array, 0xe16a1dc9d8545e94_u64); put(array, 0xf4296dd6fef3d67a_u64) + put(array, 0x8ce2529e2734bb1d_u64); put(array, 0x1899e4a65f58660c_u64) + put(array, 0xb01ae745b101e9e4_u64); put(array, 0x5ec05dcff72e7f8f_u64) + put(array, 0xdc21a1171d42645d_u64); put(array, 0x76707543f4fa1f73_u64) + put(array, 0x899504ae72497eba_u64); put(array, 0x6a06494a791c53a8_u64) + put(array, 0xabfa45da0edbde69_u64); put(array, 0x487db9d17636892_u64) + put(array, 0xd6f8d7509292d603_u64); put(array, 0x45a9d2845d3c42b6_u64) + put(array, 0x865b86925b9bc5c2_u64); put(array, 0xb8a2392ba45a9b2_u64) + put(array, 0xa7f26836f282b732_u64); put(array, 0x8e6cac7768d7141e_u64) + put(array, 0xd1ef0244af2364ff_u64); put(array, 0x3207d795430cd926_u64) + put(array, 0x8335616aed761f1f_u64); put(array, 0x7f44e6bd49e807b8_u64) + put(array, 0xa402b9c5a8d3a6e7_u64); put(array, 0x5f16206c9c6209a6_u64) + put(array, 0xcd036837130890a1_u64); put(array, 0x36dba887c37a8c0f_u64) + put(array, 0x802221226be55a64_u64); put(array, 0xc2494954da2c9789_u64) + put(array, 0xa02aa96b06deb0fd_u64); put(array, 0xf2db9baa10b7bd6c_u64) + put(array, 0xc83553c5c8965d3d_u64); put(array, 0x6f92829494e5acc7_u64) + put(array, 0xfa42a8b73abbf48c_u64); put(array, 0xcb772339ba1f17f9_u64) + put(array, 0x9c69a97284b578d7_u64); put(array, 0xff2a760414536efb_u64) + 
put(array, 0xc38413cf25e2d70d_u64); put(array, 0xfef5138519684aba_u64) + put(array, 0xf46518c2ef5b8cd1_u64); put(array, 0x7eb258665fc25d69_u64) + put(array, 0x98bf2f79d5993802_u64); put(array, 0xef2f773ffbd97a61_u64) + put(array, 0xbeeefb584aff8603_u64); put(array, 0xaafb550ffacfd8fa_u64) + put(array, 0xeeaaba2e5dbf6784_u64); put(array, 0x95ba2a53f983cf38_u64) + put(array, 0x952ab45cfa97a0b2_u64); put(array, 0xdd945a747bf26183_u64) + put(array, 0xba756174393d88df_u64); put(array, 0x94f971119aeef9e4_u64) + put(array, 0xe912b9d1478ceb17_u64); put(array, 0x7a37cd5601aab85d_u64) + put(array, 0x91abb422ccb812ee_u64); put(array, 0xac62e055c10ab33a_u64) + put(array, 0xb616a12b7fe617aa_u64); put(array, 0x577b986b314d6009_u64) + put(array, 0xe39c49765fdf9d94_u64); put(array, 0xed5a7e85fda0b80b_u64) + put(array, 0x8e41ade9fbebc27d_u64); put(array, 0x14588f13be847307_u64) + put(array, 0xb1d219647ae6b31c_u64); put(array, 0x596eb2d8ae258fc8_u64) + put(array, 0xde469fbd99a05fe3_u64); put(array, 0x6fca5f8ed9aef3bb_u64) + put(array, 0x8aec23d680043bee_u64); put(array, 0x25de7bb9480d5854_u64) + put(array, 0xada72ccc20054ae9_u64); put(array, 0xaf561aa79a10ae6a_u64) + put(array, 0xd910f7ff28069da4_u64); put(array, 0x1b2ba1518094da04_u64) + put(array, 0x87aa9aff79042286_u64); put(array, 0x90fb44d2f05d0842_u64) + put(array, 0xa99541bf57452b28_u64); put(array, 0x353a1607ac744a53_u64) + put(array, 0xd3fa922f2d1675f2_u64); put(array, 0x42889b8997915ce8_u64) + put(array, 0x847c9b5d7c2e09b7_u64); put(array, 0x69956135febada11_u64) + put(array, 0xa59bc234db398c25_u64); put(array, 0x43fab9837e699095_u64) + put(array, 0xcf02b2c21207ef2e_u64); put(array, 0x94f967e45e03f4bb_u64) + put(array, 0x8161afb94b44f57d_u64); put(array, 0x1d1be0eebac278f5_u64) + put(array, 0xa1ba1ba79e1632dc_u64); put(array, 0x6462d92a69731732_u64) + put(array, 0xca28a291859bbf93_u64); put(array, 0x7d7b8f7503cfdcfe_u64) + put(array, 0xfcb2cb35e702af78_u64); put(array, 0x5cda735244c3d43e_u64) + put(array, 
0x9defbf01b061adab_u64); put(array, 0x3a0888136afa64a7_u64) + put(array, 0xc56baec21c7a1916_u64); put(array, 0x88aaa1845b8fdd0_u64) + put(array, 0xf6c69a72a3989f5b_u64); put(array, 0x8aad549e57273d45_u64) + put(array, 0x9a3c2087a63f6399_u64); put(array, 0x36ac54e2f678864b_u64) + put(array, 0xc0cb28a98fcf3c7f_u64); put(array, 0x84576a1bb416a7dd_u64) + put(array, 0xf0fdf2d3f3c30b9f_u64); put(array, 0x656d44a2a11c51d5_u64) + put(array, 0x969eb7c47859e743_u64); put(array, 0x9f644ae5a4b1b325_u64) + put(array, 0xbc4665b596706114_u64); put(array, 0x873d5d9f0dde1fee_u64) + put(array, 0xeb57ff22fc0c7959_u64); put(array, 0xa90cb506d155a7ea_u64) + put(array, 0x9316ff75dd87cbd8_u64); put(array, 0x9a7f12442d588f2_u64) + put(array, 0xb7dcbf5354e9bece_u64); put(array, 0xc11ed6d538aeb2f_u64) + put(array, 0xe5d3ef282a242e81_u64); put(array, 0x8f1668c8a86da5fa_u64) + put(array, 0x8fa475791a569d10_u64); put(array, 0xf96e017d694487bc_u64) + put(array, 0xb38d92d760ec4455_u64); put(array, 0x37c981dcc395a9ac_u64) + put(array, 0xe070f78d3927556a_u64); put(array, 0x85bbe253f47b1417_u64) + put(array, 0x8c469ab843b89562_u64); put(array, 0x93956d7478ccec8e_u64) + put(array, 0xaf58416654a6babb_u64); put(array, 0x387ac8d1970027b2_u64) + put(array, 0xdb2e51bfe9d0696a_u64); put(array, 0x6997b05fcc0319e_u64) + put(array, 0x88fcf317f22241e2_u64); put(array, 0x441fece3bdf81f03_u64) + put(array, 0xab3c2fddeeaad25a_u64); put(array, 0xd527e81cad7626c3_u64) + put(array, 0xd60b3bd56a5586f1_u64); put(array, 0x8a71e223d8d3b074_u64) + put(array, 0x85c7056562757456_u64); put(array, 0xf6872d5667844e49_u64) + put(array, 0xa738c6bebb12d16c_u64); put(array, 0xb428f8ac016561db_u64) + put(array, 0xd106f86e69d785c7_u64); put(array, 0xe13336d701beba52_u64) + put(array, 0x82a45b450226b39c_u64); put(array, 0xecc0024661173473_u64) + put(array, 0xa34d721642b06084_u64); put(array, 0x27f002d7f95d0190_u64) + put(array, 0xcc20ce9bd35c78a5_u64); put(array, 0x31ec038df7b441f4_u64) + put(array, 0xff290242c83396ce_u64); 
put(array, 0x7e67047175a15271_u64) + put(array, 0x9f79a169bd203e41_u64); put(array, 0xf0062c6e984d386_u64) + put(array, 0xc75809c42c684dd1_u64); put(array, 0x52c07b78a3e60868_u64) + put(array, 0xf92e0c3537826145_u64); put(array, 0xa7709a56ccdf8a82_u64) + put(array, 0x9bbcc7a142b17ccb_u64); put(array, 0x88a66076400bb691_u64) + put(array, 0xc2abf989935ddbfe_u64); put(array, 0x6acff893d00ea435_u64) + put(array, 0xf356f7ebf83552fe_u64); put(array, 0x583f6b8c4124d43_u64) + put(array, 0x98165af37b2153de_u64); put(array, 0xc3727a337a8b704a_u64) + put(array, 0xbe1bf1b059e9a8d6_u64); put(array, 0x744f18c0592e4c5c_u64) + put(array, 0xeda2ee1c7064130c_u64); put(array, 0x1162def06f79df73_u64) + put(array, 0x9485d4d1c63e8be7_u64); put(array, 0x8addcb5645ac2ba8_u64) + put(array, 0xb9a74a0637ce2ee1_u64); put(array, 0x6d953e2bd7173692_u64) + put(array, 0xe8111c87c5c1ba99_u64); put(array, 0xc8fa8db6ccdd0437_u64) + put(array, 0x910ab1d4db9914a0_u64); put(array, 0x1d9c9892400a22a2_u64) + put(array, 0xb54d5e4a127f59c8_u64); put(array, 0x2503beb6d00cab4b_u64) + put(array, 0xe2a0b5dc971f303a_u64); put(array, 0x2e44ae64840fd61d_u64) + put(array, 0x8da471a9de737e24_u64); put(array, 0x5ceaecfed289e5d2_u64) + put(array, 0xb10d8e1456105dad_u64); put(array, 0x7425a83e872c5f47_u64) + put(array, 0xdd50f1996b947518_u64); put(array, 0xd12f124e28f77719_u64) + put(array, 0x8a5296ffe33cc92f_u64); put(array, 0x82bd6b70d99aaa6f_u64) + put(array, 0xace73cbfdc0bfb7b_u64); put(array, 0x636cc64d1001550b_u64) + put(array, 0xd8210befd30efa5a_u64); put(array, 0x3c47f7e05401aa4e_u64) + put(array, 0x8714a775e3e95c78_u64); put(array, 0x65acfaec34810a71_u64) + put(array, 0xa8d9d1535ce3b396_u64); put(array, 0x7f1839a741a14d0d_u64) + put(array, 0xd31045a8341ca07c_u64); put(array, 0x1ede48111209a050_u64) + put(array, 0x83ea2b892091e44d_u64); put(array, 0x934aed0aab460432_u64) + put(array, 0xa4e4b66b68b65d60_u64); put(array, 0xf81da84d5617853f_u64) + put(array, 0xce1de40642e3f4b9_u64); put(array, 
0x36251260ab9d668e_u64) + put(array, 0x80d2ae83e9ce78f3_u64); put(array, 0xc1d72b7c6b426019_u64) + put(array, 0xa1075a24e4421730_u64); put(array, 0xb24cf65b8612f81f_u64) + put(array, 0xc94930ae1d529cfc_u64); put(array, 0xdee033f26797b627_u64) + put(array, 0xfb9b7cd9a4a7443c_u64); put(array, 0x169840ef017da3b1_u64) + put(array, 0x9d412e0806e88aa5_u64); put(array, 0x8e1f289560ee864e_u64) + put(array, 0xc491798a08a2ad4e_u64); put(array, 0xf1a6f2bab92a27e2_u64) + put(array, 0xf5b5d7ec8acb58a2_u64); put(array, 0xae10af696774b1db_u64) + put(array, 0x9991a6f3d6bf1765_u64); put(array, 0xacca6da1e0a8ef29_u64) + put(array, 0xbff610b0cc6edd3f_u64); put(array, 0x17fd090a58d32af3_u64) + put(array, 0xeff394dcff8a948e_u64); put(array, 0xddfc4b4cef07f5b0_u64) + put(array, 0x95f83d0a1fb69cd9_u64); put(array, 0x4abdaf101564f98e_u64) + put(array, 0xbb764c4ca7a4440f_u64); put(array, 0x9d6d1ad41abe37f1_u64) + put(array, 0xea53df5fd18d5513_u64); put(array, 0x84c86189216dc5ed_u64) + put(array, 0x92746b9be2f8552c_u64); put(array, 0x32fd3cf5b4e49bb4_u64) + put(array, 0xb7118682dbb66a77_u64); put(array, 0x3fbc8c33221dc2a1_u64) + put(array, 0xe4d5e82392a40515_u64); put(array, 0xfabaf3feaa5334a_u64) + put(array, 0x8f05b1163ba6832d_u64); put(array, 0x29cb4d87f2a7400e_u64) + put(array, 0xb2c71d5bca9023f8_u64); put(array, 0x743e20e9ef511012_u64) + put(array, 0xdf78e4b2bd342cf6_u64); put(array, 0x914da9246b255416_u64) + put(array, 0x8bab8eefb6409c1a_u64); put(array, 0x1ad089b6c2f7548e_u64) + put(array, 0xae9672aba3d0c320_u64); put(array, 0xa184ac2473b529b1_u64) + put(array, 0xda3c0f568cc4f3e8_u64); put(array, 0xc9e5d72d90a2741e_u64) + put(array, 0x8865899617fb1871_u64); put(array, 0x7e2fa67c7a658892_u64) + put(array, 0xaa7eebfb9df9de8d_u64); put(array, 0xddbb901b98feeab7_u64) + put(array, 0xd51ea6fa85785631_u64); put(array, 0x552a74227f3ea565_u64) + put(array, 0x8533285c936b35de_u64); put(array, 0xd53a88958f87275f_u64) + put(array, 0xa67ff273b8460356_u64); put(array, 0x8a892abaf368f137_u64) + 
put(array, 0xd01fef10a657842c_u64); put(array, 0x2d2b7569b0432d85_u64) + put(array, 0x8213f56a67f6b29b_u64); put(array, 0x9c3b29620e29fc73_u64) + put(array, 0xa298f2c501f45f42_u64); put(array, 0x8349f3ba91b47b8f_u64) + put(array, 0xcb3f2f7642717713_u64); put(array, 0x241c70a936219a73_u64) + put(array, 0xfe0efb53d30dd4d7_u64); put(array, 0xed238cd383aa0110_u64) + put(array, 0x9ec95d1463e8a506_u64); put(array, 0xf4363804324a40aa_u64) + put(array, 0xc67bb4597ce2ce48_u64); put(array, 0xb143c6053edcd0d5_u64) + put(array, 0xf81aa16fdc1b81da_u64); put(array, 0xdd94b7868e94050a_u64) + put(array, 0x9b10a4e5e9913128_u64); put(array, 0xca7cf2b4191c8326_u64) + put(array, 0xc1d4ce1f63f57d72_u64); put(array, 0xfd1c2f611f63a3f0_u64) + put(array, 0xf24a01a73cf2dccf_u64); put(array, 0xbc633b39673c8cec_u64) + put(array, 0x976e41088617ca01_u64); put(array, 0xd5be0503e085d813_u64) + put(array, 0xbd49d14aa79dbc82_u64); put(array, 0x4b2d8644d8a74e18_u64) + put(array, 0xec9c459d51852ba2_u64); put(array, 0xddf8e7d60ed1219e_u64) + put(array, 0x93e1ab8252f33b45_u64); put(array, 0xcabb90e5c942b503_u64) + put(array, 0xb8da1662e7b00a17_u64); put(array, 0x3d6a751f3b936243_u64) + put(array, 0xe7109bfba19c0c9d_u64); put(array, 0xcc512670a783ad4_u64) + put(array, 0x906a617d450187e2_u64); put(array, 0x27fb2b80668b24c5_u64) + put(array, 0xb484f9dc9641e9da_u64); put(array, 0xb1f9f660802dedf6_u64) + put(array, 0xe1a63853bbd26451_u64); put(array, 0x5e7873f8a0396973_u64) + put(array, 0x8d07e33455637eb2_u64); put(array, 0xdb0b487b6423e1e8_u64) + put(array, 0xb049dc016abc5e5f_u64); put(array, 0x91ce1a9a3d2cda62_u64) + put(array, 0xdc5c5301c56b75f7_u64); put(array, 0x7641a140cc7810fb_u64) + put(array, 0x89b9b3e11b6329ba_u64); put(array, 0xa9e904c87fcb0a9d_u64) + put(array, 0xac2820d9623bf429_u64); put(array, 0x546345fa9fbdcd44_u64) + put(array, 0xd732290fbacaf133_u64); put(array, 0xa97c177947ad4095_u64) + put(array, 0x867f59a9d4bed6c0_u64); put(array, 0x49ed8eabcccc485d_u64) + put(array, 
0xa81f301449ee8c70_u64); put(array, 0x5c68f256bfff5a74_u64) + put(array, 0xd226fc195c6a2f8c_u64); put(array, 0x73832eec6fff3111_u64) + put(array, 0x83585d8fd9c25db7_u64); put(array, 0xc831fd53c5ff7eab_u64) + put(array, 0xa42e74f3d032f525_u64); put(array, 0xba3e7ca8b77f5e55_u64) + put(array, 0xcd3a1230c43fb26f_u64); put(array, 0x28ce1bd2e55f35eb_u64) + put(array, 0x80444b5e7aa7cf85_u64); put(array, 0x7980d163cf5b81b3_u64) + put(array, 0xa0555e361951c366_u64); put(array, 0xd7e105bcc332621f_u64) + put(array, 0xc86ab5c39fa63440_u64); put(array, 0x8dd9472bf3fefaa7_u64) + put(array, 0xfa856334878fc150_u64); put(array, 0xb14f98f6f0feb951_u64) + put(array, 0x9c935e00d4b9d8d2_u64); put(array, 0x6ed1bf9a569f33d3_u64) + put(array, 0xc3b8358109e84f07_u64); put(array, 0xa862f80ec4700c8_u64) + put(array, 0xf4a642e14c6262c8_u64); put(array, 0xcd27bb612758c0fa_u64) + put(array, 0x98e7e9cccfbd7dbd_u64); put(array, 0x8038d51cb897789c_u64) + put(array, 0xbf21e44003acdd2c_u64); put(array, 0xe0470a63e6bd56c3_u64) + put(array, 0xeeea5d5004981478_u64); put(array, 0x1858ccfce06cac74_u64) + put(array, 0x95527a5202df0ccb_u64); put(array, 0xf37801e0c43ebc8_u64) + put(array, 0xbaa718e68396cffd_u64); put(array, 0xd30560258f54e6ba_u64) + put(array, 0xe950df20247c83fd_u64); put(array, 0x47c6b82ef32a2069_u64) + put(array, 0x91d28b7416cdd27e_u64); put(array, 0x4cdc331d57fa5441_u64) + put(array, 0xb6472e511c81471d_u64); put(array, 0xe0133fe4adf8e952_u64) + put(array, 0xe3d8f9e563a198e5_u64); put(array, 0x58180fddd97723a6_u64) + put(array, 0x8e679c2f5e44ff8f_u64); put(array, 0x570f09eaa7ea7648_u64) + array + end + end +end diff --git a/src/float/fast_float/float_common.cr b/src/float/fast_float/float_common.cr new file mode 100644 index 000000000000..a66dc99f82f7 --- /dev/null +++ b/src/float/fast_float/float_common.cr @@ -0,0 +1,294 @@ +module Float::FastFloat + @[Flags] + enum CharsFormat + Scientific = 1 << 0 + Fixed = 1 << 2 + Hex = 1 << 3 + NoInfnan = 1 << 4 + JsonFmt = 1 << 5 + FortranFmt = 1 
<< 6 + + # RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6 + Json = JsonFmt | Fixed | Scientific | NoInfnan + + # Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed. + JsonOrInfnan = JsonFmt | Fixed | Scientific + + Fortran = FortranFmt | Fixed | Scientific + General = Fixed | Scientific + end + + # NOTE(crystal): uses `Errno` to represent C++'s `std::errc` + record FromCharsResultT(UC), ptr : UC*, ec : Errno + + alias FromCharsResult = FromCharsResultT(UInt8) + + record ParseOptionsT(UC), format : CharsFormat = :general, decimal_point : UC = 0x2E # '.'.ord + + alias ParseOptions = ParseOptionsT(UInt8) + + # rust style `try!()` macro, or `?` operator + macro fastfloat_try(x) + unless {{ x }} + return false + end + end + + # Compares two ASCII strings in a case insensitive manner. + def self.fastfloat_strncasecmp(input1 : UC*, input2 : UC*, length : Int) : Bool forall UC + running_diff = 0_u8 + length.times do |i| + running_diff |= input1[i].to_u8! ^ input2[i].to_u8! + end + running_diff.in?(0_u8, 32_u8) + end + + record Value128, low : UInt64, high : UInt64 do + def self.new(x : UInt128) : self + new(low: x.to_u64!, high: x.unsafe_shr(64).to_u64!) + end + end + + struct AdjustedMantissa + property mantissa : UInt64 + property power2 : Int32 + + def initialize(@mantissa : UInt64 = 0, @power2 : Int32 = 0) + end + end + + INVALID_AM_BIAS = -0x8000 + + CONSTANT_55555 = 3125_u64 + + module BinaryFormat(T, EquivUint) + end + + struct BinaryFormat_Float64 + include BinaryFormat(Float64, UInt64) + + POWERS_OF_TEN = [ + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, + ] + + # Largest integer value v so that (5**index * v) <= 1<<53. 
+ # 0x20000000000000 == 1 << 53 + MAX_MANTISSA = [ + 0x20000000000000_u64, + 0x20000000000000_u64.unsafe_div(5), + 0x20000000000000_u64.unsafe_div(5 * 5), + 0x20000000000000_u64.unsafe_div(5 * 5 * 5), + 0x20000000000000_u64.unsafe_div(5 * 5 * 5 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5 * 5 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * 5 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * 5 * 5 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555), # NOTE(review): this is index 14 but it divides by 5**15 (there is no 5**14 entry), so entries 14..23 are one power of five smaller than the header comment states; conservative, confirm intentional + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5 * 5 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5 * 5), + 0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5 * 5 * 5), + ] + + def min_exponent_fast_path : Int32 + -22 + end + + def mantissa_explicit_bits : Int32 + 52 + end + + def max_exponent_round_to_even : 
Int32 + 23 + end + + def min_exponent_round_to_even : Int32 + -4 + end + + def minimum_exponent : Int32 + -1023 + end + + def infinite_power : Int32 + 0x7FF + end + + def sign_index : Int32 + 63 + end + + def max_exponent_fast_path : Int32 + 22 + end + + def max_mantissa_fast_path : UInt64 + 0x20000000000000_u64 + end + + def max_mantissa_fast_path(power : Int64) : UInt64 + # caller is responsible to ensure that + # power >= 0 && power <= 22 + MAX_MANTISSA.unsafe_fetch(power) + end + + def exact_power_of_ten(power : Int64) : Float64 + POWERS_OF_TEN.unsafe_fetch(power) + end + + def largest_power_of_ten : Int32 + 308 + end + + def smallest_power_of_ten : Int32 + -342 + end + + def max_digits : Int32 + 769 + end + + def exponent_mask : EquivUint + 0x7FF0000000000000_u64 + end + + def mantissa_mask : EquivUint + 0x000FFFFFFFFFFFFF_u64 + end + + def hidden_bit_mask : EquivUint + 0x0010000000000000_u64 + end + end + + struct BinaryFormat_Float32 + include BinaryFormat(Float32, UInt32) + + POWERS_OF_TEN = [ + 1e0f32, 1e1f32, 1e2f32, 1e3f32, 1e4f32, 1e5f32, 1e6f32, 1e7f32, 1e8f32, 1e9f32, 1e10f32, + ] + + # Largest integer value v so that (5**index * v) <= 1<<24. 
+ # 0x1000000 == 1<<24 + MAX_MANTISSA = [ + 0x1000000_u64, + 0x1000000_u64.unsafe_div(5), + 0x1000000_u64.unsafe_div(5 * 5), + 0x1000000_u64.unsafe_div(5 * 5 * 5), + 0x1000000_u64.unsafe_div(5 * 5 * 5 * 5), + 0x1000000_u64.unsafe_div(CONSTANT_55555), + 0x1000000_u64.unsafe_div(CONSTANT_55555 * 5), + 0x1000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5), + 0x1000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5 * 5), + 0x1000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5 * 5 * 5), + 0x1000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555), + 0x1000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * 5), + ] + + def min_exponent_fast_path : Int32 + -10 + end + + def mantissa_explicit_bits : Int32 + 23 + end + + def max_exponent_round_to_even : Int32 + 10 + end + + def min_exponent_round_to_even : Int32 + -17 + end + + def minimum_exponent : Int32 + -127 + end + + def infinite_power : Int32 + 0xFF + end + + def sign_index : Int32 + 31 + end + + def max_exponent_fast_path : Int32 + 10 + end + + def max_mantissa_fast_path : UInt64 + 0x1000000_u64 + end + + def max_mantissa_fast_path(power : Int64) : UInt64 + # caller is responsible to ensure that + # power >= 0 && power <= 10 + MAX_MANTISSA.unsafe_fetch(power) + end + + def exact_power_of_ten(power : Int64) : Float32 + POWERS_OF_TEN.unsafe_fetch(power) + end + + def largest_power_of_ten : Int32 + 38 + end + + def smallest_power_of_ten : Int32 + -64 + end + + def max_digits : Int32 + 114 + end + + def exponent_mask : EquivUint + 0x7F800000_u32 + end + + def mantissa_mask : EquivUint + 0x007FFFFF_u32 + end + + def hidden_bit_mask : EquivUint + 0x00800000_u32 + end + end + + module BinaryFormat(T, EquivUint) + # NOTE(crystal): returns the new *value* by value + def to_float(negative : Bool, am : AdjustedMantissa) : T + word = EquivUint.new!(am.mantissa) + word |= EquivUint.new!(am.power2).unsafe_shl(mantissa_explicit_bits) + word |= EquivUint.new!(negative ? 
1 : 0).unsafe_shl(sign_index) + word.unsafe_as(T) + end + end + + def self.int_cmp_zeros(uc : UC.class) : UInt64 forall UC + case sizeof(UC) + when 1 + 0x3030303030303030_u64 + when 2 + 0x0030003000300030_u64 + else + 0x0000003000000030_u64 + end + end + + def self.int_cmp_len(uc : UC.class) : Int32 forall UC + sizeof(UInt64).unsafe_div(sizeof(UC)) + end +end diff --git a/src/float/fast_float/parse_number.cr b/src/float/fast_float/parse_number.cr new file mode 100644 index 000000000000..3c1ac4c1cb24 --- /dev/null +++ b/src/float/fast_float/parse_number.cr @@ -0,0 +1,197 @@ +require "./ascii_number" +require "./decimal_to_binary" +require "./digit_comparison" +require "./float_common" + +module Float::FastFloat + module Detail + def self.parse_infnan(first : UC*, last : UC*, value : T*) : FromCharsResultT(UC) forall T, UC + ptr = first + ec = Errno::NONE # be optimistic + minus_sign = false + if first.value === '-' # assume first < last, so dereference without checks + minus_sign = true + first += 1 + elsif first.value === '+' + first += 1 + end + + if last - first >= 3 # both the "nan" and the "inf" comparison below read 3 bytes, so neither may run on a shorter input + if FastFloat.fastfloat_strncasecmp(first, "nan".to_unsafe, 3) + first += 3 + ptr = first + value.value = minus_sign ? -T::NAN : T::NAN + # Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, + # C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan). + if first != last && first.value === '(' + ptr2 = first + 1 + while ptr2 != last + case ptr2.value.unsafe_chr + when ')' + ptr = ptr2 + 1 # valid nan(n-char-seq-opt) + break + when 'a'..'z', 'A'..'Z', '0'..'9', '_' + # Do nothing + else + break # forbidden char, not nan(n-char-seq-opt) + end + ptr2 += 1 + end + end + return FromCharsResultT(UC).new(ptr, ec) + end + if FastFloat.fastfloat_strncasecmp(first, "inf".to_unsafe, 3) + if last - first >= 8 && FastFloat.fastfloat_strncasecmp(first + 3, "inity".to_unsafe, 5) + ptr = first + 8 + else + ptr = first + 3 + end + value.value = minus_sign ? 
-T::INFINITY : T::INFINITY + return FromCharsResultT(UC).new(ptr, ec) + end + end + + ec = Errno::EINVAL # neither "nan" nor "inf" matched + FromCharsResultT(UC).new(ptr, ec) + end + + # See + # A fast function to check your floating-point rounding mode + # https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/ + # + # This function is meant to be equivalent to : + # prior: #include <cfenv> + # return fegetround() == FE_TONEAREST; + # However, it is expected to be much faster than the fegetround() + # function call. + # + # NOTE(crystal): uses a pointer instead of a volatile variable to prevent + # LLVM optimization + @@fmin : Float32* = Pointer(Float32).malloc(1, Float32::MIN_POSITIVE) + + # Returns true if the floating-point rounding mode is to 'nearest'. + # It is the default on most systems. This function is meant to be inexpensive. + # Credit : @mwalcott3 + def self.rounds_to_nearest? : Bool + fmin = @@fmin.value # we copy it so that it gets loaded at most once. + + # Explanation: + # Only when fegetround() == FE_TONEAREST do we have that + # fmin + 1.0f == 1.0f - fmin. + # + # FE_UPWARD: + # fmin + 1.0f > 1 + # 1.0f - fmin == 1 + # + # FE_DOWNWARD or FE_TOWARDZERO: + # fmin + 1.0f == 1 + # 1.0f - fmin < 1 + # + # Note: This may fail to be accurate if fast-math has been + # enabled, as rounding conventions may not apply. + fmin + 1.0_f32 == 1.0_f32 - fmin + end + end + + module BinaryFormat(T, EquivUint) + def from_chars_advanced(pns : ParsedNumberStringT(UC), value : T*) : FromCharsResultT(UC) forall UC + {% raise "only some floating-point types are supported" unless T == Float32 || T == Float64 %} + + # TODO(crystal): support UInt16 and UInt32 + {% raise "only UInt8 is supported" unless UC == UInt8 %} + + ec = Errno::NONE # be optimistic + ptr = pns.lastmatch + # The implementation of the Clinger's fast path is convoluted because + # we want round-to-nearest in all cases, irrespective of the rounding mode + # selected on the thread. 
+ # We proceed optimistically, assuming that Detail.rounds_to_nearest? + # returns true. + if (min_exponent_fast_path <= pns.exponent <= max_exponent_fast_path) && !pns.too_many_digits + # Unfortunately, the conventional Clinger's fast path is only possible + # when the system rounds to the nearest float. + # + # We expect the next branch to almost always be selected. + # We could check it first (before the previous branch), but + # there might be performance advantages at having the check + # be last. + if Detail.rounds_to_nearest? + # We have that fegetround() == FE_TONEAREST. + # Next is Clinger's fast path. + if pns.mantissa <= max_mantissa_fast_path + if pns.mantissa == 0 + value.value = pns.negative ? T.new(-0.0) : T.new(0.0) + return FromCharsResultT(UC).new(ptr, ec) + end + value.value = T.new(pns.mantissa) + if pns.exponent < 0 + value.value /= exact_power_of_ten(0_i64 &- pns.exponent) + else + value.value *= exact_power_of_ten(pns.exponent) + end + if pns.negative + value.value = -value.value + end + return FromCharsResultT(UC).new(ptr, ec) + end + else + # We do not have that fegetround() == FE_TONEAREST. + # Next is a modified Clinger's fast path, inspired by Jakub Jelínek's + # proposal + if pns.exponent >= 0 && pns.mantissa <= max_mantissa_fast_path(pns.exponent) + # Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD + if pns.mantissa == 0 + value.value = pns.negative ? 
T.new(-0.0) : T.new(0.0) + return FromCharsResultT(UC).new(ptr, ec) + end + value.value = T.new(pns.mantissa) * exact_power_of_ten(pns.exponent) + if pns.negative + value.value = -value.value + end + return FromCharsResultT(UC).new(ptr, ec) + end + end + end + am = compute_float(pns.exponent, pns.mantissa) + if pns.too_many_digits && am.power2 >= 0 + if am != compute_float(pns.exponent, pns.mantissa &+ 1) + am = compute_error(pns.exponent, pns.mantissa) + end + end + # If we called compute_float(pns.exponent, pns.mantissa) + # and we have an invalid power (am.power2 < 0), then we need to go the long + # way around again. This is very uncommon. + if am.power2 < 0 + am = digit_comp(pns, am) + end + value.value = to_float(pns.negative, am) + # Test for over/underflow. + if (pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || am.power2 == infinite_power + ec = Errno::ERANGE + end + FromCharsResultT(UC).new(ptr, ec) + end + + def from_chars_advanced(first : UC*, last : UC*, value : T*, options : ParseOptionsT(UC)) : FromCharsResultT(UC) forall UC + {% raise "only some floating-point types are supported" unless T == Float32 || T == Float64 %} + + # TODO(crystal): support UInt16 and UInt32 + {% raise "only UInt8 is supported" unless UC == UInt8 %} + + if first == last + return FromCharsResultT(UC).new(first, Errno::EINVAL) + end + pns = FastFloat.parse_number_string(first, last, options) + if !pns.valid + if options.format.no_infnan? + return FromCharsResultT(UC).new(first, Errno::EINVAL) + else + return Detail.parse_infnan(first, last, value) + end + end + + # call overload that takes parsed_number_string_t directly. 
+ from_chars_advanced(pns, value) + end + end +end diff --git a/src/lib_c/x86_64-windows-msvc/c/stdlib.cr b/src/lib_c/x86_64-windows-msvc/c/stdlib.cr index 63c38003fd6a..140e49a229a7 100644 --- a/src/lib_c/x86_64-windows-msvc/c/stdlib.cr +++ b/src/lib_c/x86_64-windows-msvc/c/stdlib.cr @@ -11,13 +11,13 @@ lib LibC fun free(ptr : Void*) : Void fun malloc(size : SizeT) : Void* fun realloc(ptr : Void*, size : SizeT) : Void* - fun strtof(nptr : Char*, endptr : Char**) : Float - fun strtod(nptr : Char*, endptr : Char**) : Double alias InvalidParameterHandler = WCHAR*, WCHAR*, WCHAR*, UInt, UIntPtrT -> fun _set_invalid_parameter_handler(pNew : InvalidParameterHandler) : InvalidParameterHandler # unused + fun strtof(nptr : Char*, endptr : Char**) : Float + fun strtod(nptr : Char*, endptr : Char**) : Double fun atof(nptr : Char*) : Double fun div(numer : Int, denom : Int) : DivT fun putenv(string : Char*) : Int diff --git a/src/string.cr b/src/string.cr index d47e87638976..9bc9d0c22701 100644 --- a/src/string.cr +++ b/src/string.cr @@ -1,9 +1,9 @@ -require "c/stdlib" require "c/string" require "crystal/small_deque" {% unless flag?(:without_iconv) %} require "crystal/iconv" {% end %} +require "float/fast_float" # A `String` represents an immutable sequence of UTF-8 characters. # @@ -738,10 +738,7 @@ class String # :ditto: def to_f64?(whitespace : Bool = true, strict : Bool = true) : Float64? - to_f_impl(whitespace: whitespace, strict: strict) do - v = LibC.strtod self, out endptr - {v, endptr} - end + Float::FastFloat.to_f64?(self, whitespace, strict) end # Same as `#to_f` but returns a Float32. @@ -751,59 +748,7 @@ class String # Same as `#to_f?` but returns a Float32. def to_f32?(whitespace : Bool = true, strict : Bool = true) : Float32? - to_f_impl(whitespace: whitespace, strict: strict) do - v = LibC.strtof self, out endptr - {v, endptr} - end - end - - private def to_f_impl(whitespace : Bool = true, strict : Bool = true, &) - return unless first_char = self[0]? 
- return unless whitespace || '0' <= first_char <= '9' || first_char.in?('-', '+', 'i', 'I', 'n', 'N') - - v, endptr = yield - - unless v.finite? - startptr = to_unsafe - if whitespace - while startptr.value.unsafe_chr.ascii_whitespace? - startptr += 1 - end - end - if startptr.value.unsafe_chr.in?('+', '-') - startptr += 1 - end - - if v.nan? - return unless startptr.value.unsafe_chr.in?('n', 'N') - else - return unless startptr.value.unsafe_chr.in?('i', 'I') - end - end - - string_end = to_unsafe + bytesize - - # blank string - return if endptr == to_unsafe - - if strict - if whitespace - while endptr < string_end && endptr.value.unsafe_chr.ascii_whitespace? - endptr += 1 - end - end - # reached the end of the string - v if endptr == string_end - else - ptr = to_unsafe - if whitespace - while ptr < string_end && ptr.value.unsafe_chr.ascii_whitespace? - ptr += 1 - end - end - # consumed some bytes - v if endptr > ptr - end + Float::FastFloat.to_f32?(self, whitespace, strict) end # Returns the `Char` at the given *index*. @@ -2166,7 +2111,8 @@ class String remove_excess_left(excess_left) end - private def calc_excess_right + # :nodoc: + def calc_excess_right if single_byte_optimizable? i = bytesize - 1 while i >= 0 && to_unsafe[i].unsafe_chr.ascii_whitespace? @@ -2204,7 +2150,8 @@ class String bytesize - byte_index end - private def calc_excess_left + # :nodoc: + def calc_excess_left if single_byte_optimizable? excess_left = 0 # All strings end with '\0', and it's not a whitespace