From c5455ce0d39b1efe15f7e6d60917eb5434ed2a70 Mon Sep 17 00:00:00 2001
From: Quinton Miller <nicetas.c@gmail.com>
Date: Sat, 21 Dec 2024 19:20:09 +0800
Subject: [PATCH] Implement `fast_float` for `String#to_f` (#15195)

This is a source port of https://github.com/fastfloat/fast_float, which is both locale-independent and platform-independent, meaning the special float values will work on MSYS2's MINGW64 environment too, as we are not calling `LibC.strtod` anymore. Additionally, non-ASCII whitespace characters are now stripped, just like `#to_i`.

**The current implementation doesn't accept hexfloats.**

This implementation brings a roughly 3x speedup, without any additional allocations.
---
 spec/manual/string_to_f32_spec.cr            |  27 +
 spec/manual/string_to_f_supplemental_spec.cr | 103 +++
 spec/std/string_spec.cr                      |   4 +
 spec/support/number.cr                       |  32 +
 src/float/fast_float.cr                      |  75 ++
 src/float/fast_float/ascii_number.cr         | 270 +++++++
 src/float/fast_float/bigint.cr               | 577 +++++++++++++++
 src/float/fast_float/decimal_to_binary.cr    | 177 +++++
 src/float/fast_float/digit_comparison.cr     | 399 +++++++++++
 src/float/fast_float/fast_table.cr           | 695 +++++++++++++++++++
 src/float/fast_float/float_common.cr         | 294 ++++++++
 src/float/fast_float/parse_number.cr         | 197 ++++++
 src/lib_c/x86_64-windows-msvc/c/stdlib.cr    |   4 +-
 src/string.cr                                |  67 +-
 14 files changed, 2859 insertions(+), 62 deletions(-)
 create mode 100644 spec/manual/string_to_f32_spec.cr
 create mode 100644 spec/manual/string_to_f_supplemental_spec.cr
 create mode 100644 src/float/fast_float.cr
 create mode 100644 src/float/fast_float/ascii_number.cr
 create mode 100644 src/float/fast_float/bigint.cr
 create mode 100644 src/float/fast_float/decimal_to_binary.cr
 create mode 100644 src/float/fast_float/digit_comparison.cr
 create mode 100644 src/float/fast_float/fast_table.cr
 create mode 100644 src/float/fast_float/float_common.cr
 create mode 100644 src/float/fast_float/parse_number.cr

diff --git a/spec/manual/string_to_f32_spec.cr b/spec/manual/string_to_f32_spec.cr
new file mode 100644
index 000000000000..6d0940b1190c
--- /dev/null
+++ b/spec/manual/string_to_f32_spec.cr
@@ -0,0 +1,27 @@
+require "spec"
+
+# Exhaustively checks that for all 4294967296 possible `Float32` values,
+# `to_s.to_f32` returns the original number. Splits the floats into 4096 bins
+# for better progress tracking. Also useful as a sort of benchmark.
+#
+# This was originally added when `String#to_f` moved from `LibC.strtod` to
+# `fast_float`, but is applicable to any other implementation as well.
+describe "x.to_s.to_f32 == x" do
+  (0_u32..0xFFF_u32).each do |i|
+    it "%03x00000..%03xfffff" % {i, i} do
+      0x100000.times do |j|
+        bits = i << 20 | j
+        float = bits.unsafe_as(Float32)
+        str = float.to_s
+        val = str.to_f32?.should_not be_nil
+
+        if float.nan?
+          val.nan?.should be_true
+        else
+          val.should eq(float)
+          Math.copysign(1, val).should eq(Math.copysign(1, float))
+        end
+      end
+    end
+  end
+end
diff --git a/spec/manual/string_to_f_supplemental_spec.cr b/spec/manual/string_to_f_supplemental_spec.cr
new file mode 100644
index 000000000000..1b016e22c86a
--- /dev/null
+++ b/spec/manual/string_to_f_supplemental_spec.cr
@@ -0,0 +1,108 @@
+# Runs the fast_float supplemental test suite:
+# https://github.com/fastfloat/supplemental_test_files
+#
+#   Supplemental data files for testing floating parsing (credit: Nigel Tao for
+#   the data)
+#
+#   LICENSE file (Apache 2): https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/LICENSE
+#
+# Due to the sheer volume of the test cases (5.2+ million test cases across
+# 270+ MB of text) these specs are not vendored into the Crystal repository.
+
+require "spec"
+require "http/client"
+require "../support/number"
+require "wait_group"
+
+# these specs permit underflow and overflow to return 0 and infinity
+# respectively (when `ret.ec == Errno::ERANGE`), so we have to use
+# `Float::FastFloat` directly
+def fast_float_to_f32(str)
+  value = uninitialized Float32
+  start = str.to_unsafe
+  finish = start + str.bytesize
+  options = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
+
+  ret = Float::FastFloat::BinaryFormat_Float32.new.from_chars_advanced(start, finish, pointerof(value), options)
+  {Errno::NONE, Errno::ERANGE}.should contain(ret.ec)
+  value
+end
+
+def fast_float_to_f64(str)
+  value = uninitialized Float64
+  start = str.to_unsafe
+  finish = start + str.bytesize
+  options = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
+
+  ret = Float::FastFloat::BinaryFormat_Float64.new.from_chars_advanced(start, finish, pointerof(value), options)
+  {Errno::NONE, Errno::ERANGE}.should contain(ret.ec)
+  value
+end
+
+RAW_BASE_URL = "https://raw.githubusercontent.com/fastfloat/supplemental_test_files/7cc512a7c60361ebe1baf54991d7905efdc62aa0/data/" # @1.0.0
+
+TEST_SUITES = %w(
+  freetype-2-7.txt
+  google-double-conversion.txt
+  google-wuffs.txt
+  ibm-fpgen.txt
+  lemire-fast-double-parser.txt
+  lemire-fast-float.txt
+  more-test-cases.txt
+  remyoudompheng-fptest-0.txt
+  remyoudompheng-fptest-1.txt
+  remyoudompheng-fptest-2.txt
+  remyoudompheng-fptest-3.txt
+  tencent-rapidjson.txt
+  ulfjack-ryu.txt
+)
+
+test_suite_cache = {} of String => Array({UInt32, UInt64, String})
+puts "Fetching #{TEST_SUITES.size} test suites"
+WaitGroup.wait do |wg|
+  TEST_SUITES.each do |suite|
+    wg.spawn do
+      url = RAW_BASE_URL + suite
+
+      cache = HTTP::Client.get(url) do |res|
+        res.body_io.each_line.map do |line|
+          args = line.split(' ')
+          raise "BUG: should have 4 args" unless args.size == 4
+
+          # f16_bits = args[0].to_u16(16)
+          f32_bits = args[1].to_u32(16)
+          f64_bits = args[2].to_u64(16)
+          str = args[3]
+
+          {f32_bits, f64_bits, str}
+        end.to_a
+      end
+
+      puts "#{cache.size} test cases cached from #{url}"
+      test_suite_cache[suite] = cache
+    end
+  end
+end
+puts "There are a total of #{test_suite_cache.sum(&.last.size)} test cases"
+
+describe String do
+  describe "#to_f" do
+    test_suite_cache.each do |suite, cache|
+      describe suite do
+        each_hardware_rounding_mode do |mode, mode_name|
+          it mode_name do
+            # `it` only registers the example; its body runs after
+            # `each_hardware_rounding_mode` has already restored the previous
+            # mode, so the mode must be re-applied inside the example itself
+            with_hardware_rounding_mode(mode) do
+              cache.each do |f32_bits, f64_bits, str|
+                fast_float_to_f32(str).unsafe_as(UInt32).should eq(f32_bits)
+                fast_float_to_f64(str).unsafe_as(UInt64).should eq(f64_bits)
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr
index 2bbc63f7e18e..72e05adab458 100644
--- a/spec/std/string_spec.cr
+++ b/spec/std/string_spec.cr
@@ -482,6 +482,7 @@ describe "String" do
     it { "1Y2P0IJ32E8E7".to_i64(36).should eq(9223372036854775807) }
   end
 
+  # more specs are available in `spec/manual/string_to_f_supplemental_spec.cr`
   it "does to_f" do
     expect_raises(ArgumentError) { "".to_f }
     "".to_f?.should be_nil
@@ -503,6 +504,7 @@ describe "String" do
     "  1234.56  ".to_f?(whitespace: false).should be_nil
     expect_raises(ArgumentError) { "  1234.56foo".to_f }
     "  1234.56foo".to_f?.should be_nil
+    "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f.should eq(1234.56_f64)
     "123.45 x".to_f64(strict: false).should eq(123.45_f64)
     expect_raises(ArgumentError) { "x1.2".to_f64 }
     "x1.2".to_f64?.should be_nil
@@ -547,6 +549,7 @@ describe "String" do
     "  1234.56  ".to_f32?(whitespace: false).should be_nil
     expect_raises(ArgumentError) { "  1234.56foo".to_f32 }
     "  1234.56foo".to_f32?.should be_nil
+    "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f32.should eq(1234.56_f32)
     "123.45 x".to_f32(strict: false).should eq(123.45_f32)
     expect_raises(ArgumentError) { "x1.2".to_f32 }
     "x1.2".to_f32?.should be_nil
@@ -590,6 +593,7 @@ describe "String" do
     "  1234.56  ".to_f64?(whitespace: false).should be_nil
     expect_raises(ArgumentError) { "  1234.56foo".to_f64 }
     "  1234.56foo".to_f64?.should be_nil
+    "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f64.should eq(1234.56_f64)
     "123.45 x".to_f64(strict: false).should eq(123.45_f64)
     expect_raises(ArgumentError) { "x1.2".to_f64 }
     "x1.2".to_f64?.should be_nil
diff --git a/spec/support/number.cr b/spec/support/number.cr
index 4ec22f9dcf87..404d2bd32438 100644
--- a/spec/support/number.cr
+++ b/spec/support/number.cr
@@ -94,3 +94,35 @@ macro hexfloat(str)
     ::Float64.parse_hexfloat({{ str }})
   {% end %}
 end
+
+# See also: https://github.com/crystal-lang/crystal/issues/15192
+lib LibC
+  {% if flag?(:win32) %}
+    FE_TONEAREST  = 0x00000000
+    FE_DOWNWARD   = 0x00000100
+    FE_UPWARD     = 0x00000200
+    FE_TOWARDZERO = 0x00000300
+  {% else %}
+    FE_TONEAREST  = 0x00000000
+    FE_DOWNWARD   = 0x00000400
+    FE_UPWARD     = 0x00000800
+    FE_TOWARDZERO = 0x00000C00
+  {% end %}
+
+  fun fegetround : Int
+  fun fesetround(round : Int) : Int
+end
+
+def with_hardware_rounding_mode(mode, &)
+  old_mode = LibC.fegetround
+  LibC.fesetround(mode)
+  yield ensure LibC.fesetround(old_mode)
+end
+
+def each_hardware_rounding_mode(&)
+  {% for mode in %w(FE_TONEAREST FE_DOWNWARD FE_UPWARD FE_TOWARDZERO) %}
+    with_hardware_rounding_mode(LibC::{{ mode.id }}) do
+      yield LibC::{{ mode.id }}, {{ mode }}
+    end
+  {% end %}
+end
diff --git a/src/float/fast_float.cr b/src/float/fast_float.cr
new file mode 100644
index 000000000000..010476db4bca
--- /dev/null
+++ b/src/float/fast_float.cr
@@ -0,0 +1,75 @@
+struct Float
+  # :nodoc:
+  # Source port of the floating-point part of fast_float for C++:
+  # https://github.com/fastfloat/fast_float
+  #
+  # fast_float implements the C++17 `std::from_chars`, which accepts a subset of
+  # the C `strtod` / `strtof`'s string format:
+  #
+  # - a leading plus sign is disallowed, but both fast_float and this port
+  #   accept it;
+  # - the exponent may be required or disallowed, depending on the format
+  #   argument (this port always allows both);
+  # - hexfloats are not enabled by default, and fast_float doesn't implement them;
+  #   (https://github.com/fastfloat/fast_float/issues/124)
+  # - hexfloats cannot start with `0x` or `0X`.
+  #
+  # The following is their license:
+  #
+  #   Licensed under either of Apache License, Version 2.0 or MIT license or
+  #   BOOST license.
+  #
+  #   Unless you explicitly state otherwise, any contribution intentionally
+  #   submitted for inclusion in this repository by you, as defined in the
+  #   Apache-2.0 license, shall be triple licensed as above, without any
+  #   additional terms or conditions.
+  #
+  # Main differences from the original fast_float:
+  #
+  # - Only `UC == UInt8` is implemented and tested, not the other wide chars;
+  # - No explicit SIMD (the original mainly uses this for wide char strings).
+  #
+  # The following compile-time configuration is assumed:
+  #
+  # - #define FASTFLOAT_ALLOWS_LEADING_PLUS
+  # - #define FLT_EVAL_METHOD 0
+  module FastFloat
+    # Current revision: https://github.com/fastfloat/fast_float/tree/v6.1.6
+
+    def self.to_f64?(str : String, whitespace : Bool, strict : Bool) : Float64?
+      value = uninitialized Float64
+      start = str.to_unsafe
+      finish = start + str.bytesize
+      options = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
+
+      if whitespace
+        start += str.calc_excess_left
+        finish -= str.calc_excess_right
+      end
+
+      ret = BinaryFormat_Float64.new.from_chars_advanced(start, finish, pointerof(value), options)
+      if ret.ec == Errno::NONE && (!strict || ret.ptr == finish)
+        value
+      end
+    end
+
+    def self.to_f32?(str : String, whitespace : Bool, strict : Bool) : Float32?
+      value = uninitialized Float32
+      start = str.to_unsafe
+      finish = start + str.bytesize
+      options = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
+
+      if whitespace
+        start += str.calc_excess_left
+        finish -= str.calc_excess_right
+      end
+
+      ret = BinaryFormat_Float32.new.from_chars_advanced(start, finish, pointerof(value), options)
+      if ret.ec == Errno::NONE && (!strict || ret.ptr == finish)
+        value
+      end
+    end
+  end
+end
+
+require "./fast_float/parse_number"
diff --git a/src/float/fast_float/ascii_number.cr b/src/float/fast_float/ascii_number.cr
new file mode 100644
index 000000000000..1c4b43ea4b7d
--- /dev/null
+++ b/src/float/fast_float/ascii_number.cr
@@ -0,0 +1,270 @@
+require "./float_common"
+
+module Float::FastFloat
+  # Next function can be micro-optimized, but compilers are entirely able to
+  # optimize it well.
+  def self.is_integer?(c : UC) : Bool forall UC
+    !(c > '9'.ord || c < '0'.ord)
+  end
+
+  # Read 8 UC into a u64. Truncates UC if not char.
+  def self.read8_to_u64(chars : UC*) : UInt64 forall UC
+    val = uninitialized UInt64
+    chars.as(UInt8*).copy_to(pointerof(val).as(UInt8*), sizeof(UInt64))
+    {% if IO::ByteFormat::SystemEndian == IO::ByteFormat::BigEndian %}
+      val.byte_swap
+    {% else %}
+      val
+    {% end %}
+  end
+
+  # credit  @aqrit
+  def self.parse_eight_digits_unrolled(val : UInt64) : UInt32
+    mask = 0x000000FF000000FF_u64
+    mul1 = 0x000F424000000064_u64 # 100 + (1000000ULL << 32)
+    mul2 = 0x0000271000000001_u64 # 1 + (10000ULL << 32)
+    val &-= 0x3030303030303030
+    val = (val &* 10) &+ val.unsafe_shr(8) # val = (val * 2561) >> 8
+    val = (((val & mask) &* mul1) &+ ((val.unsafe_shr(16) & mask) &* mul2)).unsafe_shr(32)
+    val.to_u32!
+  end
+
+  # Call this if chars are definitely 8 digits.
+  def self.parse_eight_digits_unrolled(chars : UC*) : UInt32 forall UC
+    parse_eight_digits_unrolled(read8_to_u64(chars))
+  end
+
+  # credit @aqrit
+  def self.is_made_of_eight_digits_fast?(val : UInt64) : Bool
+    ((val &+ 0x4646464646464646_u64) | (val &- 0x3030303030303030_u64)) & 0x8080808080808080_u64 == 0
+  end
+
+  # NOTE(crystal): returns {p, i}
+  def self.loop_parse_if_eight_digits(p : UInt8*, pend : UInt8*, i : UInt64) : {UInt8*, UInt64}
+    # optimizes better than parse_if_eight_digits_unrolled() for UC = char.
+    while pend - p >= 8 && is_made_of_eight_digits_fast?(read8_to_u64(p))
+      i = i &* 100000000 &+ parse_eight_digits_unrolled(read8_to_u64(p)) # in rare cases, this will overflow, but that's ok
+      p += 8
+    end
+    {p, i}
+  end
+
+  enum ParseError
+    NoError
+
+    # [JSON-only] The minus sign must be followed by an integer.
+    MissingIntegerAfterSign
+
+    # A sign must be followed by an integer or dot.
+    MissingIntegerOrDotAfterSign
+
+    # [JSON-only] The integer part must not have leading zeros.
+    LeadingZerosInIntegerPart
+
+    # [JSON-only] The integer part must have at least one digit.
+    NoDigitsInIntegerPart
+
+    # [JSON-only] If there is a decimal point, there must be digits in the
+    # fractional part.
+    NoDigitsInFractionalPart
+
+    # The mantissa must have at least one digit.
+    NoDigitsInMantissa
+
+    # Scientific notation requires an exponential part.
+    MissingExponentialPart
+  end
+
+  struct ParsedNumberStringT(UC)
+    property exponent : Int64 = 0
+    property mantissa : UInt64 = 0
+    property lastmatch : UC* = Pointer(UC).null
+    property negative : Bool = false
+    property valid : Bool = false
+    property too_many_digits : Bool = false
+    # contains the range of the significant digits
+    property integer : Slice(UC) = Slice(UC).empty  # non-nullable
+    property fraction : Slice(UC) = Slice(UC).empty # nullable
+    property error : ParseError = :no_error
+  end
+
+  alias ByteSpan = ::Bytes
+  alias ParsedNumberString = ParsedNumberStringT(UInt8)
+
+  def self.report_parse_error(p : UC*, error : ParseError) : ParsedNumberStringT(UC) forall UC
+    answer = ParsedNumberStringT(UC).new
+    answer.valid = false
+    answer.lastmatch = p
+    answer.error = error
+    answer
+  end
+
+  # Assuming that you use no more than 19 digits, this will parse an ASCII
+  # string.
+  def self.parse_number_string(p : UC*, pend : UC*, options : ParseOptionsT(UC)) : ParsedNumberStringT(UC) forall UC
+    fmt = options.format
+    decimal_point = options.decimal_point
+
+    answer = ParsedNumberStringT(UInt8).new
+    answer.valid = false
+    answer.too_many_digits = false
+    answer.negative = p.value === '-'
+
+    if p.value === '-' || (!fmt.json_fmt? && p.value === '+')
+      p += 1
+      if p == pend
+        return report_parse_error(p, :missing_integer_or_dot_after_sign)
+      end
+      if fmt.json_fmt?
+        if !is_integer?(p.value) # a sign must be followed by an integer
+          return report_parse_error(p, :missing_integer_after_sign)
+        end
+      else
+        if !is_integer?(p.value) && p.value != decimal_point # a sign must be followed by an integer or the dot
+          return report_parse_error(p, :missing_integer_or_dot_after_sign)
+        end
+      end
+    end
+    start_digits = p
+
+    i = 0_u64 # an unsigned int avoids signed overflows (which are bad)
+
+    while p != pend && is_integer?(p.value)
+      # a multiplication by 10 is cheaper than an arbitrary integer multiplication
+      i = i &* 10 &+ (p.value &- '0'.ord).to_u64! # might overflow, we will handle the overflow later
+      p += 1
+    end
+    end_of_integer_part = p
+    digit_count = (end_of_integer_part - start_digits).to_i32!
+    answer.integer = Slice.new(start_digits, digit_count)
+    if fmt.json_fmt?
+      # at least 1 digit in integer part, without leading zeros
+      if digit_count == 0
+        return report_parse_error(p, :no_digits_in_integer_part)
+      end
+      if start_digits[0] === '0' && digit_count > 1
+        return report_parse_error(p, :leading_zeros_in_integer_part)
+      end
+    end
+
+    exponent = 0_i64
+    has_decimal_point = p != pend && p.value == decimal_point
+    if has_decimal_point
+      p += 1
+      before = p
+      # can occur at most twice without overflowing, but let it occur more, since
+      # for integers with many digits, digit parsing is the primary bottleneck.
+      p, i = loop_parse_if_eight_digits(p, pend, i)
+
+      while p != pend && is_integer?(p.value)
+        digit = (p.value &- '0'.ord).to_u8!
+        p += 1
+        i = i &* 10 &+ digit # in rare cases, this will overflow, but that's ok
+      end
+      exponent = before - p
+      answer.fraction = Slice.new(before, (p - before).to_i32!)
+      digit_count &-= exponent
+    end
+    if fmt.json_fmt?
+      # at least 1 digit in fractional part
+      if has_decimal_point && exponent == 0
+        return report_parse_error(p, :no_digits_in_fractional_part)
+      end
+    elsif digit_count == 0 # we must have encountered at least one integer!
+      return report_parse_error(p, :no_digits_in_mantissa)
+    end
+    exp_number = 0_i64 # explicit exponential part
+    if (fmt.scientific? && p != pend && p.value.unsafe_chr.in?('e', 'E')) ||
+       (fmt.fortran_fmt? && p != pend && p.value.unsafe_chr.in?('+', '-', 'd', 'D'))
+      location_of_e = p
+      if p.value.unsafe_chr.in?('e', 'E', 'd', 'D')
+        p += 1
+      end
+      neg_exp = false
+      if p != pend && p.value === '-'
+        neg_exp = true
+        p += 1
+      elsif p != pend && p.value === '+' # '+' on exponent is allowed by C++17 20.19.3.(7.1)
+        p += 1
+      end
+      if p == pend || !is_integer?(p.value)
+        if !fmt.fixed?
+          # The exponential part is invalid for scientific notation, so it must
+          # be a trailing token for fixed notation. However, fixed notation is
+          # disabled, so report a scientific notation error.
+          return report_parse_error(p, :missing_exponential_part)
+        end
+        # Otherwise, we will be ignoring the 'e'.
+        p = location_of_e
+      else
+        while p != pend && is_integer?(p.value)
+          digit = (p.value &- '0'.ord).to_u8!
+          if exp_number < 0x10000000
+            exp_number = exp_number &* 10 &+ digit
+          end
+          p += 1
+        end
+        if neg_exp
+          exp_number = 0_i64 &- exp_number
+        end
+        exponent &+= exp_number
+      end
+    else
+      # If it is scientific and not fixed, we have to bail out.
+      if fmt.scientific? && !fmt.fixed?
+        return report_parse_error(p, :missing_exponential_part)
+      end
+    end
+    answer.lastmatch = p
+    answer.valid = true
+
+    # If we frequently had to deal with long strings of digits,
+    # we could extend our code by using a 128-bit integer instead
+    # of a 64-bit integer. However, this is uncommon.
+    #
+    # We can deal with up to 19 digits.
+    if digit_count > 19 # this is uncommon
+      # It is possible that the integer had an overflow.
+      # We have to handle the case where we have 0.0000somenumber.
+      # We need to be mindful of the case where we only have zeroes...
+      # E.g., 0.000000000...000.
+      start = start_digits
+      while start != pend && (start.value === '0' || start.value == decimal_point)
+        if start.value === '0'
+          digit_count &-= 1
+        end
+        start += 1
+      end
+
+      if digit_count > 19
+        answer.too_many_digits = true
+        # Let us start again, this time, avoiding overflows.
+        # We don't need to check if is_integer, since we use the
+        # pre-tokenized spans from above.
+        i = 0_u64
+        p = answer.integer.to_unsafe
+        int_end = p + answer.integer.size
+        minimal_nineteen_digit_integer = 1000000000000000000_u64
+        while i < minimal_nineteen_digit_integer && p != int_end
+          i = i &* 10 &+ (p.value &- '0'.ord).to_u64!
+          p += 1
+        end
+        if i >= minimal_nineteen_digit_integer # We have a big integers
+          exponent = (end_of_integer_part - p) &+ exp_number
+        else # We have a value with a fractional component.
+          p = answer.fraction.to_unsafe
+          frac_end = p + answer.fraction.size
+          while i < minimal_nineteen_digit_integer && p != frac_end
+            i = i &* 10 &+ (p.value &- '0'.ord).to_u64!
+            p += 1
+          end
+          exponent = (answer.fraction.to_unsafe - p) &+ exp_number
+        end
+        # We have now corrected both exponent and i, to a truncated value
+      end
+    end
+    answer.exponent = exponent
+    answer.mantissa = i
+    answer
+  end
+end
diff --git a/src/float/fast_float/bigint.cr b/src/float/fast_float/bigint.cr
new file mode 100644
index 000000000000..14b0bb2d0549
--- /dev/null
+++ b/src/float/fast_float/bigint.cr
@@ -0,0 +1,577 @@
+require "./float_common"
+
+module Float::FastFloat
+  # the limb width: we want efficient multiplication of double the bits in
+  # limb, or for 64-bit limbs, at least 64-bit multiplication where we can
+  # extract the high and low parts efficiently. this is every 64-bit
+  # architecture except for sparc, which emulates 128-bit multiplication.
+  # we might have platforms where `CHAR_BIT` is not 8, so let's avoid
+  # doing `8 * sizeof(limb)`.
+  {% if flag?(:bits64) %}
+    alias Limb = UInt64
+    LIMB_BITS = 64
+  {% else %}
+    alias Limb = UInt32
+    LIMB_BITS = 32
+  {% end %}
+
+  alias LimbSpan = Slice(Limb)
+
+  # number of bits in a bigint. this needs to be at least the number
+  # of bits required to store the largest bigint, which is
+  # `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or
+  # ~3600 bits, so we round to 4000.
+  BIGINT_BITS = 4000
+  {% begin %}
+    BIGINT_LIMBS = {{ BIGINT_BITS // LIMB_BITS }}
+  {% end %}
+
+  # vector-like type that is allocated on the stack. the entire
+  # buffer is pre-allocated, and only the length changes.
+  # NOTE(crystal): Deviates a lot from the original implementation to reuse
+  # `Indexable` as much as possible. Contrast with `Crystal::SmallDeque` and
+  # `Crystal::Tracing::BufferIO`
+  struct Stackvec(Size)
+    include Indexable::Mutable(Limb)
+
+    @data = uninitialized Limb[Size]
+
+    # we never need more than 150 limbs
+    @length = 0_u16
+
+    def unsafe_fetch(index : Int) : Limb
+      @data.to_unsafe[index]
+    end
+
+    def unsafe_put(index : Int, value : Limb) : Limb
+      @data.to_unsafe[index] = value
+    end
+
+    def size : Int32
+      @length.to_i32!
+    end
+
+    def to_unsafe : Limb*
+      @data.to_unsafe
+    end
+
+    def to_slice : LimbSpan
+      LimbSpan.new(@data.to_unsafe, @length)
+    end
+
+    def initialize
+    end
+
+    # create stack vector from existing limb span.
+    def initialize(s : LimbSpan)
+      try_extend(s)
+    end
+
+    # index from the end of the container
+    def rindex(index : Int) : Limb
+      rindex = @length &- index &- 1
+      @data.to_unsafe[rindex]
+    end
+
+    # set the length, without bounds checking.
+    def size=(@length : UInt16) : UInt16
+      length
+    end
+
+    def capacity : Int32
+      Size.to_i32!
+    end
+
+    # append item to vector, without bounds checking.
+    def push_unchecked(value : Limb) : Nil
+      @data.to_unsafe[@length] = value
+      @length &+= 1
+    end
+
+    # append item to vector, returning if item was added
+    def try_push(value : Limb) : Bool
+      if size < capacity
+        push_unchecked(value)
+        true
+      else
+        false
+      end
+    end
+
+    # add items to the vector, from a span, without bounds checking
+    def extend_unchecked(s : LimbSpan) : Nil
+      ptr = @data.to_unsafe + @length
+      s.to_unsafe.copy_to(ptr, s.size)
+      @length &+= s.size
+    end
+
+    # try to add items to the vector, returning if items were added
+    def try_extend(s : LimbSpan) : Bool
+      if size &+ s.size <= capacity
+        extend_unchecked(s)
+        true
+      else
+        false
+      end
+    end
+
+    # resize the vector, without bounds checking
+    # if the new size is longer than the vector, assign value to each
+    # appended item.
+    def resize_unchecked(new_len : UInt16, value : Limb) : Nil
+      if new_len > @length
+        count = new_len &- @length
+        first = @data.to_unsafe + @length
+        count.times { |i| first[i] = value }
+        @length = new_len
+      else
+        @length = new_len
+      end
+    end
+
+    # try to resize the vector, returning if the vector was resized.
+    def try_resize(new_len : UInt16, value : Limb) : Bool
+      if new_len > capacity
+        false
+      else
+        resize_unchecked(new_len, value)
+        true
+      end
+    end
+
+    # check if any limbs are non-zero after the given index.
+    # this needs to be done in reverse order, since the index
+    # is relative to the most significant limbs.
+    def nonzero?(index : Int) : Bool
+      while index < size
+        if rindex(index) != 0
+          return true
+        end
+        index &+= 1
+      end
+      false
+    end
+
+    # normalize the big integer, so most-significant zero limbs are removed.
+    def normalize : Nil
+      while @length > 0 && rindex(0) == 0
+        @length &-= 1
+      end
+    end
+  end
+
+  # NOTE(crystal): returns also *truncated* by value (ditto below)
+  def self.empty_hi64 : {UInt64, Bool}
+    truncated = false
+    {0_u64, truncated}
+  end
+
+  def self.uint64_hi64(r0 : UInt64) : {UInt64, Bool}
+    truncated = false
+    shl = r0.leading_zeros_count
+    {r0.unsafe_shl(shl), truncated}
+  end
+
+  def self.uint64_hi64(r0 : UInt64, r1 : UInt64) : {UInt64, Bool}
+    shl = r0.leading_zeros_count
+    if shl == 0
+      truncated = r1 != 0
+      {r0, truncated}
+    else
+      shr = 64 &- shl
+      truncated = r1.unsafe_shl(shl) != 0
+      {r0.unsafe_shl(shl) | r1.unsafe_shr(shr), truncated}
+    end
+  end
+
+  def self.uint32_hi64(r0 : UInt32) : {UInt64, Bool}
+    uint64_hi64(r0.to_u64!)
+  end
+
+  def self.uint32_hi64(r0 : UInt32, r1 : UInt32) : {UInt64, Bool}
+    x0 = r0.to_u64!
+    x1 = r1.to_u64!
+    uint64_hi64(x0.unsafe_shl(32) | x1)
+  end
+
+  def self.uint32_hi64(r0 : UInt32, r1 : UInt32, r2 : UInt32) : {UInt64, Bool}
+    x0 = r0.to_u64!
+    x1 = r1.to_u64!
+    x2 = r2.to_u64!
+    uint64_hi64(x0, x1.unsafe_shl(32) | x2)
+  end
+
+  # add two small integers, checking for overflow.
+  # we want an efficient operation.
+  # NOTE(crystal): returns also *overflow* by value
+  def self.scalar_add(x : Limb, y : Limb) : {Limb, Bool}
+    z = x &+ y
+    overflow = z < x
+    {z, overflow}
+  end
+
+  # multiply two small integers, getting both the high and low bits.
+  # NOTE(crystal): passes *carry* in and out by value
+  def self.scalar_mul(x : Limb, y : Limb, carry : Limb) : {Limb, Limb}
+    {% if Limb == UInt64 %}
+      z = x.to_u128! &* y.to_u128! &+ carry
+      carry = z.unsafe_shr(LIMB_BITS).to_u64!
+      {z.to_u64!, carry}
+    {% else %}
+      z = x.to_u64! &* y.to_u64! &+ carry
+      carry = z.unsafe_shr(LIMB_BITS).to_u32!
+      {z.to_u32!, carry}
+    {% end %}
+  end
+
+  # add scalar value to bigint starting from offset.
+  # used in grade school multiplication
+  def self.small_add_from(vec : Stackvec(Size)*, y : Limb, start : Int) : Bool forall Size
+    index = start
+    carry = y
+
+    while carry != 0 && index < vec.value.size
+      x, overflow = scalar_add(vec.value.unsafe_fetch(index), carry)
+      vec.value.unsafe_put(index, x)
+      carry = Limb.new!(overflow ? 1 : 0)
+      index &+= 1
+    end
+    if carry != 0
+      fastfloat_try vec.value.try_push(carry)
+    end
+    true
+  end
+
+  # add scalar value to bigint.
+  def self.small_add(vec : Stackvec(Size)*, y : Limb) : Bool forall Size
+    small_add_from(vec, y, 0)
+  end
+
+  # multiply bigint by scalar value.
+  def self.small_mul(vec : Stackvec(Size)*, y : Limb) : Bool forall Size
+    carry = Limb.zero
+    i = 0
+    while i < vec.value.size
+      xi = vec.value.unsafe_fetch(i)
+      z, carry = scalar_mul(xi, y, carry)
+      vec.value.unsafe_put(i, z)
+      i &+= 1
+    end
+    if carry != 0
+      fastfloat_try vec.value.try_push(carry)
+    end
+    true
+  end
+
+  # add bigint to bigint starting from index.
+  # used in grade school multiplication
+  def self.large_add_from(x : Stackvec(Size)*, y : LimbSpan, start : Int) : Bool forall Size
+    # the effective x buffer is from `xstart..x.len()`, so exit early
+    # if we can't get that current range.
+    if x.value.size < start || y.size > x.value.size &- start
+      fastfloat_try x.value.try_resize((y.size &+ start).to_u16!, 0)
+    end
+
+    carry = false
+    index = 0
+    while index < y.size
+      xi = x.value.unsafe_fetch(index &+ start)
+      yi = y.unsafe_fetch(index)
+      c2 = false
+      xi, c1 = scalar_add(xi, yi)
+      if carry
+        xi, c2 = scalar_add(xi, 1)
+      end
+      x.value.unsafe_put(index &+ start, xi)
+      carry = c1 || c2
+      index &+= 1
+    end
+
+    # handle overflow
+    if carry
+      fastfloat_try small_add_from(x, 1, y.size &+ start)
+    end
+    true
+  end
+
+  # add bigint to bigint.
+  def self.large_add_from(x : Stackvec(Size)*, y : LimbSpan) : Bool forall Size
+    large_add_from(x, y, 0)
+  end
+
+  # grade-school multiplication algorithm: multiplies *x* by the limbs of *y*, in place.
+  def self.long_mul(x : Stackvec(Size)*, y : LimbSpan) : Bool forall Size
+    xs = x.value.to_slice
+    z = Stackvec(Size).new(xs) # built from the limbs of x before x is modified below
+    zs = z.to_slice
+
+    if y.size != 0
+      y0 = y.unsafe_fetch(0)
+      fastfloat_try small_mul(x, y0) # x now holds (original x) * y[0]
+      (1...y.size).each do |index|
+        yi = y.unsafe_fetch(index)
+        zi = Stackvec(Size).new
+        if yi != 0
+          # re-use the same buffer throughout
+          zi.size = 0
+          fastfloat_try zi.try_extend(zs)
+          fastfloat_try small_mul(pointerof(zi), yi) # zi = z * y[index]
+          zis = zi.to_slice
+          fastfloat_try large_add_from(x, zis, index) # added into x starting at limb `index`
+        end
+      end
+    end
+
+    x.value.normalize
+    true
+  end
+
+  # multiply *x* by *y* in place; a one-limb *y* takes the scalar path.
+  def self.large_mul(x : Stackvec(Size)*, y : LimbSpan) : Bool forall Size
+    unless y.size == 1
+      fastfloat_try long_mul(x, y)
+      return true
+    end
+    fastfloat_try small_mul(x, y.unsafe_fetch(0))
+    true
+  end
+
+  module Pow5Tables # precomputed powers of 5 consumed by `Bigint#pow5`
+    LARGE_STEP = 135_u32 # exponent removed per multiplication by LARGE_POWER_OF_5 (see `Bigint#pow5`)
+
+    SMALL_POWER_OF_5 = [ # 5**0 through 5**27, indexed by exponent
+      1_u64,
+      5_u64,
+      25_u64,
+      125_u64,
+      625_u64,
+      3125_u64,
+      15625_u64,
+      78125_u64,
+      390625_u64,
+      1953125_u64,
+      9765625_u64,
+      48828125_u64,
+      244140625_u64,
+      1220703125_u64,
+      6103515625_u64,
+      30517578125_u64,
+      152587890625_u64,
+      762939453125_u64,
+      3814697265625_u64,
+      19073486328125_u64,
+      95367431640625_u64,
+      476837158203125_u64,
+      2384185791015625_u64,
+      11920928955078125_u64,
+      59604644775390625_u64,
+      298023223876953125_u64,
+      1490116119384765625_u64,
+      7450580596923828125_u64,
+    ]
+
+    {% if Limb == UInt64 %}
+      LARGE_POWER_OF_5 = Slice[
+        1414648277510068013_u64, 9180637584431281687_u64, 4539964771860779200_u64,
+        10482974169319127550_u64, 198276706040285095_u64,
+      ]
+    {% else %}
+      LARGE_POWER_OF_5 = Slice[
+        4279965485_u32, 329373468_u32, 4020270615_u32, 2137533757_u32, 4287402176_u32,
+        1057042919_u32, 1071430142_u32, 2440757623_u32, 381945767_u32, 46164893_u32,
+      ]
+    {% end %}
+  end
+
+  # big integer type. implements a small subset of big integer
+  # arithmetic, using simple algorithms since asymptotically
+  # faster algorithms are slower for a small number of limbs.
+  # all operations assume the big-integer is normalized.
+  # NOTE(crystal): contrast with ::BigInt
+  struct Bigint
+    # storage of the limbs, in little-endian order.
+    @vec = Stackvec(BIGINT_LIMBS).new
+
+    def initialize # creates a bigint without pushing any limb
+    end
+
+    def initialize(value : UInt64) # creates a bigint from the limbs of *value*
+      {% if Limb == UInt64 %}
+        @vec.push_unchecked(value)
+      {% else %}
+        @vec.push_unchecked(value.to_u32!)
+        @vec.push_unchecked(value.unsafe_shr(32).to_u32!)
+      {% end %}
+      @vec.normalize # NOTE(review): presumably drops zero high limbs; confirm in Stackvec
+    end
+
+    # get the high 64 bits of the bigint, and whether any lower bits were truncated.
+    # this is to get the significant digits for the float.
+    # NOTE(crystal): returns `{hi64, truncated}`, i.e. *truncated* is returned by value too
+    def hi64 : {UInt64, Bool}
+      {% if Limb == UInt64 %}
+        if @vec.empty?
+          FastFloat.empty_hi64
+        elsif @vec.size == 1
+          FastFloat.uint64_hi64(@vec.rindex(0))
+        else
+          result, truncated = FastFloat.uint64_hi64(@vec.rindex(0), @vec.rindex(1))
+          truncated ||= @vec.nonzero?(2) # NOTE(review): presumably checks the limbs below the top two
+          {result, truncated}
+        end
+      {% else %}
+        if @vec.empty?
+          FastFloat.empty_hi64
+        elsif @vec.size == 1
+          FastFloat.uint32_hi64(@vec.rindex(0))
+        elsif @vec.size == 2
+          FastFloat.uint32_hi64(@vec.rindex(0), @vec.rindex(1))
+        else
+          result, truncated = FastFloat.uint32_hi64(@vec.rindex(0), @vec.rindex(1), @vec.rindex(2))
+          truncated ||= @vec.nonzero?(3) # NOTE(review): presumably checks the limbs below the top three
+          {result, truncated}
+        end
+      {% end %}
+    end
+
+    # three-way comparison of two big integers, both assumed to be
+    # normalized. returns 1 when this bigint is larger, -1 when *other*
+    # is larger, and 0 when they are equal. a bigint with more limbs is
+    # always larger; with equal limb counts the limbs are compared from
+    # the highest index down, i.e. in reverse storage order, since the
+    # limbs are stored in little-endian order.
+    def compare(other : Bigint*) : Int32
+      # different limb counts decide the order immediately
+      if @vec.size != other.value.@vec.size
+        return @vec.size > other.value.@vec.size ? 1 : -1
+      end
+
+      # same length: walk from the last limb downwards
+      index = @vec.size
+      while index > 0
+        index &-= 1
+        mine = @vec.unsafe_fetch(index)
+        theirs = other.value.@vec.unsafe_fetch(index)
+        if mine != theirs
+          return mine > theirs ? 1 : -1
+        end
+      end
+
+      # every limb matched
+      0
+    end
+
+    # shift left each limb n bits, carrying over to the new limb
+    # returns true if we were able to shift all the digits.
+    def shl_bits(n : Int) : Bool
+      # Internally, for each item, we shift left by n, and add the previous
+      # right shifted limb-bits.
+      # For example, we transform (for u8) shifted left 2, to:
+      #      b10100100 b01000010
+      #      b10 b10010001 b00001000
+      shl = n
+      shr = LIMB_BITS &- n # NOTE(review): n == 0 would make this a full-width shift; `shl` only passes a nonzero remainder
+      prev = Limb.zero
+      index = 0
+      while index < @vec.size
+        xi = @vec.unsafe_fetch(index)
+        @vec.unsafe_put(index, xi.unsafe_shl(shl) | prev.unsafe_shr(shr))
+        prev = xi # keep the unshifted limb: its high bits feed the next limb
+        index &+= 1
+      end
+
+      carry = prev.unsafe_shr(shr) # bits shifted out of the last limb
+      if carry != 0
+        return @vec.try_push(carry)
+      end
+      true
+    end
+
+    # move the limbs left by `n` limbs, zero-filling the vacated low limbs.
+    # returns false, leaving the bigint untouched, when the shifted value
+    # would not fit in the capacity; an empty bigint is left as is.
+    def shl_limbs(n : Int) : Bool
+      return false if n &+ @vec.size > @vec.capacity
+      return true if @vec.empty?
+
+      # slide the existing limbs up by n positions
+      base = @vec.to_unsafe
+      base.move_to(base + n, @vec.size)
+
+      # the low n limbs are now vacant: zero them
+      n.times { |i| base[i] = 0 }
+
+      # account for the n limbs that were inserted
+      @vec.size = (@vec.size &+ n).to_u16!
+      true
+    end
+
+    # shift the whole bigint left by `n` bits: the sub-limb part of the
+    # shift is applied first, then the whole-limb part.
+    def shl(n : Int) : Bool
+      whole_limbs = n.unsafe_div(LIMB_BITS)
+      extra_bits = n.unsafe_mod(LIMB_BITS)
+      unless extra_bits == 0
+        FastFloat.fastfloat_try shl_bits(extra_bits)
+      end
+      return true if whole_limbs == 0
+      FastFloat.fastfloat_try shl_limbs(whole_limbs)
+      true
+    end
+
+    # get the number of leading zeros in the bigint, taken from the limb
+    # at `rindex(0)`; an empty bigint reports 0.
+    def ctlz : Int32
+      return 0 if @vec.empty?
+
+      top = @vec.rindex(0)
+      top.leading_zeros_count.to_i32!
+    end
+
+    # get the number of bits in the bigint: all limb bits minus `ctlz`.
+    def bit_length : Int32
+      total_bits = LIMB_BITS &* @vec.size
+      (total_bits &- ctlz).to_i32!
+    end
+
+    def mul(y : Limb) : Bool # multiplies this bigint by the limb *y* in place
+      FastFloat.small_mul(pointerof(@vec), y) # the Bool result is passed through unchanged
+    end
+
+    def add(y : Limb) : Bool # adds the limb *y* to this bigint in place
+      FastFloat.small_add(pointerof(@vec), y) # the Bool result is passed through unchanged
+    end
+
+    # multiply as if by 2 raised to a power, implemented as a left shift by *exp* bits.
+    def pow2(exp : UInt32) : Bool
+      shl(exp)
+    end
+
+    # multiply as if by 5 raised to a power.
+    def pow5(exp : UInt32) : Bool
+      # first consume the exponent LARGE_STEP at a time with the multi-limb table entry
+      large = Pow5Tables::LARGE_POWER_OF_5
+      while exp >= Pow5Tables::LARGE_STEP
+        FastFloat.fastfloat_try FastFloat.large_mul(pointerof(@vec), large)
+        exp &-= Pow5Tables::LARGE_STEP
+      end
+      small_step = {{ Limb == UInt64 ? 27_u32 : 13_u32 }} # largest n for which 5**n fits in one limb
+      max_native = {{ Limb == UInt64 ? 7450580596923828125_u64 : 1220703125_u32 }} # 5**small_step
+      while exp >= small_step
+        FastFloat.fastfloat_try FastFloat.small_mul(pointerof(@vec), max_native)
+        exp &-= small_step
+      end
+      if exp != 0 # the remainder is below small_step, so a single table lookup finishes the job
+        FastFloat.fastfloat_try FastFloat.small_mul(pointerof(@vec), Limb.new!(Pow5Tables::SMALL_POWER_OF_5.unsafe_fetch(exp)))
+      end
+
+      true
+    end
+
+    # multiply as if by 10 raised to a power: by 5**exp, then shift left by exp bits.
+    def pow10(exp : UInt32) : Bool
+      FastFloat.fastfloat_try pow5(exp)
+      shl(exp)
+    end
+  end
+end
diff --git a/src/float/fast_float/decimal_to_binary.cr b/src/float/fast_float/decimal_to_binary.cr
new file mode 100644
index 000000000000..eea77c44c6be
--- /dev/null
+++ b/src/float/fast_float/decimal_to_binary.cr
@@ -0,0 +1,177 @@
+require "./float_common"
+require "./fast_table"
+
+module Float::FastFloat
+  # This will compute or rather approximate w * 5**q and return a pair of 64-bit
+  # words approximating the result, with the "high" part corresponding to the
+  # most significant bits and the low part corresponding to the least significant
+  # bits.
+  def self.compute_product_approximation(q : Int64, w : UInt64, bit_precision : Int) : Value128
+    power_of_five_128 = Powers::POWER_OF_FIVE_128.to_unsafe
+
+    index = 2 &* (q &- Powers::SMALLEST_POWER_OF_FIVE) # two UInt64 table words per power of five
+    # For small values of q, e.g., q in [0,27], the answer is always exact
+    # because the full 128-bit product of w and power_of_five_128[index]
+    # computed on the next line already gives the exact answer.
+    firstproduct = w.to_u128! &* power_of_five_128[index]
+
+    precision_mask = bit_precision < 64 ? 0xFFFFFFFFFFFFFFFF_u64.unsafe_shr(bit_precision) : 0xFFFFFFFFFFFFFFFF_u64
+    if firstproduct.unsafe_shr(64).bits_set?(precision_mask) # could further guard with  (lower + w < lower)
+      # regarding the second product, we only need secondproduct.high, but our
+      # expectation is that the compiler will optimize this extra work away if
+      # needed.
+      secondproduct = w.to_u128! &* power_of_five_128[index &+ 1]
+      firstproduct &+= secondproduct.unsafe_shr(64) # 128-bit add: carries from the low word reach the high word
+    end
+    Value128.new(firstproduct)
+  end
+
+  module Detail
+    # For q in (0,350), we have that
+    #  f = (((152170 + 65536) * q ) >> 16);
+    # is equal to
+    #   floor(p) + q
+    # where
+    #   p = log(5**q)/log(2) = q * log(5)/log(2)
+    #
+    # For negative values of q in (-400,0), we have that
+    #  f = (((152170 + 65536) * q ) >> 16);
+    # is equal to
+    #   -ceil(p) + q
+    # where
+    #   p = log(5**-q)/log(2) = -q * log(5)/log(2)
+    def self.power(q : Int32) : Int32
+      ((152170 &+ 65536) &* q).unsafe_shr(16) &+ 63 # f as described above, plus a constant 63
+    end
+  end
+
+  module BinaryFormat(T, EquivUint)
+    # create an adjusted mantissa, biased by the invalid power2 (INVALID_AM_BIAS),
+    # for significant digits already multiplied by 10 ** q.
+    def compute_error_scaled(q : Int64, w : UInt64, lz : Int) : AdjustedMantissa
+      hilz = w.unsafe_shr(63).to_i32! ^ 1 # 1 when the top bit of w is clear, otherwise 0
+      bias = mantissa_explicit_bits &- minimum_exponent
+
+      AdjustedMantissa.new(
+        mantissa: w.unsafe_shl(hilz), # shift left by at most one bit, only when the top bit was clear
+        power2: Detail.power(q.to_i32!) &+ bias &- hilz &- lz &- 62 &+ INVALID_AM_BIAS,
+      )
+    end
+
+    # approximates w * 10 ** q without rounding the representation up.
+    # the power2 of the result carries the INVALID_AM_BIAS offset added by
+    # `compute_error_scaled`.
+    def compute_error(q : Int64, w : UInt64) : AdjustedMantissa
+      leading = w.leading_zeros_count.to_i32!
+      scaled = FastFloat.compute_product_approximation(q, w.unsafe_shl(leading), mantissa_explicit_bits &+ 3)
+      compute_error_scaled(q, scaled.high, leading)
+    end
+
+    # w * 10 ** q
+    # The returned value should be a valid ieee64 number that simply needs to be
+    # packed. However, in some very rare cases, the computation will fail. In such
+    # cases, we return an adjusted_mantissa with a negative power of 2: the caller
+    # should recompute in such cases.
+    def compute_float(q : Int64, w : UInt64) : AdjustedMantissa
+      if w == 0 || q < smallest_power_of_ten
+        # result should be zero
+        return AdjustedMantissa.new(
+          power2: 0,
+          mantissa: 0,
+        )
+      end
+      if q > largest_power_of_ten
+        # we want to get infinity:
+        return AdjustedMantissa.new(
+          power2: infinite_power,
+          mantissa: 0,
+        )
+      end
+      # At this point in time q is in [Powers::SMALLEST_POWER_OF_FIVE,
+      # Powers::LARGEST_POWER_OF_FIVE].
+
+      # We want the most significant bit of w to be 1. Shift if needed.
+      lz = w.leading_zeros_count
+      w = w.unsafe_shl(lz)
+
+      # The required precision is mantissa_explicit_bits + 3 because
+      # 1. We need the implicit bit
+      # 2. We need an extra bit for rounding purposes
+      # 3. We might lose a bit due to the "upperbit" routine (result too small,
+      # requiring a shift)
+
+      product = FastFloat.compute_product_approximation(q, w, mantissa_explicit_bits &+ 3)
+      # The computed 'product' is always sufficient.
+      # Mathematical proof:
+      # Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to
+      # appear) See script/mushtak_lemire.py
+
+      # The "compute_product_approximation" function can be slightly slower than a
+      # branchless approach: value128 product = compute_product(q, w); but in
+      # practice, we can win big with the compute_product_approximation if its
+      # additional branch is easily predicted. Which is best is data specific.
+      upperbit = product.high.unsafe_shr(63).to_i32!
+      shift = upperbit &+ 64 &- mantissa_explicit_bits &- 3
+
+      mantissa = product.high.unsafe_shr(shift)
+
+      power2 = (Detail.power(q.to_i32!) &+ upperbit &- lz &- minimum_exponent).to_i32!
+      if power2 <= 0 # we have a subnormal?
+        # Here we have that power2 <= 0 so -power2 >= 0
+        if 1 &- power2 >= 64 # if we have more than 64 bits below the minimum exponent, you have a zero for sure.
+          # result should be zero
+          return AdjustedMantissa.new(
+            power2: 0,
+            mantissa: 0,
+          )
+        end
+        # next line is safe because 1 - power2 < 64
+        mantissa = mantissa.unsafe_shr(1 &- power2)
+        # Thankfully, we can't have both "round-to-even" and subnormals because
+        # "round-to-even" only occurs for powers close to 0.
+        mantissa &+= mantissa & 1
+        mantissa = mantissa.unsafe_shr(1)
+        # There is a weird scenario where we don't have a subnormal but just.
+        # Suppose we start with 2.2250738585072013e-308, we end up
+        # with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
+        # whereas 0x40000000000000 x 2^-1023-53  is normal. Now, we need to round
+        # up 0x3fffffffffffff x 2^-1023-53  and once we do, we are no longer
+        # subnormal, but we can only know this after rounding.
+        # So we only declare a subnormal if we are smaller than the threshold.
+        power2 = mantissa < 1_u64.unsafe_shl(mantissa_explicit_bits) ? 0 : 1
+        return AdjustedMantissa.new(power2: power2, mantissa: mantissa)
+      end
+
+      # usually, we round *up*, but if we fall right in between and we have an
+      # even basis, we need to round down
+      # We are only concerned with the cases where 5**q fits in single 64-bit word.
+      if product.low <= 1 && q >= min_exponent_round_to_even && q <= max_exponent_round_to_even && mantissa & 3 == 1
+        # we may fall between two floats!
+        # To be in-between two floats we need that in doing
+        #   mantissa = product.high >> shift
+        #   (with shift = upperbit + 64 - mantissa_explicit_bits - 3)
+        # ... we dropped out only zeroes. But if this happened, then we can go
+        # back!!!
+        if mantissa.unsafe_shl(shift) == product.high
+          mantissa &= ~1_u64 # flip it so that we do not round up
+        end
+      end
+
+      mantissa &+= mantissa & 1 # round up
+      mantissa = mantissa.unsafe_shr(1)
+      if mantissa >= 2_u64.unsafe_shl(mantissa_explicit_bits)
+        mantissa = 1_u64.unsafe_shl(mantissa_explicit_bits)
+        power2 &+= 1 # undo previous addition
+      end
+
+      mantissa &= ~(1_u64.unsafe_shl(mantissa_explicit_bits)) # clear the bit at position mantissa_explicit_bits
+      if power2 >= infinite_power # infinity
+        return AdjustedMantissa.new(
+          power2: infinite_power,
+          mantissa: 0,
+        )
+      end
+      AdjustedMantissa.new(power2: power2, mantissa: mantissa)
+    end
+  end
+end
diff --git a/src/float/fast_float/digit_comparison.cr b/src/float/fast_float/digit_comparison.cr
new file mode 100644
index 000000000000..2da4c455bac4
--- /dev/null
+++ b/src/float/fast_float/digit_comparison.cr
@@ -0,0 +1,399 @@
+require "./float_common"
+require "./bigint"
+require "./ascii_number"
+
+module Float::FastFloat
+  # 1e0 to 1e19: POWERS_OF_TEN_UINT64[n] == 10**n (10**19 is the largest power of ten below 2**64)
+  POWERS_OF_TEN_UINT64 = [
+    1_u64,
+    10_u64,
+    100_u64,
+    1000_u64,
+    10000_u64,
+    100000_u64,
+    1000000_u64,
+    10000000_u64,
+    100000000_u64,
+    1000000000_u64,
+    10000000000_u64,
+    100000000000_u64,
+    1000000000000_u64,
+    10000000000000_u64,
+    100000000000000_u64,
+    1000000000000000_u64,
+    10000000000000000_u64,
+    100000000000000000_u64,
+    1000000000000000000_u64,
+    10000000000000000000_u64,
+  ]
+
+  # computes the decimal exponent the parsed number has in scientific
+  # notation: the parsed exponent plus one for every digit of the mantissa
+  # after the first. digits are stripped 4, then 2, then 1 at a time; this
+  # is deliberately simple since it has no practical effect on performance.
+  def self.scientific_exponent(num : ParsedNumberStringT(UC)) : Int32 forall UC
+    remaining = num.mantissa
+    sci_exp = num.exponent.to_i32!
+    while remaining >= 10000
+      remaining = remaining.unsafe_div(10000)
+      sci_exp &+= 4
+    end
+    while remaining >= 100
+      remaining = remaining.unsafe_div(100)
+      sci_exp &+= 2
+    end
+    while remaining >= 10
+      remaining = remaining.unsafe_div(10)
+      sci_exp &+= 1
+    end
+    sci_exp
+  end
+
+  module BinaryFormat(T, EquivUint)
+    # this converts a native floating-point number to an extended-precision float (integer mantissa plus power2).
+    def to_extended(value : T) : AdjustedMantissa
+      exponent_mask = self.exponent_mask
+      mantissa_mask = self.mantissa_mask
+      hidden_bit_mask = self.hidden_bit_mask
+
+      bias = mantissa_explicit_bits &- minimum_exponent
+      bits = value.unsafe_as(EquivUint) # raw bit pattern of the float
+      if bits & exponent_mask == 0
+        # denormal: the exponent field is all zeros, so no hidden bit is added
+        power2 = 1 &- bias
+        mantissa = bits & mantissa_mask
+      else
+        # normal: take the exponent field, subtract the bias, add the hidden bit
+        power2 = (bits & exponent_mask).unsafe_shr(mantissa_explicit_bits).to_i32!
+        power2 &-= bias
+        mantissa = (bits & mantissa_mask) | hidden_bit_mask
+      end
+
+      AdjustedMantissa.new(power2: power2, mantissa: mantissa.to_u64!)
+    end
+
+    # get the extended precision value of the halfway point between b and b+u.
+    # we are given a native float that represents b; its extended mantissa
+    # is doubled and incremented while power2 drops by one, which places
+    # the result halfway between b and b+u.
+    def to_extended_halfway(value : T) : AdjustedMantissa
+      halfway = to_extended(value)
+      halfway.mantissa = halfway.mantissa.unsafe_shl(1) &+ 1
+      halfway.power2 &-= 1
+      halfway
+    end
+
+    # round an extended-precision float to the nearest machine float.
+    # NOTE(crystal): passes *am* in and out by value
+    def round(am : AdjustedMantissa, & : AdjustedMantissa, Int32 -> AdjustedMantissa) : AdjustedMantissa
+      mantissa_shift = 64 &- mantissa_explicit_bits &- 1
+      if 0 &- am.power2 >= mantissa_shift
+        # have a denormal float
+        shift = 1 &- am.power2
+        am = yield am, {shift, 64}.min # the block does the actual shift; it is capped at the 64-bit width
+        # check for round-up: if rounding-nearest carried us to the hidden bit.
+        am.power2 = am.mantissa < 1_u64.unsafe_shl(mantissa_explicit_bits) ? 0 : 1
+        return am
+      end
+
+      # have a normal float, use the default shift.
+      am = yield am, mantissa_shift # the block shifts and rounds the mantissa
+
+      # check for carry
+      if am.mantissa >= 2_u64.unsafe_shl(mantissa_explicit_bits)
+        am.mantissa = 1_u64.unsafe_shl(mantissa_explicit_bits)
+        am.power2 &+= 1
+      end
+
+      # check for infinite: we could have carried to an infinite power
+      am.mantissa &= ~(1_u64.unsafe_shl(mantissa_explicit_bits)) # clear the hidden bit
+      if am.power2 >= infinite_power
+        am.power2 = infinite_power
+        am.mantissa = 0
+      end
+
+      am
+    end
+
+    # NOTE(crystal): passes *am* in and out by value; the block is given (is_odd, is_halfway, is_above) and returns whether to round up
+    def round_nearest_tie_even(am : AdjustedMantissa, shift : Int32, & : Bool, Bool, Bool -> Bool) : AdjustedMantissa
+      mask = shift == 64 ? UInt64::MAX : 1_u64.unsafe_shl(shift) &- 1 # selects the low *shift* bits about to be dropped
+      halfway = shift == 0 ? 0_u64 : 1_u64.unsafe_shl(shift &- 1)     # dropped bits equal to exactly one half
+      truncated_bits = am.mantissa & mask
+      is_above = truncated_bits > halfway
+      is_halfway = truncated_bits == halfway
+
+      # shift digits into position
+      if shift == 64
+        am.mantissa = 0
+      else
+        am.mantissa = am.mantissa.unsafe_shr(shift)
+      end
+      am.power2 &+= shift
+
+      is_odd = am.mantissa.bits_set?(1) # parity of the mantissa after the shift
+      am.mantissa &+= (yield is_odd, is_halfway, is_above) ? 1 : 0
+      am
+    end
+
+    # NOTE(crystal): passes *am* in and out by value
+    def round_down(am : AdjustedMantissa, shift : Int32) : AdjustedMantissa
+      am.power2 &+= shift
+      if shift != 64
+        am.mantissa = am.mantissa.unsafe_shr(shift)
+      else # a full-width shift is spelled out as a plain zero
+        am.mantissa = 0
+      end
+      am
+    end
+
+    # advances past leading '0' characters in `first...last`.
+    # NOTE(crystal): returns the new *first* by value
+    def skip_zeros(first : UC*, last : UC*) : UC* forall UC
+      chunk_len = FastFloat.int_cmp_len(UC)
+      all_zeros = FastFloat.int_cmp_zeros(UC)
+
+      # bulk pass: compare `chunk_len` characters at once as a single UInt64
+      chunk = uninitialized UInt64
+      while last - first >= chunk_len
+        first.copy_to(pointerof(chunk).as(UC*), chunk_len)
+        break if chunk != all_zeros
+        first += chunk_len
+      end
+
+      # tail pass: one character at a time
+      while first != last
+        break unless first.value === '0'
+        first += 1
+      end
+      first
+    end
+
+    # determine if any non-zero digits were truncated, i.e. whether
+    # `first...last` contains anything other than '0'.
+    # all characters must be valid digits.
+    def is_truncated?(first : UC*, last : UC*) : Bool forall UC
+      chunk_len = FastFloat.int_cmp_len(UC)
+      all_zeros = FastFloat.int_cmp_zeros(UC)
+
+      # bulk pass: compare `chunk_len` characters at once, as a single
+      # UInt64, against the `int_cmp_zeros` pattern.
+      chunk = uninitialized UInt64
+      while last - first >= chunk_len
+        first.copy_to(pointerof(chunk).as(UC*), chunk_len)
+        return true if chunk != all_zeros
+        first += chunk_len
+      end
+
+      # tail pass: check the remaining characters one by one
+      while first != last
+        return true unless first.value === '0'
+        first += 1
+      end
+      # nothing but zeros
+      false
+    end
+
+    def is_truncated?(s : Slice(UC)) : Bool forall UC # slice overload of the pointer-pair version
+      is_truncated?(s.to_unsafe, s.to_unsafe + s.size) # checks the whole slice
+    end
+
+    macro parse_eight_digits(p, value, counter, count)
+      {{ value }} = {{ value }} &* 100000000 &+ FastFloat.parse_eight_digits_unrolled({{ p }}) # append eight decimal digits
+      {{ p }} += 8 # step past the eight characters just consumed
+      {{ counter }} &+= 8
+      {{ count }} &+= 8
+    end
+
+    macro parse_one_digit(p, value, counter, count)
+      {{ value }} = {{ value }} &* 10 &+ {{ p }}.value &- '0'.ord # append one decimal digit
+      {{ p }} += 1 # step past the character just consumed
+      {{ counter }} &+= 1
+      {{ count }} &+= 1
+    end
+
+    macro add_native(big, power, value)
+      {{ big }}.value.mul({{ power }}) # scale the bigint behind the pointer by the given power
+      {{ big }}.value.add({{ value }}) # then add the native value; neither Bool result is checked here
+    end
+
+    macro round_up_bigint(big, count)
+      # need to round-up the digits, but need to avoid rounding
+      # ....9999 to ...10000, which could cause a false halfway point.
+      add_native({{ big }}, 10, 1) # appends a 1 digit: big * 10 + 1
+      {{ count }} &+= 1 # the appended digit is counted as one more digit
+    end
+
+    # parse the significant digits into a big integer, reading at most *max_digits* of them
+    # NOTE(crystal): returns the new *digits* by value
+    def parse_mantissa(result : Bigint*, num : ParsedNumberStringT(UC), max_digits : Int) : Int forall UC
+      # try to minimize the number of big integer and scalar multiplication.
+      # therefore, try to parse 8 digits at a time, and multiply by the largest
+      # scalar value (9 or 19 digits) for each step.
+      counter = 0 # digits accumulated in *value* since the last flush into *result*
+      digits = 0  # total digits consumed so far (leading zeros are skipped, not counted)
+      value = Limb.zero
+      step = {{ Limb == UInt64 ? 19 : 9 }} # decimal digits accumulated per limb before flushing
+
+      # process all integer digits.
+      p = num.integer.to_unsafe
+      pend = p + num.integer.size
+      p = skip_zeros(p, pend)
+      # process all digits, in increments of step per loop
+      while p != pend
+        while pend - p >= 8 && step &- counter >= 8 && max_digits &- digits >= 8
+          parse_eight_digits(p, value, counter, digits)
+        end
+        while counter < step && p != pend && digits < max_digits
+          parse_one_digit(p, value, counter, digits)
+        end
+        if digits == max_digits
+          # add the temporary value, then check if we've truncated any digits
+          add_native(result, Limb.new!(POWERS_OF_TEN_UINT64.unsafe_fetch(counter)), value)
+          truncated = is_truncated?(p, pend)
+          unless num.fraction.empty?
+            truncated ||= is_truncated?(num.fraction)
+          end
+          if truncated
+            round_up_bigint(result, digits)
+          end
+          return digits
+        else
+          # flush the accumulated limb into the bigint and start a new one
+          add_native(result, Limb.new!(POWERS_OF_TEN_UINT64.unsafe_fetch(counter)), value)
+          counter = 0
+          value = Limb.zero
+        end
+      end
+
+      # add our fraction digits, if they're available.
+      unless num.fraction.empty?
+        p = num.fraction.to_unsafe
+        pend = p + num.fraction.size
+        if digits == 0 # no integer digit was consumed, so leading fraction zeros are skipped too
+          p = skip_zeros(p, pend)
+        end
+        # process all digits, in increments of step per loop
+        while p != pend
+          while pend - p >= 8 && step &- counter >= 8 && max_digits &- digits >= 8
+            parse_eight_digits(p, value, counter, digits)
+          end
+          while counter < step && p != pend && digits < max_digits
+            parse_one_digit(p, value, counter, digits)
+          end
+          if digits == max_digits
+            # add the temporary value, then check if we've truncated any digits
+            add_native(result, Limb.new!(POWERS_OF_TEN_UINT64.unsafe_fetch(counter)), value)
+            truncated = is_truncated?(p, pend)
+            if truncated
+              round_up_bigint(result, digits)
+            end
+            return digits
+          else
+            # flush the accumulated limb into the bigint and start a new one
+            add_native(result, Limb.new!(POWERS_OF_TEN_UINT64.unsafe_fetch(counter)), value)
+            counter = 0
+            value = Limb.zero
+          end
+        end
+      end
+
+      if counter != 0 # flush whatever is still pending in *value*
+        add_native(result, Limb.new!(POWERS_OF_TEN_UINT64.unsafe_fetch(counter)), value)
+      end
+
+      digits
+    end
+
+    # digit comparison for a non-negative *exponent*: scales *bigmant* by
+    # 10 ** exponent, takes its high 64 bits as the mantissa and rounds to
+    # nearest, ties to even (a halfway value with truncated bits rounds up).
+    def positive_digit_comp(bigmant : Bigint*, exponent : Int32) : AdjustedMantissa
+      bigmant.value.pow10(exponent.to_u32!)
+      high_bits, lost_bits = bigmant.value.hi64
+      bias = mantissa_explicit_bits &- minimum_exponent
+      power2 = bigmant.value.bit_length &- 64 &+ bias
+      extended = AdjustedMantissa.new(power2: power2, mantissa: high_bits)
+      round(extended) do |a, shift|
+        round_nearest_tie_even(a, shift) do |is_odd, is_halfway, is_above|
+          is_above || (is_halfway && (lost_bits || is_odd))
+        end
+      end
+    end
+
+    # the scaling here is quite simple: we have, for the real digits `m * 10^e`,
+    # and for the theoretical digits `n * 2^f`. Since `e` is always negative,
+    # to scale them identically, we do `n * 2^f * 5^-e`, so the real digits are
+    # now `m * 2^e`. we then need to scale by `2^(f - e)`, and then the two
+    # significant digits are of the same magnitude.
+    def negative_digit_comp(bigmant : Bigint*, am : AdjustedMantissa, exponent : Int32) : AdjustedMantissa
+      real_digits = bigmant
+      real_exp = exponent
+
+      # get the value of `b`, rounded down, and get a bigint representation of b+h
+      am_b = round(am) do |a, shift|
+        round_down(a, shift)
+      end
+      b = to_float(false, am_b) # NOTE(review): first argument is presumably the sign flag; confirm against to_float
+      theor = to_extended_halfway(b)
+      theor_digits = Bigint.new(theor.mantissa)
+      theor_exp = theor.power2
+
+      # scale real digits and theor digits to be same power.
+      pow2_exp = theor_exp &- real_exp
+      pow5_exp = 0_u32 &- real_exp # -real_exp as UInt32 (real_exp is negative here)
+      if pow5_exp != 0
+        theor_digits.pow5(pow5_exp)
+      end
+      if pow2_exp > 0
+        theor_digits.pow2(pow2_exp.to_u32!)
+      elsif pow2_exp < 0
+        real_digits.value.pow2(0_u32 &- pow2_exp)
+      end
+
+      # compare digits, and use it to direct rounding
+      ord = real_digits.value.compare(pointerof(theor_digits))
+      answer = round(am) do |a, shift|
+        round_nearest_tie_even(a, shift) do |is_odd, _, _|
+          if ord > 0 # real digits above the halfway point: round up
+            true
+          elsif ord < 0 # below the halfway point: round down
+            false
+          else # exactly halfway: round to even
+            is_odd
+          end
+        end
+      end
+
+      answer
+    end
+
+    # parse the significant digits as a big integer to unambiguously round
+    # the significant digits. here, we are trying to determine how to round
+    # an extended float representation close to `b+h`, halfway between `b`
+    # (the float rounded-down) and `b+u`, the next positive float. this
+    # algorithm is always correct, and uses one of two approaches. when
+    # the exponent is positive relative to the significant digits (such as
+    # 1234), we create a big-integer representation, get the high 64-bits,
+    # determine if any lower bits are truncated, and use that to direct
+    # rounding. in case of a negative exponent relative to the significant
+    # digits (such as 1.2345), we create a theoretical representation of
+    # `b+h` as a big-integer type, scaled to the same binary exponent as
+    # the actual digits. we then compare the big integer representations
+    # of both, and use that to direct rounding.
+    def digit_comp(num : ParsedNumberStringT(UC), am : AdjustedMantissa) : AdjustedMantissa forall UC
+      # remove the invalid exponent bias
+      am.power2 &-= INVALID_AM_BIAS
+
+      sci_exp = FastFloat.scientific_exponent(num)
+      bigmant = Bigint.new
+      digits = parse_mantissa(pointerof(bigmant), num, self.max_digits)
+
+      # can't underflow, since digits is at most max_digits.
+      exponent = sci_exp &+ 1 &- digits
+      if exponent < 0
+        negative_digit_comp(pointerof(bigmant), am, exponent)
+      else
+        positive_digit_comp(pointerof(bigmant), exponent)
+      end
+    end
+  end
+end
diff --git a/src/float/fast_float/fast_table.cr b/src/float/fast_float/fast_table.cr
new file mode 100644
index 000000000000..a2c2b2e9d1c9
--- /dev/null
+++ b/src/float/fast_float/fast_table.cr
@@ -0,0 +1,695 @@
+module Float::FastFloat
+  # When mapping numbers from decimal to binary,
+  # we go from w * 10^q to m * 2^p but we have
+  # 10^q = 5^q * 2^q, so effectively
+  # we are trying to match
+  # w * 2^q * 5^q to m * 2^p. Thus the powers of two
+  # are not a concern since they can be represented
+  # exactly using the binary notation, only the powers of five
+  # affect the binary significand.
+
+  # The smallest non-zero float (binary64) is 2^-1074.
+  # We take as input numbers of the form w x 10^q where w < 2^64.
+  # We have that w * 10^-343  <  2^(64-344) 5^-343 < 2^-1076.
+  # However, we have that
+  # (2^64-1) * 10^-342 =  (2^64-1) * 2^-342 * 5^-342 > 2^-1074.
+  # Thus it is possible for a number of the form w * 10^-342 where
+  # w is a 64-bit value to be a non-zero floating-point number.
+  #
+  # Any number of form w * 10^309 where w>= 1 is going to be
+  # infinite in binary64 so we never need to worry about powers
+  # of 5 greater than 308.
+  module Powers
+    SMALLEST_POWER_OF_FIVE = -342
+    LARGEST_POWER_OF_FIVE  =  308
+    NUMBER_OF_ENTRIES      = {{ 2 * (LARGEST_POWER_OF_FIVE - SMALLEST_POWER_OF_FIVE + 1) }} # two UInt64 words per power of five
+
+    # TODO: appending through this helper is needed to avoid generating lots
+    # of allocas in LLVM, which makes LLVM really slow. Ideally the compiler
+    # itself would avoid or reuse temporary allocas.
+    # Explanation: https://github.com/crystal-lang/crystal/issues/4516#issuecomment-306226171
+    private def self.put(array, value) : Nil
+      array.push(value)
+    end
+
+    # Powers of five from 5^-342 all the way to 5^308 rounded toward one.
+    # NOTE(crystal): this is very similar to
+    # `Float::Printer::Dragonbox::ImplInfo_Float64::CACHE`, except the endpoints
+    # are different and the rounding is in a different direction
+    POWER_OF_FIVE_128 = begin
+      array = Array(UInt64).new(NUMBER_OF_ENTRIES)
+      put(array, 0xeef453d6923bd65a_u64); put(array, 0x113faa2906a13b3f_u64)
+      put(array, 0x9558b4661b6565f8_u64); put(array, 0x4ac7ca59a424c507_u64)
+      put(array, 0xbaaee17fa23ebf76_u64); put(array, 0x5d79bcf00d2df649_u64)
+      put(array, 0xe95a99df8ace6f53_u64); put(array, 0xf4d82c2c107973dc_u64)
+      put(array, 0x91d8a02bb6c10594_u64); put(array, 0x79071b9b8a4be869_u64)
+      put(array, 0xb64ec836a47146f9_u64); put(array, 0x9748e2826cdee284_u64)
+      put(array, 0xe3e27a444d8d98b7_u64); put(array, 0xfd1b1b2308169b25_u64)
+      put(array, 0x8e6d8c6ab0787f72_u64); put(array, 0xfe30f0f5e50e20f7_u64)
+      put(array, 0xb208ef855c969f4f_u64); put(array, 0xbdbd2d335e51a935_u64)
+      put(array, 0xde8b2b66b3bc4723_u64); put(array, 0xad2c788035e61382_u64)
+      put(array, 0x8b16fb203055ac76_u64); put(array, 0x4c3bcb5021afcc31_u64)
+      put(array, 0xaddcb9e83c6b1793_u64); put(array, 0xdf4abe242a1bbf3d_u64)
+      put(array, 0xd953e8624b85dd78_u64); put(array, 0xd71d6dad34a2af0d_u64)
+      put(array, 0x87d4713d6f33aa6b_u64); put(array, 0x8672648c40e5ad68_u64)
+      put(array, 0xa9c98d8ccb009506_u64); put(array, 0x680efdaf511f18c2_u64)
+      put(array, 0xd43bf0effdc0ba48_u64); put(array, 0x212bd1b2566def2_u64)
+      put(array, 0x84a57695fe98746d_u64); put(array, 0x14bb630f7604b57_u64)
+      put(array, 0xa5ced43b7e3e9188_u64); put(array, 0x419ea3bd35385e2d_u64)
+      put(array, 0xcf42894a5dce35ea_u64); put(array, 0x52064cac828675b9_u64)
+      put(array, 0x818995ce7aa0e1b2_u64); put(array, 0x7343efebd1940993_u64)
+      put(array, 0xa1ebfb4219491a1f_u64); put(array, 0x1014ebe6c5f90bf8_u64)
+      put(array, 0xca66fa129f9b60a6_u64); put(array, 0xd41a26e077774ef6_u64)
+      put(array, 0xfd00b897478238d0_u64); put(array, 0x8920b098955522b4_u64)
+      put(array, 0x9e20735e8cb16382_u64); put(array, 0x55b46e5f5d5535b0_u64)
+      put(array, 0xc5a890362fddbc62_u64); put(array, 0xeb2189f734aa831d_u64)
+      put(array, 0xf712b443bbd52b7b_u64); put(array, 0xa5e9ec7501d523e4_u64)
+      put(array, 0x9a6bb0aa55653b2d_u64); put(array, 0x47b233c92125366e_u64)
+      put(array, 0xc1069cd4eabe89f8_u64); put(array, 0x999ec0bb696e840a_u64)
+      put(array, 0xf148440a256e2c76_u64); put(array, 0xc00670ea43ca250d_u64)
+      put(array, 0x96cd2a865764dbca_u64); put(array, 0x380406926a5e5728_u64)
+      put(array, 0xbc807527ed3e12bc_u64); put(array, 0xc605083704f5ecf2_u64)
+      put(array, 0xeba09271e88d976b_u64); put(array, 0xf7864a44c633682e_u64)
+      put(array, 0x93445b8731587ea3_u64); put(array, 0x7ab3ee6afbe0211d_u64)
+      put(array, 0xb8157268fdae9e4c_u64); put(array, 0x5960ea05bad82964_u64)
+      put(array, 0xe61acf033d1a45df_u64); put(array, 0x6fb92487298e33bd_u64)
+      put(array, 0x8fd0c16206306bab_u64); put(array, 0xa5d3b6d479f8e056_u64)
+      put(array, 0xb3c4f1ba87bc8696_u64); put(array, 0x8f48a4899877186c_u64)
+      put(array, 0xe0b62e2929aba83c_u64); put(array, 0x331acdabfe94de87_u64)
+      put(array, 0x8c71dcd9ba0b4925_u64); put(array, 0x9ff0c08b7f1d0b14_u64)
+      put(array, 0xaf8e5410288e1b6f_u64); put(array, 0x7ecf0ae5ee44dd9_u64)
+      put(array, 0xdb71e91432b1a24a_u64); put(array, 0xc9e82cd9f69d6150_u64)
+      put(array, 0x892731ac9faf056e_u64); put(array, 0xbe311c083a225cd2_u64)
+      put(array, 0xab70fe17c79ac6ca_u64); put(array, 0x6dbd630a48aaf406_u64)
+      put(array, 0xd64d3d9db981787d_u64); put(array, 0x92cbbccdad5b108_u64)
+      put(array, 0x85f0468293f0eb4e_u64); put(array, 0x25bbf56008c58ea5_u64)
+      put(array, 0xa76c582338ed2621_u64); put(array, 0xaf2af2b80af6f24e_u64)
+      put(array, 0xd1476e2c07286faa_u64); put(array, 0x1af5af660db4aee1_u64)
+      put(array, 0x82cca4db847945ca_u64); put(array, 0x50d98d9fc890ed4d_u64)
+      put(array, 0xa37fce126597973c_u64); put(array, 0xe50ff107bab528a0_u64)
+      put(array, 0xcc5fc196fefd7d0c_u64); put(array, 0x1e53ed49a96272c8_u64)
+      put(array, 0xff77b1fcbebcdc4f_u64); put(array, 0x25e8e89c13bb0f7a_u64)
+      put(array, 0x9faacf3df73609b1_u64); put(array, 0x77b191618c54e9ac_u64)
+      put(array, 0xc795830d75038c1d_u64); put(array, 0xd59df5b9ef6a2417_u64)
+      put(array, 0xf97ae3d0d2446f25_u64); put(array, 0x4b0573286b44ad1d_u64)
+      put(array, 0x9becce62836ac577_u64); put(array, 0x4ee367f9430aec32_u64)
+      put(array, 0xc2e801fb244576d5_u64); put(array, 0x229c41f793cda73f_u64)
+      put(array, 0xf3a20279ed56d48a_u64); put(array, 0x6b43527578c1110f_u64)
+      put(array, 0x9845418c345644d6_u64); put(array, 0x830a13896b78aaa9_u64)
+      put(array, 0xbe5691ef416bd60c_u64); put(array, 0x23cc986bc656d553_u64)
+      put(array, 0xedec366b11c6cb8f_u64); put(array, 0x2cbfbe86b7ec8aa8_u64)
+      put(array, 0x94b3a202eb1c3f39_u64); put(array, 0x7bf7d71432f3d6a9_u64)
+      put(array, 0xb9e08a83a5e34f07_u64); put(array, 0xdaf5ccd93fb0cc53_u64)
+      put(array, 0xe858ad248f5c22c9_u64); put(array, 0xd1b3400f8f9cff68_u64)
+      put(array, 0x91376c36d99995be_u64); put(array, 0x23100809b9c21fa1_u64)
+      put(array, 0xb58547448ffffb2d_u64); put(array, 0xabd40a0c2832a78a_u64)
+      put(array, 0xe2e69915b3fff9f9_u64); put(array, 0x16c90c8f323f516c_u64)
+      put(array, 0x8dd01fad907ffc3b_u64); put(array, 0xae3da7d97f6792e3_u64)
+      put(array, 0xb1442798f49ffb4a_u64); put(array, 0x99cd11cfdf41779c_u64)
+      put(array, 0xdd95317f31c7fa1d_u64); put(array, 0x40405643d711d583_u64)
+      put(array, 0x8a7d3eef7f1cfc52_u64); put(array, 0x482835ea666b2572_u64)
+      put(array, 0xad1c8eab5ee43b66_u64); put(array, 0xda3243650005eecf_u64)
+      put(array, 0xd863b256369d4a40_u64); put(array, 0x90bed43e40076a82_u64)
+      put(array, 0x873e4f75e2224e68_u64); put(array, 0x5a7744a6e804a291_u64)
+      put(array, 0xa90de3535aaae202_u64); put(array, 0x711515d0a205cb36_u64)
+      put(array, 0xd3515c2831559a83_u64); put(array, 0xd5a5b44ca873e03_u64)
+      put(array, 0x8412d9991ed58091_u64); put(array, 0xe858790afe9486c2_u64)
+      put(array, 0xa5178fff668ae0b6_u64); put(array, 0x626e974dbe39a872_u64)
+      put(array, 0xce5d73ff402d98e3_u64); put(array, 0xfb0a3d212dc8128f_u64)
+      put(array, 0x80fa687f881c7f8e_u64); put(array, 0x7ce66634bc9d0b99_u64)
+      put(array, 0xa139029f6a239f72_u64); put(array, 0x1c1fffc1ebc44e80_u64)
+      put(array, 0xc987434744ac874e_u64); put(array, 0xa327ffb266b56220_u64)
+      put(array, 0xfbe9141915d7a922_u64); put(array, 0x4bf1ff9f0062baa8_u64)
+      put(array, 0x9d71ac8fada6c9b5_u64); put(array, 0x6f773fc3603db4a9_u64)
+      put(array, 0xc4ce17b399107c22_u64); put(array, 0xcb550fb4384d21d3_u64)
+      put(array, 0xf6019da07f549b2b_u64); put(array, 0x7e2a53a146606a48_u64)
+      put(array, 0x99c102844f94e0fb_u64); put(array, 0x2eda7444cbfc426d_u64)
+      put(array, 0xc0314325637a1939_u64); put(array, 0xfa911155fefb5308_u64)
+      put(array, 0xf03d93eebc589f88_u64); put(array, 0x793555ab7eba27ca_u64)
+      put(array, 0x96267c7535b763b5_u64); put(array, 0x4bc1558b2f3458de_u64)
+      put(array, 0xbbb01b9283253ca2_u64); put(array, 0x9eb1aaedfb016f16_u64)
+      put(array, 0xea9c227723ee8bcb_u64); put(array, 0x465e15a979c1cadc_u64)
+      put(array, 0x92a1958a7675175f_u64); put(array, 0xbfacd89ec191ec9_u64)
+      put(array, 0xb749faed14125d36_u64); put(array, 0xcef980ec671f667b_u64)
+      put(array, 0xe51c79a85916f484_u64); put(array, 0x82b7e12780e7401a_u64)
+      put(array, 0x8f31cc0937ae58d2_u64); put(array, 0xd1b2ecb8b0908810_u64)
+      put(array, 0xb2fe3f0b8599ef07_u64); put(array, 0x861fa7e6dcb4aa15_u64)
+      put(array, 0xdfbdcece67006ac9_u64); put(array, 0x67a791e093e1d49a_u64)
+      put(array, 0x8bd6a141006042bd_u64); put(array, 0xe0c8bb2c5c6d24e0_u64)
+      put(array, 0xaecc49914078536d_u64); put(array, 0x58fae9f773886e18_u64)
+      put(array, 0xda7f5bf590966848_u64); put(array, 0xaf39a475506a899e_u64)
+      put(array, 0x888f99797a5e012d_u64); put(array, 0x6d8406c952429603_u64)
+      put(array, 0xaab37fd7d8f58178_u64); put(array, 0xc8e5087ba6d33b83_u64)
+      put(array, 0xd5605fcdcf32e1d6_u64); put(array, 0xfb1e4a9a90880a64_u64)
+      put(array, 0x855c3be0a17fcd26_u64); put(array, 0x5cf2eea09a55067f_u64)
+      put(array, 0xa6b34ad8c9dfc06f_u64); put(array, 0xf42faa48c0ea481e_u64)
+      put(array, 0xd0601d8efc57b08b_u64); put(array, 0xf13b94daf124da26_u64)
+      put(array, 0x823c12795db6ce57_u64); put(array, 0x76c53d08d6b70858_u64)
+      put(array, 0xa2cb1717b52481ed_u64); put(array, 0x54768c4b0c64ca6e_u64)
+      put(array, 0xcb7ddcdda26da268_u64); put(array, 0xa9942f5dcf7dfd09_u64)
+      put(array, 0xfe5d54150b090b02_u64); put(array, 0xd3f93b35435d7c4c_u64)
+      put(array, 0x9efa548d26e5a6e1_u64); put(array, 0xc47bc5014a1a6daf_u64)
+      put(array, 0xc6b8e9b0709f109a_u64); put(array, 0x359ab6419ca1091b_u64)
+      put(array, 0xf867241c8cc6d4c0_u64); put(array, 0xc30163d203c94b62_u64)
+      put(array, 0x9b407691d7fc44f8_u64); put(array, 0x79e0de63425dcf1d_u64)
+      put(array, 0xc21094364dfb5636_u64); put(array, 0x985915fc12f542e4_u64)
+      put(array, 0xf294b943e17a2bc4_u64); put(array, 0x3e6f5b7b17b2939d_u64)
+      put(array, 0x979cf3ca6cec5b5a_u64); put(array, 0xa705992ceecf9c42_u64)
+      put(array, 0xbd8430bd08277231_u64); put(array, 0x50c6ff782a838353_u64)
+      put(array, 0xece53cec4a314ebd_u64); put(array, 0xa4f8bf5635246428_u64)
+      put(array, 0x940f4613ae5ed136_u64); put(array, 0x871b7795e136be99_u64)
+      put(array, 0xb913179899f68584_u64); put(array, 0x28e2557b59846e3f_u64)
+      put(array, 0xe757dd7ec07426e5_u64); put(array, 0x331aeada2fe589cf_u64)
+      put(array, 0x9096ea6f3848984f_u64); put(array, 0x3ff0d2c85def7621_u64)
+      put(array, 0xb4bca50b065abe63_u64); put(array, 0xfed077a756b53a9_u64)
+      put(array, 0xe1ebce4dc7f16dfb_u64); put(array, 0xd3e8495912c62894_u64)
+      put(array, 0x8d3360f09cf6e4bd_u64); put(array, 0x64712dd7abbbd95c_u64)
+      put(array, 0xb080392cc4349dec_u64); put(array, 0xbd8d794d96aacfb3_u64)
+      put(array, 0xdca04777f541c567_u64); put(array, 0xecf0d7a0fc5583a0_u64)
+      put(array, 0x89e42caaf9491b60_u64); put(array, 0xf41686c49db57244_u64)
+      put(array, 0xac5d37d5b79b6239_u64); put(array, 0x311c2875c522ced5_u64)
+      put(array, 0xd77485cb25823ac7_u64); put(array, 0x7d633293366b828b_u64)
+      put(array, 0x86a8d39ef77164bc_u64); put(array, 0xae5dff9c02033197_u64)
+      put(array, 0xa8530886b54dbdeb_u64); put(array, 0xd9f57f830283fdfc_u64)
+      put(array, 0xd267caa862a12d66_u64); put(array, 0xd072df63c324fd7b_u64)
+      put(array, 0x8380dea93da4bc60_u64); put(array, 0x4247cb9e59f71e6d_u64)
+      put(array, 0xa46116538d0deb78_u64); put(array, 0x52d9be85f074e608_u64)
+      put(array, 0xcd795be870516656_u64); put(array, 0x67902e276c921f8b_u64)
+      put(array, 0x806bd9714632dff6_u64); put(array, 0xba1cd8a3db53b6_u64)
+      put(array, 0xa086cfcd97bf97f3_u64); put(array, 0x80e8a40eccd228a4_u64)
+      put(array, 0xc8a883c0fdaf7df0_u64); put(array, 0x6122cd128006b2cd_u64)
+      put(array, 0xfad2a4b13d1b5d6c_u64); put(array, 0x796b805720085f81_u64)
+      put(array, 0x9cc3a6eec6311a63_u64); put(array, 0xcbe3303674053bb0_u64)
+      put(array, 0xc3f490aa77bd60fc_u64); put(array, 0xbedbfc4411068a9c_u64)
+      put(array, 0xf4f1b4d515acb93b_u64); put(array, 0xee92fb5515482d44_u64)
+      put(array, 0x991711052d8bf3c5_u64); put(array, 0x751bdd152d4d1c4a_u64)
+      put(array, 0xbf5cd54678eef0b6_u64); put(array, 0xd262d45a78a0635d_u64)
+      put(array, 0xef340a98172aace4_u64); put(array, 0x86fb897116c87c34_u64)
+      put(array, 0x9580869f0e7aac0e_u64); put(array, 0xd45d35e6ae3d4da0_u64)
+      put(array, 0xbae0a846d2195712_u64); put(array, 0x8974836059cca109_u64)
+      put(array, 0xe998d258869facd7_u64); put(array, 0x2bd1a438703fc94b_u64)
+      put(array, 0x91ff83775423cc06_u64); put(array, 0x7b6306a34627ddcf_u64)
+      put(array, 0xb67f6455292cbf08_u64); put(array, 0x1a3bc84c17b1d542_u64)
+      put(array, 0xe41f3d6a7377eeca_u64); put(array, 0x20caba5f1d9e4a93_u64)
+      put(array, 0x8e938662882af53e_u64); put(array, 0x547eb47b7282ee9c_u64)
+      put(array, 0xb23867fb2a35b28d_u64); put(array, 0xe99e619a4f23aa43_u64)
+      put(array, 0xdec681f9f4c31f31_u64); put(array, 0x6405fa00e2ec94d4_u64)
+      put(array, 0x8b3c113c38f9f37e_u64); put(array, 0xde83bc408dd3dd04_u64)
+      put(array, 0xae0b158b4738705e_u64); put(array, 0x9624ab50b148d445_u64)
+      put(array, 0xd98ddaee19068c76_u64); put(array, 0x3badd624dd9b0957_u64)
+      put(array, 0x87f8a8d4cfa417c9_u64); put(array, 0xe54ca5d70a80e5d6_u64)
+      put(array, 0xa9f6d30a038d1dbc_u64); put(array, 0x5e9fcf4ccd211f4c_u64)
+      put(array, 0xd47487cc8470652b_u64); put(array, 0x7647c3200069671f_u64)
+      put(array, 0x84c8d4dfd2c63f3b_u64); put(array, 0x29ecd9f40041e073_u64)
+      put(array, 0xa5fb0a17c777cf09_u64); put(array, 0xf468107100525890_u64)
+      put(array, 0xcf79cc9db955c2cc_u64); put(array, 0x7182148d4066eeb4_u64)
+      put(array, 0x81ac1fe293d599bf_u64); put(array, 0xc6f14cd848405530_u64)
+      put(array, 0xa21727db38cb002f_u64); put(array, 0xb8ada00e5a506a7c_u64)
+      put(array, 0xca9cf1d206fdc03b_u64); put(array, 0xa6d90811f0e4851c_u64)
+      put(array, 0xfd442e4688bd304a_u64); put(array, 0x908f4a166d1da663_u64)
+      put(array, 0x9e4a9cec15763e2e_u64); put(array, 0x9a598e4e043287fe_u64)
+      put(array, 0xc5dd44271ad3cdba_u64); put(array, 0x40eff1e1853f29fd_u64)
+      put(array, 0xf7549530e188c128_u64); put(array, 0xd12bee59e68ef47c_u64)
+      put(array, 0x9a94dd3e8cf578b9_u64); put(array, 0x82bb74f8301958ce_u64)
+      put(array, 0xc13a148e3032d6e7_u64); put(array, 0xe36a52363c1faf01_u64)
+      put(array, 0xf18899b1bc3f8ca1_u64); put(array, 0xdc44e6c3cb279ac1_u64)
+      put(array, 0x96f5600f15a7b7e5_u64); put(array, 0x29ab103a5ef8c0b9_u64)
+      put(array, 0xbcb2b812db11a5de_u64); put(array, 0x7415d448f6b6f0e7_u64)
+      put(array, 0xebdf661791d60f56_u64); put(array, 0x111b495b3464ad21_u64)
+      put(array, 0x936b9fcebb25c995_u64); put(array, 0xcab10dd900beec34_u64)
+      put(array, 0xb84687c269ef3bfb_u64); put(array, 0x3d5d514f40eea742_u64)
+      put(array, 0xe65829b3046b0afa_u64); put(array, 0xcb4a5a3112a5112_u64)
+      put(array, 0x8ff71a0fe2c2e6dc_u64); put(array, 0x47f0e785eaba72ab_u64)
+      put(array, 0xb3f4e093db73a093_u64); put(array, 0x59ed216765690f56_u64)
+      put(array, 0xe0f218b8d25088b8_u64); put(array, 0x306869c13ec3532c_u64)
+      put(array, 0x8c974f7383725573_u64); put(array, 0x1e414218c73a13fb_u64)
+      put(array, 0xafbd2350644eeacf_u64); put(array, 0xe5d1929ef90898fa_u64)
+      put(array, 0xdbac6c247d62a583_u64); put(array, 0xdf45f746b74abf39_u64)
+      put(array, 0x894bc396ce5da772_u64); put(array, 0x6b8bba8c328eb783_u64)
+      put(array, 0xab9eb47c81f5114f_u64); put(array, 0x66ea92f3f326564_u64)
+      put(array, 0xd686619ba27255a2_u64); put(array, 0xc80a537b0efefebd_u64)
+      put(array, 0x8613fd0145877585_u64); put(array, 0xbd06742ce95f5f36_u64)
+      put(array, 0xa798fc4196e952e7_u64); put(array, 0x2c48113823b73704_u64)
+      put(array, 0xd17f3b51fca3a7a0_u64); put(array, 0xf75a15862ca504c5_u64)
+      put(array, 0x82ef85133de648c4_u64); put(array, 0x9a984d73dbe722fb_u64)
+      put(array, 0xa3ab66580d5fdaf5_u64); put(array, 0xc13e60d0d2e0ebba_u64)
+      put(array, 0xcc963fee10b7d1b3_u64); put(array, 0x318df905079926a8_u64)
+      put(array, 0xffbbcfe994e5c61f_u64); put(array, 0xfdf17746497f7052_u64)
+      put(array, 0x9fd561f1fd0f9bd3_u64); put(array, 0xfeb6ea8bedefa633_u64)
+      put(array, 0xc7caba6e7c5382c8_u64); put(array, 0xfe64a52ee96b8fc0_u64)
+      put(array, 0xf9bd690a1b68637b_u64); put(array, 0x3dfdce7aa3c673b0_u64)
+      put(array, 0x9c1661a651213e2d_u64); put(array, 0x6bea10ca65c084e_u64)
+      put(array, 0xc31bfa0fe5698db8_u64); put(array, 0x486e494fcff30a62_u64)
+      put(array, 0xf3e2f893dec3f126_u64); put(array, 0x5a89dba3c3efccfa_u64)
+      put(array, 0x986ddb5c6b3a76b7_u64); put(array, 0xf89629465a75e01c_u64)
+      put(array, 0xbe89523386091465_u64); put(array, 0xf6bbb397f1135823_u64)
+      put(array, 0xee2ba6c0678b597f_u64); put(array, 0x746aa07ded582e2c_u64)
+      put(array, 0x94db483840b717ef_u64); put(array, 0xa8c2a44eb4571cdc_u64)
+      put(array, 0xba121a4650e4ddeb_u64); put(array, 0x92f34d62616ce413_u64)
+      put(array, 0xe896a0d7e51e1566_u64); put(array, 0x77b020baf9c81d17_u64)
+      put(array, 0x915e2486ef32cd60_u64); put(array, 0xace1474dc1d122e_u64)
+      put(array, 0xb5b5ada8aaff80b8_u64); put(array, 0xd819992132456ba_u64)
+      put(array, 0xe3231912d5bf60e6_u64); put(array, 0x10e1fff697ed6c69_u64)
+      put(array, 0x8df5efabc5979c8f_u64); put(array, 0xca8d3ffa1ef463c1_u64)
+      put(array, 0xb1736b96b6fd83b3_u64); put(array, 0xbd308ff8a6b17cb2_u64)
+      put(array, 0xddd0467c64bce4a0_u64); put(array, 0xac7cb3f6d05ddbde_u64)
+      put(array, 0x8aa22c0dbef60ee4_u64); put(array, 0x6bcdf07a423aa96b_u64)
+      put(array, 0xad4ab7112eb3929d_u64); put(array, 0x86c16c98d2c953c6_u64)
+      put(array, 0xd89d64d57a607744_u64); put(array, 0xe871c7bf077ba8b7_u64)
+      put(array, 0x87625f056c7c4a8b_u64); put(array, 0x11471cd764ad4972_u64)
+      put(array, 0xa93af6c6c79b5d2d_u64); put(array, 0xd598e40d3dd89bcf_u64)
+      put(array, 0xd389b47879823479_u64); put(array, 0x4aff1d108d4ec2c3_u64)
+      put(array, 0x843610cb4bf160cb_u64); put(array, 0xcedf722a585139ba_u64)
+      put(array, 0xa54394fe1eedb8fe_u64); put(array, 0xc2974eb4ee658828_u64)
+      put(array, 0xce947a3da6a9273e_u64); put(array, 0x733d226229feea32_u64)
+      put(array, 0x811ccc668829b887_u64); put(array, 0x806357d5a3f525f_u64)
+      put(array, 0xa163ff802a3426a8_u64); put(array, 0xca07c2dcb0cf26f7_u64)
+      put(array, 0xc9bcff6034c13052_u64); put(array, 0xfc89b393dd02f0b5_u64)
+      put(array, 0xfc2c3f3841f17c67_u64); put(array, 0xbbac2078d443ace2_u64)
+      put(array, 0x9d9ba7832936edc0_u64); put(array, 0xd54b944b84aa4c0d_u64)
+      put(array, 0xc5029163f384a931_u64); put(array, 0xa9e795e65d4df11_u64)
+      put(array, 0xf64335bcf065d37d_u64); put(array, 0x4d4617b5ff4a16d5_u64)
+      put(array, 0x99ea0196163fa42e_u64); put(array, 0x504bced1bf8e4e45_u64)
+      put(array, 0xc06481fb9bcf8d39_u64); put(array, 0xe45ec2862f71e1d6_u64)
+      put(array, 0xf07da27a82c37088_u64); put(array, 0x5d767327bb4e5a4c_u64)
+      put(array, 0x964e858c91ba2655_u64); put(array, 0x3a6a07f8d510f86f_u64)
+      put(array, 0xbbe226efb628afea_u64); put(array, 0x890489f70a55368b_u64)
+      put(array, 0xeadab0aba3b2dbe5_u64); put(array, 0x2b45ac74ccea842e_u64)
+      put(array, 0x92c8ae6b464fc96f_u64); put(array, 0x3b0b8bc90012929d_u64)
+      put(array, 0xb77ada0617e3bbcb_u64); put(array, 0x9ce6ebb40173744_u64)
+      put(array, 0xe55990879ddcaabd_u64); put(array, 0xcc420a6a101d0515_u64)
+      put(array, 0x8f57fa54c2a9eab6_u64); put(array, 0x9fa946824a12232d_u64)
+      put(array, 0xb32df8e9f3546564_u64); put(array, 0x47939822dc96abf9_u64)
+      put(array, 0xdff9772470297ebd_u64); put(array, 0x59787e2b93bc56f7_u64)
+      put(array, 0x8bfbea76c619ef36_u64); put(array, 0x57eb4edb3c55b65a_u64)
+      put(array, 0xaefae51477a06b03_u64); put(array, 0xede622920b6b23f1_u64)
+      put(array, 0xdab99e59958885c4_u64); put(array, 0xe95fab368e45eced_u64)
+      put(array, 0x88b402f7fd75539b_u64); put(array, 0x11dbcb0218ebb414_u64)
+      put(array, 0xaae103b5fcd2a881_u64); put(array, 0xd652bdc29f26a119_u64)
+      put(array, 0xd59944a37c0752a2_u64); put(array, 0x4be76d3346f0495f_u64)
+      put(array, 0x857fcae62d8493a5_u64); put(array, 0x6f70a4400c562ddb_u64)
+      put(array, 0xa6dfbd9fb8e5b88e_u64); put(array, 0xcb4ccd500f6bb952_u64)
+      put(array, 0xd097ad07a71f26b2_u64); put(array, 0x7e2000a41346a7a7_u64)
+      put(array, 0x825ecc24c873782f_u64); put(array, 0x8ed400668c0c28c8_u64)
+      put(array, 0xa2f67f2dfa90563b_u64); put(array, 0x728900802f0f32fa_u64)
+      put(array, 0xcbb41ef979346bca_u64); put(array, 0x4f2b40a03ad2ffb9_u64)
+      put(array, 0xfea126b7d78186bc_u64); put(array, 0xe2f610c84987bfa8_u64)
+      put(array, 0x9f24b832e6b0f436_u64); put(array, 0xdd9ca7d2df4d7c9_u64)
+      put(array, 0xc6ede63fa05d3143_u64); put(array, 0x91503d1c79720dbb_u64)
+      put(array, 0xf8a95fcf88747d94_u64); put(array, 0x75a44c6397ce912a_u64)
+      put(array, 0x9b69dbe1b548ce7c_u64); put(array, 0xc986afbe3ee11aba_u64)
+      put(array, 0xc24452da229b021b_u64); put(array, 0xfbe85badce996168_u64)
+      put(array, 0xf2d56790ab41c2a2_u64); put(array, 0xfae27299423fb9c3_u64)
+      put(array, 0x97c560ba6b0919a5_u64); put(array, 0xdccd879fc967d41a_u64)
+      put(array, 0xbdb6b8e905cb600f_u64); put(array, 0x5400e987bbc1c920_u64)
+      put(array, 0xed246723473e3813_u64); put(array, 0x290123e9aab23b68_u64)
+      put(array, 0x9436c0760c86e30b_u64); put(array, 0xf9a0b6720aaf6521_u64)
+      put(array, 0xb94470938fa89bce_u64); put(array, 0xf808e40e8d5b3e69_u64)
+      put(array, 0xe7958cb87392c2c2_u64); put(array, 0xb60b1d1230b20e04_u64)
+      put(array, 0x90bd77f3483bb9b9_u64); put(array, 0xb1c6f22b5e6f48c2_u64)
+      put(array, 0xb4ecd5f01a4aa828_u64); put(array, 0x1e38aeb6360b1af3_u64)
+      put(array, 0xe2280b6c20dd5232_u64); put(array, 0x25c6da63c38de1b0_u64)
+      put(array, 0x8d590723948a535f_u64); put(array, 0x579c487e5a38ad0e_u64)
+      put(array, 0xb0af48ec79ace837_u64); put(array, 0x2d835a9df0c6d851_u64)
+      put(array, 0xdcdb1b2798182244_u64); put(array, 0xf8e431456cf88e65_u64)
+      put(array, 0x8a08f0f8bf0f156b_u64); put(array, 0x1b8e9ecb641b58ff_u64)
+      put(array, 0xac8b2d36eed2dac5_u64); put(array, 0xe272467e3d222f3f_u64)
+      put(array, 0xd7adf884aa879177_u64); put(array, 0x5b0ed81dcc6abb0f_u64)
+      put(array, 0x86ccbb52ea94baea_u64); put(array, 0x98e947129fc2b4e9_u64)
+      put(array, 0xa87fea27a539e9a5_u64); put(array, 0x3f2398d747b36224_u64)
+      put(array, 0xd29fe4b18e88640e_u64); put(array, 0x8eec7f0d19a03aad_u64)
+      put(array, 0x83a3eeeef9153e89_u64); put(array, 0x1953cf68300424ac_u64)
+      put(array, 0xa48ceaaab75a8e2b_u64); put(array, 0x5fa8c3423c052dd7_u64)
+      put(array, 0xcdb02555653131b6_u64); put(array, 0x3792f412cb06794d_u64)
+      put(array, 0x808e17555f3ebf11_u64); put(array, 0xe2bbd88bbee40bd0_u64)
+      put(array, 0xa0b19d2ab70e6ed6_u64); put(array, 0x5b6aceaeae9d0ec4_u64)
+      put(array, 0xc8de047564d20a8b_u64); put(array, 0xf245825a5a445275_u64)
+      put(array, 0xfb158592be068d2e_u64); put(array, 0xeed6e2f0f0d56712_u64)
+      put(array, 0x9ced737bb6c4183d_u64); put(array, 0x55464dd69685606b_u64)
+      put(array, 0xc428d05aa4751e4c_u64); put(array, 0xaa97e14c3c26b886_u64)
+      put(array, 0xf53304714d9265df_u64); put(array, 0xd53dd99f4b3066a8_u64)
+      put(array, 0x993fe2c6d07b7fab_u64); put(array, 0xe546a8038efe4029_u64)
+      put(array, 0xbf8fdb78849a5f96_u64); put(array, 0xde98520472bdd033_u64)
+      put(array, 0xef73d256a5c0f77c_u64); put(array, 0x963e66858f6d4440_u64)
+      put(array, 0x95a8637627989aad_u64); put(array, 0xdde7001379a44aa8_u64)
+      put(array, 0xbb127c53b17ec159_u64); put(array, 0x5560c018580d5d52_u64)
+      put(array, 0xe9d71b689dde71af_u64); put(array, 0xaab8f01e6e10b4a6_u64)
+      put(array, 0x9226712162ab070d_u64); put(array, 0xcab3961304ca70e8_u64)
+      put(array, 0xb6b00d69bb55c8d1_u64); put(array, 0x3d607b97c5fd0d22_u64)
+      put(array, 0xe45c10c42a2b3b05_u64); put(array, 0x8cb89a7db77c506a_u64)
+      put(array, 0x8eb98a7a9a5b04e3_u64); put(array, 0x77f3608e92adb242_u64)
+      put(array, 0xb267ed1940f1c61c_u64); put(array, 0x55f038b237591ed3_u64)
+      put(array, 0xdf01e85f912e37a3_u64); put(array, 0x6b6c46dec52f6688_u64)
+      put(array, 0x8b61313bbabce2c6_u64); put(array, 0x2323ac4b3b3da015_u64)
+      put(array, 0xae397d8aa96c1b77_u64); put(array, 0xabec975e0a0d081a_u64)
+      put(array, 0xd9c7dced53c72255_u64); put(array, 0x96e7bd358c904a21_u64)
+      put(array, 0x881cea14545c7575_u64); put(array, 0x7e50d64177da2e54_u64)
+      put(array, 0xaa242499697392d2_u64); put(array, 0xdde50bd1d5d0b9e9_u64)
+      put(array, 0xd4ad2dbfc3d07787_u64); put(array, 0x955e4ec64b44e864_u64)
+      put(array, 0x84ec3c97da624ab4_u64); put(array, 0xbd5af13bef0b113e_u64)
+      put(array, 0xa6274bbdd0fadd61_u64); put(array, 0xecb1ad8aeacdd58e_u64)
+      put(array, 0xcfb11ead453994ba_u64); put(array, 0x67de18eda5814af2_u64)
+      put(array, 0x81ceb32c4b43fcf4_u64); put(array, 0x80eacf948770ced7_u64)
+      put(array, 0xa2425ff75e14fc31_u64); put(array, 0xa1258379a94d028d_u64)
+      put(array, 0xcad2f7f5359a3b3e_u64); put(array, 0x96ee45813a04330_u64)
+      put(array, 0xfd87b5f28300ca0d_u64); put(array, 0x8bca9d6e188853fc_u64)
+      put(array, 0x9e74d1b791e07e48_u64); put(array, 0x775ea264cf55347e_u64)
+      put(array, 0xc612062576589dda_u64); put(array, 0x95364afe032a819e_u64)
+      put(array, 0xf79687aed3eec551_u64); put(array, 0x3a83ddbd83f52205_u64)
+      put(array, 0x9abe14cd44753b52_u64); put(array, 0xc4926a9672793543_u64)
+      put(array, 0xc16d9a0095928a27_u64); put(array, 0x75b7053c0f178294_u64)
+      put(array, 0xf1c90080baf72cb1_u64); put(array, 0x5324c68b12dd6339_u64)
+      put(array, 0x971da05074da7bee_u64); put(array, 0xd3f6fc16ebca5e04_u64)
+      put(array, 0xbce5086492111aea_u64); put(array, 0x88f4bb1ca6bcf585_u64)
+      put(array, 0xec1e4a7db69561a5_u64); put(array, 0x2b31e9e3d06c32e6_u64)
+      put(array, 0x9392ee8e921d5d07_u64); put(array, 0x3aff322e62439fd0_u64)
+      put(array, 0xb877aa3236a4b449_u64); put(array, 0x9befeb9fad487c3_u64)
+      put(array, 0xe69594bec44de15b_u64); put(array, 0x4c2ebe687989a9b4_u64)
+      put(array, 0x901d7cf73ab0acd9_u64); put(array, 0xf9d37014bf60a11_u64)
+      put(array, 0xb424dc35095cd80f_u64); put(array, 0x538484c19ef38c95_u64)
+      put(array, 0xe12e13424bb40e13_u64); put(array, 0x2865a5f206b06fba_u64)
+      put(array, 0x8cbccc096f5088cb_u64); put(array, 0xf93f87b7442e45d4_u64)
+      put(array, 0xafebff0bcb24aafe_u64); put(array, 0xf78f69a51539d749_u64)
+      put(array, 0xdbe6fecebdedd5be_u64); put(array, 0xb573440e5a884d1c_u64)
+      put(array, 0x89705f4136b4a597_u64); put(array, 0x31680a88f8953031_u64)
+      put(array, 0xabcc77118461cefc_u64); put(array, 0xfdc20d2b36ba7c3e_u64)
+      put(array, 0xd6bf94d5e57a42bc_u64); put(array, 0x3d32907604691b4d_u64)
+      put(array, 0x8637bd05af6c69b5_u64); put(array, 0xa63f9a49c2c1b110_u64)
+      put(array, 0xa7c5ac471b478423_u64); put(array, 0xfcf80dc33721d54_u64)
+      put(array, 0xd1b71758e219652b_u64); put(array, 0xd3c36113404ea4a9_u64)
+      put(array, 0x83126e978d4fdf3b_u64); put(array, 0x645a1cac083126ea_u64)
+      put(array, 0xa3d70a3d70a3d70a_u64); put(array, 0x3d70a3d70a3d70a4_u64)
+      put(array, 0xcccccccccccccccc_u64); put(array, 0xcccccccccccccccd_u64)
+      put(array, 0x8000000000000000_u64); put(array, 0x0_u64)
+      put(array, 0xa000000000000000_u64); put(array, 0x0_u64)
+      put(array, 0xc800000000000000_u64); put(array, 0x0_u64)
+      put(array, 0xfa00000000000000_u64); put(array, 0x0_u64)
+      put(array, 0x9c40000000000000_u64); put(array, 0x0_u64)
+      put(array, 0xc350000000000000_u64); put(array, 0x0_u64)
+      put(array, 0xf424000000000000_u64); put(array, 0x0_u64)
+      put(array, 0x9896800000000000_u64); put(array, 0x0_u64)
+      put(array, 0xbebc200000000000_u64); put(array, 0x0_u64)
+      put(array, 0xee6b280000000000_u64); put(array, 0x0_u64)
+      put(array, 0x9502f90000000000_u64); put(array, 0x0_u64)
+      put(array, 0xba43b74000000000_u64); put(array, 0x0_u64)
+      put(array, 0xe8d4a51000000000_u64); put(array, 0x0_u64)
+      put(array, 0x9184e72a00000000_u64); put(array, 0x0_u64)
+      put(array, 0xb5e620f480000000_u64); put(array, 0x0_u64)
+      put(array, 0xe35fa931a0000000_u64); put(array, 0x0_u64)
+      put(array, 0x8e1bc9bf04000000_u64); put(array, 0x0_u64)
+      put(array, 0xb1a2bc2ec5000000_u64); put(array, 0x0_u64)
+      put(array, 0xde0b6b3a76400000_u64); put(array, 0x0_u64)
+      put(array, 0x8ac7230489e80000_u64); put(array, 0x0_u64)
+      put(array, 0xad78ebc5ac620000_u64); put(array, 0x0_u64)
+      put(array, 0xd8d726b7177a8000_u64); put(array, 0x0_u64)
+      put(array, 0x878678326eac9000_u64); put(array, 0x0_u64)
+      put(array, 0xa968163f0a57b400_u64); put(array, 0x0_u64)
+      put(array, 0xd3c21bcecceda100_u64); put(array, 0x0_u64)
+      put(array, 0x84595161401484a0_u64); put(array, 0x0_u64)
+      put(array, 0xa56fa5b99019a5c8_u64); put(array, 0x0_u64)
+      put(array, 0xcecb8f27f4200f3a_u64); put(array, 0x0_u64)
+      put(array, 0x813f3978f8940984_u64); put(array, 0x4000000000000000_u64)
+      put(array, 0xa18f07d736b90be5_u64); put(array, 0x5000000000000000_u64)
+      put(array, 0xc9f2c9cd04674ede_u64); put(array, 0xa400000000000000_u64)
+      put(array, 0xfc6f7c4045812296_u64); put(array, 0x4d00000000000000_u64)
+      put(array, 0x9dc5ada82b70b59d_u64); put(array, 0xf020000000000000_u64)
+      put(array, 0xc5371912364ce305_u64); put(array, 0x6c28000000000000_u64)
+      put(array, 0xf684df56c3e01bc6_u64); put(array, 0xc732000000000000_u64)
+      put(array, 0x9a130b963a6c115c_u64); put(array, 0x3c7f400000000000_u64)
+      put(array, 0xc097ce7bc90715b3_u64); put(array, 0x4b9f100000000000_u64)
+      put(array, 0xf0bdc21abb48db20_u64); put(array, 0x1e86d40000000000_u64)
+      put(array, 0x96769950b50d88f4_u64); put(array, 0x1314448000000000_u64)
+      put(array, 0xbc143fa4e250eb31_u64); put(array, 0x17d955a000000000_u64)
+      put(array, 0xeb194f8e1ae525fd_u64); put(array, 0x5dcfab0800000000_u64)
+      put(array, 0x92efd1b8d0cf37be_u64); put(array, 0x5aa1cae500000000_u64)
+      put(array, 0xb7abc627050305ad_u64); put(array, 0xf14a3d9e40000000_u64)
+      put(array, 0xe596b7b0c643c719_u64); put(array, 0x6d9ccd05d0000000_u64)
+      put(array, 0x8f7e32ce7bea5c6f_u64); put(array, 0xe4820023a2000000_u64)
+      put(array, 0xb35dbf821ae4f38b_u64); put(array, 0xdda2802c8a800000_u64)
+      put(array, 0xe0352f62a19e306e_u64); put(array, 0xd50b2037ad200000_u64)
+      put(array, 0x8c213d9da502de45_u64); put(array, 0x4526f422cc340000_u64)
+      put(array, 0xaf298d050e4395d6_u64); put(array, 0x9670b12b7f410000_u64)
+      put(array, 0xdaf3f04651d47b4c_u64); put(array, 0x3c0cdd765f114000_u64)
+      put(array, 0x88d8762bf324cd0f_u64); put(array, 0xa5880a69fb6ac800_u64)
+      put(array, 0xab0e93b6efee0053_u64); put(array, 0x8eea0d047a457a00_u64)
+      put(array, 0xd5d238a4abe98068_u64); put(array, 0x72a4904598d6d880_u64)
+      put(array, 0x85a36366eb71f041_u64); put(array, 0x47a6da2b7f864750_u64)
+      put(array, 0xa70c3c40a64e6c51_u64); put(array, 0x999090b65f67d924_u64)
+      put(array, 0xd0cf4b50cfe20765_u64); put(array, 0xfff4b4e3f741cf6d_u64)
+      put(array, 0x82818f1281ed449f_u64); put(array, 0xbff8f10e7a8921a4_u64)
+      put(array, 0xa321f2d7226895c7_u64); put(array, 0xaff72d52192b6a0d_u64)
+      put(array, 0xcbea6f8ceb02bb39_u64); put(array, 0x9bf4f8a69f764490_u64)
+      put(array, 0xfee50b7025c36a08_u64); put(array, 0x2f236d04753d5b4_u64)
+      put(array, 0x9f4f2726179a2245_u64); put(array, 0x1d762422c946590_u64)
+      put(array, 0xc722f0ef9d80aad6_u64); put(array, 0x424d3ad2b7b97ef5_u64)
+      put(array, 0xf8ebad2b84e0d58b_u64); put(array, 0xd2e0898765a7deb2_u64)
+      put(array, 0x9b934c3b330c8577_u64); put(array, 0x63cc55f49f88eb2f_u64)
+      put(array, 0xc2781f49ffcfa6d5_u64); put(array, 0x3cbf6b71c76b25fb_u64)
+      put(array, 0xf316271c7fc3908a_u64); put(array, 0x8bef464e3945ef7a_u64)
+      put(array, 0x97edd871cfda3a56_u64); put(array, 0x97758bf0e3cbb5ac_u64)
+      put(array, 0xbde94e8e43d0c8ec_u64); put(array, 0x3d52eeed1cbea317_u64)
+      put(array, 0xed63a231d4c4fb27_u64); put(array, 0x4ca7aaa863ee4bdd_u64)
+      put(array, 0x945e455f24fb1cf8_u64); put(array, 0x8fe8caa93e74ef6a_u64)
+      put(array, 0xb975d6b6ee39e436_u64); put(array, 0xb3e2fd538e122b44_u64)
+      put(array, 0xe7d34c64a9c85d44_u64); put(array, 0x60dbbca87196b616_u64)
+      put(array, 0x90e40fbeea1d3a4a_u64); put(array, 0xbc8955e946fe31cd_u64)
+      put(array, 0xb51d13aea4a488dd_u64); put(array, 0x6babab6398bdbe41_u64)
+      put(array, 0xe264589a4dcdab14_u64); put(array, 0xc696963c7eed2dd1_u64)
+      put(array, 0x8d7eb76070a08aec_u64); put(array, 0xfc1e1de5cf543ca2_u64)
+      put(array, 0xb0de65388cc8ada8_u64); put(array, 0x3b25a55f43294bcb_u64)
+      put(array, 0xdd15fe86affad912_u64); put(array, 0x49ef0eb713f39ebe_u64)
+      put(array, 0x8a2dbf142dfcc7ab_u64); put(array, 0x6e3569326c784337_u64)
+      put(array, 0xacb92ed9397bf996_u64); put(array, 0x49c2c37f07965404_u64)
+      put(array, 0xd7e77a8f87daf7fb_u64); put(array, 0xdc33745ec97be906_u64)
+      put(array, 0x86f0ac99b4e8dafd_u64); put(array, 0x69a028bb3ded71a3_u64)
+      put(array, 0xa8acd7c0222311bc_u64); put(array, 0xc40832ea0d68ce0c_u64)
+      put(array, 0xd2d80db02aabd62b_u64); put(array, 0xf50a3fa490c30190_u64)
+      put(array, 0x83c7088e1aab65db_u64); put(array, 0x792667c6da79e0fa_u64)
+      put(array, 0xa4b8cab1a1563f52_u64); put(array, 0x577001b891185938_u64)
+      put(array, 0xcde6fd5e09abcf26_u64); put(array, 0xed4c0226b55e6f86_u64)
+      put(array, 0x80b05e5ac60b6178_u64); put(array, 0x544f8158315b05b4_u64)
+      put(array, 0xa0dc75f1778e39d6_u64); put(array, 0x696361ae3db1c721_u64)
+      put(array, 0xc913936dd571c84c_u64); put(array, 0x3bc3a19cd1e38e9_u64)
+      put(array, 0xfb5878494ace3a5f_u64); put(array, 0x4ab48a04065c723_u64)
+      put(array, 0x9d174b2dcec0e47b_u64); put(array, 0x62eb0d64283f9c76_u64)
+      put(array, 0xc45d1df942711d9a_u64); put(array, 0x3ba5d0bd324f8394_u64)
+      put(array, 0xf5746577930d6500_u64); put(array, 0xca8f44ec7ee36479_u64)
+      put(array, 0x9968bf6abbe85f20_u64); put(array, 0x7e998b13cf4e1ecb_u64)
+      put(array, 0xbfc2ef456ae276e8_u64); put(array, 0x9e3fedd8c321a67e_u64)
+      put(array, 0xefb3ab16c59b14a2_u64); put(array, 0xc5cfe94ef3ea101e_u64)
+      put(array, 0x95d04aee3b80ece5_u64); put(array, 0xbba1f1d158724a12_u64)
+      put(array, 0xbb445da9ca61281f_u64); put(array, 0x2a8a6e45ae8edc97_u64)
+      put(array, 0xea1575143cf97226_u64); put(array, 0xf52d09d71a3293bd_u64)
+      put(array, 0x924d692ca61be758_u64); put(array, 0x593c2626705f9c56_u64)
+      put(array, 0xb6e0c377cfa2e12e_u64); put(array, 0x6f8b2fb00c77836c_u64)
+      put(array, 0xe498f455c38b997a_u64); put(array, 0xb6dfb9c0f956447_u64)
+      put(array, 0x8edf98b59a373fec_u64); put(array, 0x4724bd4189bd5eac_u64)
+      put(array, 0xb2977ee300c50fe7_u64); put(array, 0x58edec91ec2cb657_u64)
+      put(array, 0xdf3d5e9bc0f653e1_u64); put(array, 0x2f2967b66737e3ed_u64)
+      put(array, 0x8b865b215899f46c_u64); put(array, 0xbd79e0d20082ee74_u64)
+      put(array, 0xae67f1e9aec07187_u64); put(array, 0xecd8590680a3aa11_u64)
+      put(array, 0xda01ee641a708de9_u64); put(array, 0xe80e6f4820cc9495_u64)
+      put(array, 0x884134fe908658b2_u64); put(array, 0x3109058d147fdcdd_u64)
+      put(array, 0xaa51823e34a7eede_u64); put(array, 0xbd4b46f0599fd415_u64)
+      put(array, 0xd4e5e2cdc1d1ea96_u64); put(array, 0x6c9e18ac7007c91a_u64)
+      put(array, 0x850fadc09923329e_u64); put(array, 0x3e2cf6bc604ddb0_u64)
+      put(array, 0xa6539930bf6bff45_u64); put(array, 0x84db8346b786151c_u64)
+      put(array, 0xcfe87f7cef46ff16_u64); put(array, 0xe612641865679a63_u64)
+      put(array, 0x81f14fae158c5f6e_u64); put(array, 0x4fcb7e8f3f60c07e_u64)
+      put(array, 0xa26da3999aef7749_u64); put(array, 0xe3be5e330f38f09d_u64)
+      put(array, 0xcb090c8001ab551c_u64); put(array, 0x5cadf5bfd3072cc5_u64)
+      put(array, 0xfdcb4fa002162a63_u64); put(array, 0x73d9732fc7c8f7f6_u64)
+      put(array, 0x9e9f11c4014dda7e_u64); put(array, 0x2867e7fddcdd9afa_u64)
+      put(array, 0xc646d63501a1511d_u64); put(array, 0xb281e1fd541501b8_u64)
+      put(array, 0xf7d88bc24209a565_u64); put(array, 0x1f225a7ca91a4226_u64)
+      put(array, 0x9ae757596946075f_u64); put(array, 0x3375788de9b06958_u64)
+      put(array, 0xc1a12d2fc3978937_u64); put(array, 0x52d6b1641c83ae_u64)
+      put(array, 0xf209787bb47d6b84_u64); put(array, 0xc0678c5dbd23a49a_u64)
+      put(array, 0x9745eb4d50ce6332_u64); put(array, 0xf840b7ba963646e0_u64)
+      put(array, 0xbd176620a501fbff_u64); put(array, 0xb650e5a93bc3d898_u64)
+      put(array, 0xec5d3fa8ce427aff_u64); put(array, 0xa3e51f138ab4cebe_u64)
+      put(array, 0x93ba47c980e98cdf_u64); put(array, 0xc66f336c36b10137_u64)
+      put(array, 0xb8a8d9bbe123f017_u64); put(array, 0xb80b0047445d4184_u64)
+      put(array, 0xe6d3102ad96cec1d_u64); put(array, 0xa60dc059157491e5_u64)
+      put(array, 0x9043ea1ac7e41392_u64); put(array, 0x87c89837ad68db2f_u64)
+      put(array, 0xb454e4a179dd1877_u64); put(array, 0x29babe4598c311fb_u64)
+      put(array, 0xe16a1dc9d8545e94_u64); put(array, 0xf4296dd6fef3d67a_u64)
+      put(array, 0x8ce2529e2734bb1d_u64); put(array, 0x1899e4a65f58660c_u64)
+      put(array, 0xb01ae745b101e9e4_u64); put(array, 0x5ec05dcff72e7f8f_u64)
+      put(array, 0xdc21a1171d42645d_u64); put(array, 0x76707543f4fa1f73_u64)
+      put(array, 0x899504ae72497eba_u64); put(array, 0x6a06494a791c53a8_u64)
+      put(array, 0xabfa45da0edbde69_u64); put(array, 0x487db9d17636892_u64)
+      put(array, 0xd6f8d7509292d603_u64); put(array, 0x45a9d2845d3c42b6_u64)
+      put(array, 0x865b86925b9bc5c2_u64); put(array, 0xb8a2392ba45a9b2_u64)
+      put(array, 0xa7f26836f282b732_u64); put(array, 0x8e6cac7768d7141e_u64)
+      put(array, 0xd1ef0244af2364ff_u64); put(array, 0x3207d795430cd926_u64)
+      put(array, 0x8335616aed761f1f_u64); put(array, 0x7f44e6bd49e807b8_u64)
+      put(array, 0xa402b9c5a8d3a6e7_u64); put(array, 0x5f16206c9c6209a6_u64)
+      put(array, 0xcd036837130890a1_u64); put(array, 0x36dba887c37a8c0f_u64)
+      put(array, 0x802221226be55a64_u64); put(array, 0xc2494954da2c9789_u64)
+      put(array, 0xa02aa96b06deb0fd_u64); put(array, 0xf2db9baa10b7bd6c_u64)
+      put(array, 0xc83553c5c8965d3d_u64); put(array, 0x6f92829494e5acc7_u64)
+      put(array, 0xfa42a8b73abbf48c_u64); put(array, 0xcb772339ba1f17f9_u64)
+      put(array, 0x9c69a97284b578d7_u64); put(array, 0xff2a760414536efb_u64)
+      put(array, 0xc38413cf25e2d70d_u64); put(array, 0xfef5138519684aba_u64)
+      put(array, 0xf46518c2ef5b8cd1_u64); put(array, 0x7eb258665fc25d69_u64)
+      put(array, 0x98bf2f79d5993802_u64); put(array, 0xef2f773ffbd97a61_u64)
+      put(array, 0xbeeefb584aff8603_u64); put(array, 0xaafb550ffacfd8fa_u64)
+      put(array, 0xeeaaba2e5dbf6784_u64); put(array, 0x95ba2a53f983cf38_u64)
+      put(array, 0x952ab45cfa97a0b2_u64); put(array, 0xdd945a747bf26183_u64)
+      put(array, 0xba756174393d88df_u64); put(array, 0x94f971119aeef9e4_u64)
+      put(array, 0xe912b9d1478ceb17_u64); put(array, 0x7a37cd5601aab85d_u64)
+      put(array, 0x91abb422ccb812ee_u64); put(array, 0xac62e055c10ab33a_u64)
+      put(array, 0xb616a12b7fe617aa_u64); put(array, 0x577b986b314d6009_u64)
+      put(array, 0xe39c49765fdf9d94_u64); put(array, 0xed5a7e85fda0b80b_u64)
+      put(array, 0x8e41ade9fbebc27d_u64); put(array, 0x14588f13be847307_u64)
+      put(array, 0xb1d219647ae6b31c_u64); put(array, 0x596eb2d8ae258fc8_u64)
+      put(array, 0xde469fbd99a05fe3_u64); put(array, 0x6fca5f8ed9aef3bb_u64)
+      put(array, 0x8aec23d680043bee_u64); put(array, 0x25de7bb9480d5854_u64)
+      put(array, 0xada72ccc20054ae9_u64); put(array, 0xaf561aa79a10ae6a_u64)
+      put(array, 0xd910f7ff28069da4_u64); put(array, 0x1b2ba1518094da04_u64)
+      put(array, 0x87aa9aff79042286_u64); put(array, 0x90fb44d2f05d0842_u64)
+      put(array, 0xa99541bf57452b28_u64); put(array, 0x353a1607ac744a53_u64)
+      put(array, 0xd3fa922f2d1675f2_u64); put(array, 0x42889b8997915ce8_u64)
+      put(array, 0x847c9b5d7c2e09b7_u64); put(array, 0x69956135febada11_u64)
+      put(array, 0xa59bc234db398c25_u64); put(array, 0x43fab9837e699095_u64)
+      put(array, 0xcf02b2c21207ef2e_u64); put(array, 0x94f967e45e03f4bb_u64)
+      put(array, 0x8161afb94b44f57d_u64); put(array, 0x1d1be0eebac278f5_u64)
+      put(array, 0xa1ba1ba79e1632dc_u64); put(array, 0x6462d92a69731732_u64)
+      put(array, 0xca28a291859bbf93_u64); put(array, 0x7d7b8f7503cfdcfe_u64)
+      put(array, 0xfcb2cb35e702af78_u64); put(array, 0x5cda735244c3d43e_u64)
+      put(array, 0x9defbf01b061adab_u64); put(array, 0x3a0888136afa64a7_u64)
+      put(array, 0xc56baec21c7a1916_u64); put(array, 0x88aaa1845b8fdd0_u64)
+      put(array, 0xf6c69a72a3989f5b_u64); put(array, 0x8aad549e57273d45_u64)
+      put(array, 0x9a3c2087a63f6399_u64); put(array, 0x36ac54e2f678864b_u64)
+      put(array, 0xc0cb28a98fcf3c7f_u64); put(array, 0x84576a1bb416a7dd_u64)
+      put(array, 0xf0fdf2d3f3c30b9f_u64); put(array, 0x656d44a2a11c51d5_u64)
+      put(array, 0x969eb7c47859e743_u64); put(array, 0x9f644ae5a4b1b325_u64)
+      put(array, 0xbc4665b596706114_u64); put(array, 0x873d5d9f0dde1fee_u64)
+      put(array, 0xeb57ff22fc0c7959_u64); put(array, 0xa90cb506d155a7ea_u64)
+      put(array, 0x9316ff75dd87cbd8_u64); put(array, 0x9a7f12442d588f2_u64)
+      put(array, 0xb7dcbf5354e9bece_u64); put(array, 0xc11ed6d538aeb2f_u64)
+      put(array, 0xe5d3ef282a242e81_u64); put(array, 0x8f1668c8a86da5fa_u64)
+      put(array, 0x8fa475791a569d10_u64); put(array, 0xf96e017d694487bc_u64)
+      put(array, 0xb38d92d760ec4455_u64); put(array, 0x37c981dcc395a9ac_u64)
+      put(array, 0xe070f78d3927556a_u64); put(array, 0x85bbe253f47b1417_u64)
+      put(array, 0x8c469ab843b89562_u64); put(array, 0x93956d7478ccec8e_u64)
+      put(array, 0xaf58416654a6babb_u64); put(array, 0x387ac8d1970027b2_u64)
+      put(array, 0xdb2e51bfe9d0696a_u64); put(array, 0x6997b05fcc0319e_u64)
+      put(array, 0x88fcf317f22241e2_u64); put(array, 0x441fece3bdf81f03_u64)
+      put(array, 0xab3c2fddeeaad25a_u64); put(array, 0xd527e81cad7626c3_u64)
+      put(array, 0xd60b3bd56a5586f1_u64); put(array, 0x8a71e223d8d3b074_u64)
+      put(array, 0x85c7056562757456_u64); put(array, 0xf6872d5667844e49_u64)
+      put(array, 0xa738c6bebb12d16c_u64); put(array, 0xb428f8ac016561db_u64)
+      put(array, 0xd106f86e69d785c7_u64); put(array, 0xe13336d701beba52_u64)
+      put(array, 0x82a45b450226b39c_u64); put(array, 0xecc0024661173473_u64)
+      put(array, 0xa34d721642b06084_u64); put(array, 0x27f002d7f95d0190_u64)
+      put(array, 0xcc20ce9bd35c78a5_u64); put(array, 0x31ec038df7b441f4_u64)
+      put(array, 0xff290242c83396ce_u64); put(array, 0x7e67047175a15271_u64)
+      put(array, 0x9f79a169bd203e41_u64); put(array, 0xf0062c6e984d386_u64)
+      put(array, 0xc75809c42c684dd1_u64); put(array, 0x52c07b78a3e60868_u64)
+      put(array, 0xf92e0c3537826145_u64); put(array, 0xa7709a56ccdf8a82_u64)
+      put(array, 0x9bbcc7a142b17ccb_u64); put(array, 0x88a66076400bb691_u64)
+      put(array, 0xc2abf989935ddbfe_u64); put(array, 0x6acff893d00ea435_u64)
+      put(array, 0xf356f7ebf83552fe_u64); put(array, 0x583f6b8c4124d43_u64)
+      put(array, 0x98165af37b2153de_u64); put(array, 0xc3727a337a8b704a_u64)
+      put(array, 0xbe1bf1b059e9a8d6_u64); put(array, 0x744f18c0592e4c5c_u64)
+      put(array, 0xeda2ee1c7064130c_u64); put(array, 0x1162def06f79df73_u64)
+      put(array, 0x9485d4d1c63e8be7_u64); put(array, 0x8addcb5645ac2ba8_u64)
+      put(array, 0xb9a74a0637ce2ee1_u64); put(array, 0x6d953e2bd7173692_u64)
+      put(array, 0xe8111c87c5c1ba99_u64); put(array, 0xc8fa8db6ccdd0437_u64)
+      put(array, 0x910ab1d4db9914a0_u64); put(array, 0x1d9c9892400a22a2_u64)
+      put(array, 0xb54d5e4a127f59c8_u64); put(array, 0x2503beb6d00cab4b_u64)
+      put(array, 0xe2a0b5dc971f303a_u64); put(array, 0x2e44ae64840fd61d_u64)
+      put(array, 0x8da471a9de737e24_u64); put(array, 0x5ceaecfed289e5d2_u64)
+      put(array, 0xb10d8e1456105dad_u64); put(array, 0x7425a83e872c5f47_u64)
+      put(array, 0xdd50f1996b947518_u64); put(array, 0xd12f124e28f77719_u64)
+      put(array, 0x8a5296ffe33cc92f_u64); put(array, 0x82bd6b70d99aaa6f_u64)
+      put(array, 0xace73cbfdc0bfb7b_u64); put(array, 0x636cc64d1001550b_u64)
+      put(array, 0xd8210befd30efa5a_u64); put(array, 0x3c47f7e05401aa4e_u64)
+      put(array, 0x8714a775e3e95c78_u64); put(array, 0x65acfaec34810a71_u64)
+      put(array, 0xa8d9d1535ce3b396_u64); put(array, 0x7f1839a741a14d0d_u64)
+      put(array, 0xd31045a8341ca07c_u64); put(array, 0x1ede48111209a050_u64)
+      put(array, 0x83ea2b892091e44d_u64); put(array, 0x934aed0aab460432_u64)
+      put(array, 0xa4e4b66b68b65d60_u64); put(array, 0xf81da84d5617853f_u64)
+      put(array, 0xce1de40642e3f4b9_u64); put(array, 0x36251260ab9d668e_u64)
+      put(array, 0x80d2ae83e9ce78f3_u64); put(array, 0xc1d72b7c6b426019_u64)
+      put(array, 0xa1075a24e4421730_u64); put(array, 0xb24cf65b8612f81f_u64)
+      put(array, 0xc94930ae1d529cfc_u64); put(array, 0xdee033f26797b627_u64)
+      put(array, 0xfb9b7cd9a4a7443c_u64); put(array, 0x169840ef017da3b1_u64)
+      put(array, 0x9d412e0806e88aa5_u64); put(array, 0x8e1f289560ee864e_u64)
+      put(array, 0xc491798a08a2ad4e_u64); put(array, 0xf1a6f2bab92a27e2_u64)
+      put(array, 0xf5b5d7ec8acb58a2_u64); put(array, 0xae10af696774b1db_u64)
+      put(array, 0x9991a6f3d6bf1765_u64); put(array, 0xacca6da1e0a8ef29_u64)
+      put(array, 0xbff610b0cc6edd3f_u64); put(array, 0x17fd090a58d32af3_u64)
+      put(array, 0xeff394dcff8a948e_u64); put(array, 0xddfc4b4cef07f5b0_u64)
+      put(array, 0x95f83d0a1fb69cd9_u64); put(array, 0x4abdaf101564f98e_u64)
+      put(array, 0xbb764c4ca7a4440f_u64); put(array, 0x9d6d1ad41abe37f1_u64)
+      put(array, 0xea53df5fd18d5513_u64); put(array, 0x84c86189216dc5ed_u64)
+      put(array, 0x92746b9be2f8552c_u64); put(array, 0x32fd3cf5b4e49bb4_u64)
+      put(array, 0xb7118682dbb66a77_u64); put(array, 0x3fbc8c33221dc2a1_u64)
+      put(array, 0xe4d5e82392a40515_u64); put(array, 0xfabaf3feaa5334a_u64)
+      put(array, 0x8f05b1163ba6832d_u64); put(array, 0x29cb4d87f2a7400e_u64)
+      put(array, 0xb2c71d5bca9023f8_u64); put(array, 0x743e20e9ef511012_u64)
+      put(array, 0xdf78e4b2bd342cf6_u64); put(array, 0x914da9246b255416_u64)
+      put(array, 0x8bab8eefb6409c1a_u64); put(array, 0x1ad089b6c2f7548e_u64)
+      put(array, 0xae9672aba3d0c320_u64); put(array, 0xa184ac2473b529b1_u64)
+      put(array, 0xda3c0f568cc4f3e8_u64); put(array, 0xc9e5d72d90a2741e_u64)
+      put(array, 0x8865899617fb1871_u64); put(array, 0x7e2fa67c7a658892_u64)
+      put(array, 0xaa7eebfb9df9de8d_u64); put(array, 0xddbb901b98feeab7_u64)
+      put(array, 0xd51ea6fa85785631_u64); put(array, 0x552a74227f3ea565_u64)
+      put(array, 0x8533285c936b35de_u64); put(array, 0xd53a88958f87275f_u64)
+      put(array, 0xa67ff273b8460356_u64); put(array, 0x8a892abaf368f137_u64)
+      put(array, 0xd01fef10a657842c_u64); put(array, 0x2d2b7569b0432d85_u64)
+      put(array, 0x8213f56a67f6b29b_u64); put(array, 0x9c3b29620e29fc73_u64)
+      put(array, 0xa298f2c501f45f42_u64); put(array, 0x8349f3ba91b47b8f_u64)
+      put(array, 0xcb3f2f7642717713_u64); put(array, 0x241c70a936219a73_u64)
+      put(array, 0xfe0efb53d30dd4d7_u64); put(array, 0xed238cd383aa0110_u64)
+      put(array, 0x9ec95d1463e8a506_u64); put(array, 0xf4363804324a40aa_u64)
+      put(array, 0xc67bb4597ce2ce48_u64); put(array, 0xb143c6053edcd0d5_u64)
+      put(array, 0xf81aa16fdc1b81da_u64); put(array, 0xdd94b7868e94050a_u64)
+      put(array, 0x9b10a4e5e9913128_u64); put(array, 0xca7cf2b4191c8326_u64)
+      put(array, 0xc1d4ce1f63f57d72_u64); put(array, 0xfd1c2f611f63a3f0_u64)
+      put(array, 0xf24a01a73cf2dccf_u64); put(array, 0xbc633b39673c8cec_u64)
+      put(array, 0x976e41088617ca01_u64); put(array, 0xd5be0503e085d813_u64)
+      put(array, 0xbd49d14aa79dbc82_u64); put(array, 0x4b2d8644d8a74e18_u64)
+      put(array, 0xec9c459d51852ba2_u64); put(array, 0xddf8e7d60ed1219e_u64)
+      put(array, 0x93e1ab8252f33b45_u64); put(array, 0xcabb90e5c942b503_u64)
+      put(array, 0xb8da1662e7b00a17_u64); put(array, 0x3d6a751f3b936243_u64)
+      put(array, 0xe7109bfba19c0c9d_u64); put(array, 0xcc512670a783ad4_u64)
+      put(array, 0x906a617d450187e2_u64); put(array, 0x27fb2b80668b24c5_u64)
+      put(array, 0xb484f9dc9641e9da_u64); put(array, 0xb1f9f660802dedf6_u64)
+      put(array, 0xe1a63853bbd26451_u64); put(array, 0x5e7873f8a0396973_u64)
+      put(array, 0x8d07e33455637eb2_u64); put(array, 0xdb0b487b6423e1e8_u64)
+      put(array, 0xb049dc016abc5e5f_u64); put(array, 0x91ce1a9a3d2cda62_u64)
+      put(array, 0xdc5c5301c56b75f7_u64); put(array, 0x7641a140cc7810fb_u64)
+      put(array, 0x89b9b3e11b6329ba_u64); put(array, 0xa9e904c87fcb0a9d_u64)
+      put(array, 0xac2820d9623bf429_u64); put(array, 0x546345fa9fbdcd44_u64)
+      put(array, 0xd732290fbacaf133_u64); put(array, 0xa97c177947ad4095_u64)
+      put(array, 0x867f59a9d4bed6c0_u64); put(array, 0x49ed8eabcccc485d_u64)
+      put(array, 0xa81f301449ee8c70_u64); put(array, 0x5c68f256bfff5a74_u64)
+      put(array, 0xd226fc195c6a2f8c_u64); put(array, 0x73832eec6fff3111_u64)
+      put(array, 0x83585d8fd9c25db7_u64); put(array, 0xc831fd53c5ff7eab_u64)
+      put(array, 0xa42e74f3d032f525_u64); put(array, 0xba3e7ca8b77f5e55_u64)
+      put(array, 0xcd3a1230c43fb26f_u64); put(array, 0x28ce1bd2e55f35eb_u64)
+      put(array, 0x80444b5e7aa7cf85_u64); put(array, 0x7980d163cf5b81b3_u64)
+      put(array, 0xa0555e361951c366_u64); put(array, 0xd7e105bcc332621f_u64)
+      put(array, 0xc86ab5c39fa63440_u64); put(array, 0x8dd9472bf3fefaa7_u64)
+      put(array, 0xfa856334878fc150_u64); put(array, 0xb14f98f6f0feb951_u64)
+      put(array, 0x9c935e00d4b9d8d2_u64); put(array, 0x6ed1bf9a569f33d3_u64)
+      put(array, 0xc3b8358109e84f07_u64); put(array, 0xa862f80ec4700c8_u64)
+      put(array, 0xf4a642e14c6262c8_u64); put(array, 0xcd27bb612758c0fa_u64)
+      put(array, 0x98e7e9cccfbd7dbd_u64); put(array, 0x8038d51cb897789c_u64)
+      put(array, 0xbf21e44003acdd2c_u64); put(array, 0xe0470a63e6bd56c3_u64)
+      put(array, 0xeeea5d5004981478_u64); put(array, 0x1858ccfce06cac74_u64)
+      put(array, 0x95527a5202df0ccb_u64); put(array, 0xf37801e0c43ebc8_u64)
+      put(array, 0xbaa718e68396cffd_u64); put(array, 0xd30560258f54e6ba_u64)
+      put(array, 0xe950df20247c83fd_u64); put(array, 0x47c6b82ef32a2069_u64)
+      put(array, 0x91d28b7416cdd27e_u64); put(array, 0x4cdc331d57fa5441_u64)
+      put(array, 0xb6472e511c81471d_u64); put(array, 0xe0133fe4adf8e952_u64)
+      put(array, 0xe3d8f9e563a198e5_u64); put(array, 0x58180fddd97723a6_u64)
+      put(array, 0x8e679c2f5e44ff8f_u64); put(array, 0x570f09eaa7ea7648_u64)
+      array
+    end
+  end
+end
diff --git a/src/float/fast_float/float_common.cr b/src/float/fast_float/float_common.cr
new file mode 100644
index 000000000000..a66dc99f82f7
--- /dev/null
+++ b/src/float/fast_float/float_common.cr
@@ -0,0 +1,294 @@
+module Float::FastFloat
+  @[Flags]
+  enum CharsFormat # bit flags; NOTE(review): judging by the member names they select the accepted number syntax
+    Scientific = 1 << 0
+    Fixed      = 1 << 2 # NOTE: bit 1 (1 << 1) is not assigned to any member
+    Hex        = 1 << 3 # NOTE(review): the patch description states hexfloats are not accepted by this port
+    NoInfnan   = 1 << 4
+    JsonFmt    = 1 << 5
+    FortranFmt = 1 << 6
+
+    # RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6
+    Json = JsonFmt | Fixed | Scientific | NoInfnan
+
+    # Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed.
+    JsonOrInfnan = JsonFmt | Fixed | Scientific
+
+    Fortran = FortranFmt | Fixed | Scientific
+    General = Fixed | Scientific # the default `format` of `ParseOptionsT`
+  end
+
+  # Parse outcome: an input position *ptr* plus a status *ec*. NOTE(crystal): uses `Errno` to represent C++'s `std::errc`
+  record FromCharsResultT(UC), ptr : UC*, ec : Errno
+
+  alias FromCharsResult = FromCharsResultT(UInt8) # instantiation for byte (UInt8) input
+
+  record ParseOptionsT(UC), format : CharsFormat = :general, decimal_point : UC = 0x2E # '.'.ord
+
+  alias ParseOptions = ParseOptionsT(UInt8) # instantiation for byte (UInt8) input
+
+  # Rust-like `try!()` / `?`: makes the enclosing method return `false` as soon as *x* evaluates falsey
+  macro fastfloat_try(x)
+    if !({{ x }})
+      return false
+    end
+  end
+
+  # Compares *length* units of two ASCII strings (low 8 bits only); pairs differing only in bit 0x20, the ASCII case bit, count as equal.
+  def self.fastfloat_strncasecmp(input1 : UC*, input2 : UC*, length : Int) : Bool forall UC
+    diff_bits = 0_u8
+    length.times do |offset|
+      diff_bits |= (input1 + offset).value.to_u8! ^ (input2 + offset).value.to_u8!
+    end
+    diff_bits == 0_u8 || diff_bits == 32_u8
+  end
+
+  record Value128, low : UInt64, high : UInt64 do
+    def self.new(x : UInt128) : self # splits *x* into its low and high 64-bit halves
+      new(x.to_u64!, x.unsafe_shr(64).to_u64!)
+    end
+  end
+
+  struct AdjustedMantissa
+    property mantissa : UInt64 # OR-ed into the low bits of the result by `BinaryFormat#to_float`
+    property power2 : Int32    # shifted left by `mantissa_explicit_bits` in `BinaryFormat#to_float`
+
+    def initialize(@mantissa : UInt64 = 0, @power2 : Int32 = 0)
+    end
+  end
+
+  INVALID_AM_BIAS = -0x8000 # == -32768
+
+  CONSTANT_55555 = 3125_u64 # == 5 ** 5
+
+  module BinaryFormat(T, EquivUint) # forward declaration so the structs below can include it; reopened later to add `to_float`
+  end
+
+  struct BinaryFormat_Float64 # Float64 layout constants and fast-path lookup tables
+    include BinaryFormat(Float64, UInt64)
+
+    POWERS_OF_TEN = [ # 10**0 through 10**22, indexed by exponent
+      1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11,
+      1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
+    ]
+
+    # Largest integer value v so that (5**index * v) <= 1<<53.
+    # 0x20000000000000 == 1 << 53
+    MAX_MANTISSA = [
+      0x20000000000000_u64,
+      0x20000000000000_u64.unsafe_div(5),
+      0x20000000000000_u64.unsafe_div(5 * 5),
+      0x20000000000000_u64.unsafe_div(5 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(5 * 5 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * 5 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5 * 5),
+      0x20000000000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * CONSTANT_55555 * 5 * 5 * 5 * 5),
+    ]
+
+    def min_exponent_fast_path : Int32
+      -22
+    end
+
+    def mantissa_explicit_bits : Int32
+      52 # also the shift applied to the exponent field in `BinaryFormat#to_float`
+    end
+
+    def max_exponent_round_to_even : Int32
+      23
+    end
+
+    def min_exponent_round_to_even : Int32
+      -4
+    end
+
+    def minimum_exponent : Int32
+      -1023
+    end
+
+    def infinite_power : Int32
+      0x7FF # eleven one-bits, i.e. `exponent_mask` shifted right by 52
+    end
+
+    def sign_index : Int32
+      63 # shift applied to the sign bit in `BinaryFormat#to_float`
+    end
+
+    def max_exponent_fast_path : Int32
+      22
+    end
+
+    def max_mantissa_fast_path : UInt64
+      0x20000000000000_u64 # 1 << 53, same value as MAX_MANTISSA[0]
+    end
+
+    def max_mantissa_fast_path(power : Int64) : UInt64
+      # caller is responsible to ensure that
+      # power >= 0 && power <= 22
+      MAX_MANTISSA.unsafe_fetch(power) # unchecked index: see the caller contract above
+    end
+
+    def exact_power_of_ten(power : Int64) : Float64
+      POWERS_OF_TEN.unsafe_fetch(power) # unchecked index: the table only holds entries 0..22
+    end
+
+    def largest_power_of_ten : Int32
+      308
+    end
+
+    def smallest_power_of_ten : Int32
+      -342
+    end
+
+    def max_digits : Int32
+      769
+    end
+
+    def exponent_mask : EquivUint
+      0x7FF0000000000000_u64
+    end
+
+    def mantissa_mask : EquivUint
+      0x000FFFFFFFFFFFFF_u64
+    end
+
+    def hidden_bit_mask : EquivUint
+      0x0010000000000000_u64
+    end
+  end
+
+  struct BinaryFormat_Float32 # Float32 layout constants and fast-path lookup tables
+    include BinaryFormat(Float32, UInt32)
+
+    POWERS_OF_TEN = [ # 10**0 through 10**10, indexed by exponent
+      1e0f32, 1e1f32, 1e2f32, 1e3f32, 1e4f32, 1e5f32, 1e6f32, 1e7f32, 1e8f32, 1e9f32, 1e10f32,
+    ]
+
+    # Largest integer value v so that (5**index * v) <= 1<<24.
+    # 0x1000000 == 1<<24
+    MAX_MANTISSA = [
+      0x1000000_u64,
+      0x1000000_u64.unsafe_div(5),
+      0x1000000_u64.unsafe_div(5 * 5),
+      0x1000000_u64.unsafe_div(5 * 5 * 5),
+      0x1000000_u64.unsafe_div(5 * 5 * 5 * 5),
+      0x1000000_u64.unsafe_div(CONSTANT_55555),
+      0x1000000_u64.unsafe_div(CONSTANT_55555 * 5),
+      0x1000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5),
+      0x1000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5 * 5),
+      0x1000000_u64.unsafe_div(CONSTANT_55555 * 5 * 5 * 5 * 5),
+      0x1000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555),
+      0x1000000_u64.unsafe_div(CONSTANT_55555 * CONSTANT_55555 * 5),
+    ]
+
+    def min_exponent_fast_path : Int32
+      -10
+    end
+
+    def mantissa_explicit_bits : Int32
+      23 # also the shift applied to the exponent field in `BinaryFormat#to_float`
+    end
+
+    def max_exponent_round_to_even : Int32
+      10
+    end
+
+    def min_exponent_round_to_even : Int32
+      -17
+    end
+
+    def minimum_exponent : Int32
+      -127
+    end
+
+    def infinite_power : Int32
+      0xFF # eight one-bits, i.e. `exponent_mask` shifted right by 23
+    end
+
+    def sign_index : Int32
+      31 # shift applied to the sign bit in `BinaryFormat#to_float`
+    end
+
+    def max_exponent_fast_path : Int32
+      10
+    end
+
+    def max_mantissa_fast_path : UInt64
+      0x1000000_u64 # 1 << 24, same value as MAX_MANTISSA[0]
+    end
+
+    def max_mantissa_fast_path(power : Int64) : UInt64
+      # caller is responsible to ensure that
+      # power >= 0 && power <= 10
+      MAX_MANTISSA.unsafe_fetch(power) # unchecked index: see the caller contract above
+    end
+
+    def exact_power_of_ten(power : Int64) : Float32
+      POWERS_OF_TEN.unsafe_fetch(power) # unchecked index: the table only holds entries 0..10
+    end
+
+    def largest_power_of_ten : Int32
+      38
+    end
+
+    def smallest_power_of_ten : Int32
+      -64
+    end
+
+    def max_digits : Int32
+      114
+    end
+
+    def exponent_mask : EquivUint
+      0x7F800000_u32
+    end
+
+    def mantissa_mask : EquivUint
+      0x007FFFFF_u32
+    end
+
+    def hidden_bit_mask : EquivUint
+      0x00800000_u32
+    end
+  end
+
+  module BinaryFormat(T, EquivUint)
+    # Packs the sign, `am.power2` and `am.mantissa` into an `EquivUint` and reinterprets it as `T`. NOTE(crystal): the result is returned by value.
+    def to_float(negative : Bool, am : AdjustedMantissa) : T
+      sign_bits = EquivUint.new!(negative ? 1 : 0).unsafe_shl(sign_index)
+      exponent_bits = EquivUint.new!(am.power2).unsafe_shl(mantissa_explicit_bits)
+      packed = sign_bits | exponent_bits | EquivUint.new!(am.mantissa)
+      packed.unsafe_as(T)
+    end
+  end
+
+  # Returns a UInt64 holding 0x30 (ASCII '0') in every `UC`-sized lane; any width other than 1 or 2 gets two 32-bit lanes.
+  def self.int_cmp_zeros(uc : UC.class) : UInt64 forall UC
+    if sizeof(UC) == 1
+      0x3030303030303030_u64
+    elsif sizeof(UC) == 2
+      0x0030003000300030_u64
+    else
+      0x0000003000000030_u64
+    end
+  end
+
+  def self.int_cmp_len(uc : UC.class) : Int32 forall UC
+    sizeof(UInt64).unsafe_div(sizeof(UC)) # how many `UC` units fit in one UInt64
+  end
+end
diff --git a/src/float/fast_float/parse_number.cr b/src/float/fast_float/parse_number.cr
new file mode 100644
index 000000000000..3c1ac4c1cb24
--- /dev/null
+++ b/src/float/fast_float/parse_number.cr
@@ -0,0 +1,197 @@
+require "./ascii_number"
+require "./decimal_to_binary"
+require "./digit_comparison"
+require "./float_common"
+
+module Float::FastFloat
+  module Detail
+    # Parses an optionally signed "nan", "nan(n-char-seq)", "inf" or "infinity" from
+    # [first, last) into *value*; on failure returns the original *first* with `Errno::EINVAL`.
+    def self.parse_infnan(first : UC*, last : UC*, value : T*) : FromCharsResultT(UC) forall T, UC
+      ptr = first
+      minus_sign = false
+      if first.value === '-' # assume first < last, so dereference without checks
+        minus_sign = true
+        first += 1
+      elsif first.value === '+'
+        first += 1
+      end
+      # Both keywords are 3 units long: never compare when fewer than 3 remain before *last*.
+      if last - first >= 3
+        if FastFloat.fastfloat_strncasecmp(first, "nan".to_unsafe, 3)
+          first += 3
+          ptr = first
+          value.value = minus_sign ? -T::NAN : T::NAN
+          # Check for possible nan(n-char-seq-opt), C++17 20.19.3.7,
+          # C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan).
+          if first != last && first.value === '('
+            ptr2 = first + 1
+            while ptr2 != last
+              case ptr2.value.unsafe_chr
+              when ')'
+                ptr = ptr2 + 1 # valid nan(n-char-seq-opt)
+                break
+              when 'a'..'z', 'A'..'Z', '0'..'9', '_'
+                # Do nothing
+              else
+                break # forbidden char, not nan(n-char-seq-opt)
+              end
+              ptr2 += 1
+            end
+          end
+          return FromCharsResultT(UC).new(ptr, Errno::NONE)
+        end
+        if FastFloat.fastfloat_strncasecmp(first, "inf".to_unsafe, 3)
+          if last - first >= 8 && FastFloat.fastfloat_strncasecmp(first + 3, "inity".to_unsafe, 5)
+            ptr = first + 8
+          else
+            ptr = first + 3
+          end
+          value.value = minus_sign ? -T::INFINITY : T::INFINITY
+          return FromCharsResultT(UC).new(ptr, Errno::NONE)
+        end
+      end
+
+      FromCharsResultT(UC).new(ptr, Errno::EINVAL)
+    end
+
+    # See
+    # A fast function to check your floating-point rounding mode
+    # https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/
+    #
+    # This function is meant to be equivalent to:
+    # prior: #include <cfenv>
+    #  return fegetround() == FE_TONEAREST;
+    # However, it is expected to be much faster than the fegetround()
+    # function call.
+    #
+    # NOTE(crystal): uses a pointer instead of a volatile variable to prevent
+    # LLVM optimization
+    @@fmin : Float32* = Pointer(Float32).malloc(1, Float32::MIN_POSITIVE)
+
+    # Returns true if the floating-point rounding mode is to 'nearest'.
+    # It is the default on most systems. This function is meant to be inexpensive.
+    # Credit : @mwalcott3
+    def self.rounds_to_nearest? : Bool
+      fmin = @@fmin.value # we copy it so that it gets loaded at most once.
+
+      # Explanation:
+      # Only when fegetround() == FE_TONEAREST do we have that
+      # fmin + 1.0f == 1.0f - fmin.
+      #
+      # FE_UPWARD:
+      #  fmin + 1.0f > 1
+      #  1.0f - fmin == 1
+      #
+      # FE_DOWNWARD or  FE_TOWARDZERO:
+      #  fmin + 1.0f == 1
+      #  1.0f - fmin < 1
+      #
+      # Note: This may fail to be accurate if fast-math has been
+      # enabled, as rounding conventions may not apply.
+      fmin + 1.0_f32 == 1.0_f32 - fmin
+    end
+  end
+
+  module BinaryFormat(T, EquivUint)
+    # Converts the already tokenized number *pns* into *value*, trying Clinger's fast path
+    # first and otherwise going through `compute_float`, `compute_error` and `digit_comp`.
+    def from_chars_advanced(pns : ParsedNumberStringT(UC), value : T*) : FromCharsResultT(UC) forall UC
+      {% raise "only some floating-point types are supported" unless T == Float32 || T == Float64 %}
+
+      # TODO(crystal): support UInt16 and UInt32
+      {% raise "only UInt8 is supported" unless UC == UInt8 %}
+
+      ec = Errno::NONE # be optimistic
+      ptr = pns.lastmatch
+      # The implementation of the Clinger's fast path is convoluted because
+      # we want round-to-nearest in all cases, irrespective of the rounding mode
+      # selected on the thread.
+      # We proceed optimistically, assuming that `Detail.rounds_to_nearest?`
+      # returns true.
+      if (min_exponent_fast_path <= pns.exponent <= max_exponent_fast_path) && !pns.too_many_digits
+        # Unfortunately, the conventional Clinger's fast path is only possible
+        # when the system rounds to the nearest float.
+        # We expect the next branch to almost always be selected. We could check it
+        # first (before the previous branch), but there might be performance
+        # advantages at having the check be last.
+        if Detail.rounds_to_nearest?
+          # We have that fegetround() == FE_TONEAREST.
+          # Next is Clinger's fast path.
+          if pns.mantissa <= max_mantissa_fast_path
+            if pns.mantissa == 0
+              value.value = pns.negative ? T.new(-0.0) : T.new(0.0)
+              return FromCharsResultT(UC).new(ptr, ec)
+            end
+            value.value = T.new(pns.mantissa)
+            if pns.exponent < 0
+              value.value /= exact_power_of_ten(0_i64 &- pns.exponent)
+            else
+              value.value *= exact_power_of_ten(pns.exponent)
+            end
+            if pns.negative
+              value.value = -value.value
+            end
+            return FromCharsResultT(UC).new(ptr, ec)
+          end
+        else
+          # We do not have that fegetround() == FE_TONEAREST.
+          # Next is a modified Clinger's fast path, inspired by Jakub Jelínek's
+          # proposal
+          if pns.exponent >= 0 && pns.mantissa <= max_mantissa_fast_path(pns.exponent)
+            # Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD
+            if pns.mantissa == 0
+              value.value = pns.negative ? T.new(-0.0) : T.new(0.0)
+              return FromCharsResultT(UC).new(ptr, ec)
+            end
+            value.value = T.new(pns.mantissa) * exact_power_of_ten(pns.exponent)
+            if pns.negative
+              value.value = -value.value
+            end
+            return FromCharsResultT(UC).new(ptr, ec)
+          end
+        end
+      end
+      am = compute_float(pns.exponent, pns.mantissa)
+      if pns.too_many_digits && am.power2 >= 0
+        if am != compute_float(pns.exponent, pns.mantissa &+ 1) # results for mantissa and mantissa + 1 disagree
+          am = compute_error(pns.exponent, pns.mantissa)
+        end
+      end
+      # If `compute_float(pns.exponent, pns.mantissa)` (or `compute_error`) left us
+      # with an invalid power (am.power2 < 0), then we need to go the long
+      # way around again. This is very uncommon.
+      if am.power2 < 0
+        am = digit_comp(pns, am)
+      end
+      value.value = to_float(pns.negative, am)
+      # Test for over/underflow; *value* is still written in that case.
+      if (pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || am.power2 == infinite_power
+        ec = Errno::ERANGE
+      end
+      FromCharsResultT(UC).new(ptr, ec)
+    end
+
+    # Parses the units in [first, last) into *value*: empty input is `Errno::EINVAL`, a valid
+    # number goes to the parsed-number overload, anything else to `Detail.parse_infnan`
+    # unless `options.format.no_infnan?` forbids it.
+    def from_chars_advanced(first : UC*, last : UC*, value : T*, options : ParseOptionsT(UC)) : FromCharsResultT(UC) forall UC
+      {% raise "only some floating-point types are supported" unless T == Float32 || T == Float64 %}
+
+      # TODO(crystal): support UInt16 and UInt32
+      {% raise "only UInt8 is supported" unless UC == UInt8 %}
+
+      return FromCharsResultT(UC).new(first, Errno::EINVAL) if first == last
+      parsed = FastFloat.parse_number_string(first, last, options)
+      # A well-formed number is handed to the overload taking the parsed representation.
+      return from_chars_advanced(parsed, value) if parsed.valid
+
+      # Not a number: only the inf/nan spellings are left, when the format allows them.
+      if options.format.no_infnan?
+        FromCharsResultT(UC).new(first, Errno::EINVAL)
+      else
+        Detail.parse_infnan(first, last, value)
+      end
+    end
+  end
+end
diff --git a/src/lib_c/x86_64-windows-msvc/c/stdlib.cr b/src/lib_c/x86_64-windows-msvc/c/stdlib.cr
index 63c38003fd6a..140e49a229a7 100644
--- a/src/lib_c/x86_64-windows-msvc/c/stdlib.cr
+++ b/src/lib_c/x86_64-windows-msvc/c/stdlib.cr
@@ -11,13 +11,13 @@ lib LibC
   fun free(ptr : Void*) : Void
   fun malloc(size : SizeT) : Void*
   fun realloc(ptr : Void*, size : SizeT) : Void*
-  fun strtof(nptr : Char*, endptr : Char**) : Float
-  fun strtod(nptr : Char*, endptr : Char**) : Double
 
   alias InvalidParameterHandler = WCHAR*, WCHAR*, WCHAR*, UInt, UIntPtrT ->
   fun _set_invalid_parameter_handler(pNew : InvalidParameterHandler) : InvalidParameterHandler
 
   # unused
+  fun strtof(nptr : Char*, endptr : Char**) : Float
+  fun strtod(nptr : Char*, endptr : Char**) : Double
   fun atof(nptr : Char*) : Double
   fun div(numer : Int, denom : Int) : DivT
   fun putenv(string : Char*) : Int
diff --git a/src/string.cr b/src/string.cr
index d47e87638976..9bc9d0c22701 100644
--- a/src/string.cr
+++ b/src/string.cr
@@ -1,9 +1,9 @@
-require "c/stdlib"
 require "c/string"
 require "crystal/small_deque"
 {% unless flag?(:without_iconv) %}
   require "crystal/iconv"
 {% end %}
+require "float/fast_float"
 
 # A `String` represents an immutable sequence of UTF-8 characters.
 #
@@ -738,10 +738,7 @@ class String
 
   # :ditto:
   def to_f64?(whitespace : Bool = true, strict : Bool = true) : Float64?
-    to_f_impl(whitespace: whitespace, strict: strict) do
-      v = LibC.strtod self, out endptr
-      {v, endptr}
-    end
+    Float::FastFloat.to_f64?(self, whitespace, strict)
   end
 
   # Same as `#to_f` but returns a Float32.
@@ -751,59 +748,7 @@ class String
 
   # Same as `#to_f?` but returns a Float32.
   def to_f32?(whitespace : Bool = true, strict : Bool = true) : Float32?
-    to_f_impl(whitespace: whitespace, strict: strict) do
-      v = LibC.strtof self, out endptr
-      {v, endptr}
-    end
-  end
-
-  private def to_f_impl(whitespace : Bool = true, strict : Bool = true, &)
-    return unless first_char = self[0]?
-    return unless whitespace || '0' <= first_char <= '9' || first_char.in?('-', '+', 'i', 'I', 'n', 'N')
-
-    v, endptr = yield
-
-    unless v.finite?
-      startptr = to_unsafe
-      if whitespace
-        while startptr.value.unsafe_chr.ascii_whitespace?
-          startptr += 1
-        end
-      end
-      if startptr.value.unsafe_chr.in?('+', '-')
-        startptr += 1
-      end
-
-      if v.nan?
-        return unless startptr.value.unsafe_chr.in?('n', 'N')
-      else
-        return unless startptr.value.unsafe_chr.in?('i', 'I')
-      end
-    end
-
-    string_end = to_unsafe + bytesize
-
-    # blank string
-    return if endptr == to_unsafe
-
-    if strict
-      if whitespace
-        while endptr < string_end && endptr.value.unsafe_chr.ascii_whitespace?
-          endptr += 1
-        end
-      end
-      # reached the end of the string
-      v if endptr == string_end
-    else
-      ptr = to_unsafe
-      if whitespace
-        while ptr < string_end && ptr.value.unsafe_chr.ascii_whitespace?
-          ptr += 1
-        end
-      end
-      # consumed some bytes
-      v if endptr > ptr
-    end
+    Float::FastFloat.to_f32?(self, whitespace, strict)
   end
 
   # Returns the `Char` at the given *index*.
@@ -2166,7 +2111,8 @@ class String
     remove_excess_left(excess_left)
   end
 
-  private def calc_excess_right
+  # :nodoc:
+  def calc_excess_right
     if single_byte_optimizable?
       i = bytesize - 1
       while i >= 0 && to_unsafe[i].unsafe_chr.ascii_whitespace?
@@ -2204,7 +2150,8 @@ class String
     bytesize - byte_index
   end
 
-  private def calc_excess_left
+  # :nodoc:
+  def calc_excess_left
     if single_byte_optimizable?
       excess_left = 0
       # All strings end with '\0', and it's not a whitespace