Merge branch 'master' into fix-15269

crystal-lang · Dec 22, 2024 · a7321e8 · a7321e8
2 parents 6129001 + c5455ce
commit a7321e8
Show file tree

Hide file tree

Showing 14 changed files with 2,859 additions and 62 deletions.
diff --git a/spec/manual/string_to_f32_spec.cr b/spec/manual/string_to_f32_spec.cr
@@ -0,0 +1,27 @@
+require "spec"
+
+# Exhaustively checks that for all 4294967296 possible `Float32` values,
+# `to_s.to_f32` returns the original number. Splits the floats into 4096 bins
+# for better progress tracking. Also useful as a sort of benchmark.
+#
+# This was originally added when `String#to_f` moved from `LibC.strtod` to
+# `fast_float`, but is applicable to any other implementation as well.
+describe "x.to_s.to_f32 == x" do
+  # One example per value of the upper 12 bits; each example then walks all
+  # 0x100000 combinations of the lower 20 bits.
+  0x1000_u32.times do |high|
+    prefix = high.to_s(16).rjust(3, '0')
+
+    it "#{prefix}00000..#{prefix}fffff" do
+      0x100000.times do |low|
+        float = ((high << 20) | low).unsafe_as(Float32)
+        parsed = float.to_s.to_f32?.should_not be_nil
+
+        # NaN never compares equal to itself, so it only needs to stay a NaN
+        if float.nan?
+          parsed.nan?.should be_true
+          next
+        end
+
+        parsed.should eq(float)
+        # `eq` cannot tell `0.0` from `-0.0`, so compare the signs separately
+        Math.copysign(1, parsed).should eq(Math.copysign(1, float))
+      end
+    end
+  end
+end
diff --git a/spec/manual/string_to_f_supplemental_spec.cr b/spec/manual/string_to_f_supplemental_spec.cr
@@ -0,0 +1,103 @@
+# Runs the fast_float supplemental test suite:
+# https://github.com/fastfloat/supplemental_test_files
+#
+#   Supplemental data files for testing floating parsing (credit: Nigel Tao for
+#   the data)
+#
+#   LICENSE file (Apache 2): https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/LICENSE
+#
+# Due to the sheer volume of the test cases (5.2+ million test cases across
+# 270+ MB of text) these specs are not vendored into the Crystal repository.
+
+require "spec"
+require "http/client"
+require "../support/number"
+require "wait_group"
+
+# these specs permit underflow and overflow to return 0 and infinity
+# respectively (when `ret.ec == Errno::ERANGE`), so we have to use
+# `Float::FastFloat` directly
+# Parses all the bytes of *str* as a `Float32` by calling `Float::FastFloat`
+# directly. The result code must be either `Errno::NONE` or `Errno::ERANGE`;
+# the parsed value is returned in both cases.
+def fast_float_to_f32(str)
+  first = str.to_unsafe
+  last = first + str.bytesize
+  opts = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
+
+  result = uninitialized Float32
+  ret = Float::FastFloat::BinaryFormat_Float32.new.from_chars_advanced(first, last, pointerof(result), opts)
+  {Errno::NONE, Errno::ERANGE}.should contain(ret.ec)
+  result
+end
+
+# Parses all the bytes of *str* as a `Float64` by calling `Float::FastFloat`
+# directly. The result code must be either `Errno::NONE` or `Errno::ERANGE`;
+# the parsed value is returned in both cases.
+def fast_float_to_f64(str)
+  first = str.to_unsafe
+  last = first + str.bytesize
+  opts = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
+
+  result = uninitialized Float64
+  ret = Float::FastFloat::BinaryFormat_Float64.new.from_chars_advanced(first, last, pointerof(result), opts)
+  {Errno::NONE, Errno::ERANGE}.should contain(ret.ec)
+  result
+end
+
+# Base URL of the raw test data files; every suite name below is appended to
+# it. The URL is pinned to a fixed commit of the supplemental test repository.
+RAW_BASE_URL = "https://raw.githubusercontent.com/fastfloat/supplemental_test_files/7cc512a7c60361ebe1baf54991d7905efdc62aa0/data/" # @1.0.0
+
+# File names of the test suites to download, relative to `RAW_BASE_URL`.
+TEST_SUITES = %w(
+  freetype-2-7.txt
+  google-double-conversion.txt
+  google-wuffs.txt
+  ibm-fpgen.txt
+  lemire-fast-double-parser.txt
+  lemire-fast-float.txt
+  more-test-cases.txt
+  remyoudompheng-fptest-0.txt
+  remyoudompheng-fptest-1.txt
+  remyoudompheng-fptest-2.txt
+  remyoudompheng-fptest-3.txt
+  tencent-rapidjson.txt
+  ulfjack-ryu.txt
+)
+
+# Parsed test cases of every suite, keyed by the suite's file name. Each test
+# case holds the expected `Float32` bits, the expected `Float64` bits, and the
+# string to parse.
+test_suite_cache = {} of String => Array({UInt32, UInt64, String})
+puts "Fetching #{TEST_SUITES.size} test suites"
+
+# Download and parse all suites concurrently, one fiber per suite.
+WaitGroup.wait do |wg|
+  TEST_SUITES.each do |suite|
+    wg.spawn do
+      url = RAW_BASE_URL + suite
+
+      cache = HTTP::Client.get(url) do |res|
+        # the body of an error response would otherwise be reported as a
+        # malformed test case, hiding the real cause
+        raise "Failed to fetch #{url}: HTTP #{res.status_code}" unless res.success?
+
+        res.body_io.each_line.map do |line|
+          # each line has 4 fields: f16 bits, f32 bits, f64 bits, input string
+          args = line.split(' ')
+          raise "BUG: should have 4 args" unless args.size == 4
+
+          # f16_bits = args[0].to_u16(16)
+          f32_bits = args[1].to_u32(16)
+          f64_bits = args[2].to_u64(16)
+          str = args[3]
+
+          {f32_bits, f64_bits, str}
+        end.to_a
+      end
+
+      puts "#{cache.size} test cases cached from #{url}"
+      test_suite_cache[suite] = cache
+    end
+  end
+end
+
+# An exception raised inside a spawned fiber does not propagate to the main
+# fiber, so make sure no suite was silently dropped before any spec runs.
+missing_suites = TEST_SUITES - test_suite_cache.keys
+raise "Failed to fetch test suites: #{missing_suites.join(", ")}" unless missing_suites.empty?
+
+puts "There are a total of #{test_suite_cache.sum(&.last.size)} test cases"
+
+describe String do
+  describe "#to_f" do
+    # One example per (suite, hardware rounding mode) pair; every example
+    # checks all cached test cases of its suite against the expected bits.
+    test_suite_cache.each do |suite_name, test_cases|
+      describe suite_name do
+        each_hardware_rounding_mode do |_mode, mode_name|
+          it mode_name do
+            test_cases.each do |test_case|
+              expected_f32, expected_f64, input = test_case
+
+              fast_float_to_f32(input).unsafe_as(UInt32).should eq(expected_f32)
+              fast_float_to_f64(input).unsafe_as(UInt64).should eq(expected_f64)
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr
@@ -482,6 +482,7 @@ describe "String" do
     it { "1Y2P0IJ32E8E7".to_i64(36).should eq(9223372036854775807) }
   end
 
+  # more specs are available in `spec/manual/string_to_f_supplemental_spec.cr`
   it "does to_f" do
     expect_raises(ArgumentError) { "".to_f }
     "".to_f?.should be_nil
@@ -503,6 +504,7 @@ describe "String" do
     "  1234.56  ".to_f?(whitespace: false).should be_nil
     expect_raises(ArgumentError) { "  1234.56foo".to_f }
     "  1234.56foo".to_f?.should be_nil
+    "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f.should eq(1234.56_f64)
     "123.45 x".to_f64(strict: false).should eq(123.45_f64)
     expect_raises(ArgumentError) { "x1.2".to_f64 }
     "x1.2".to_f64?.should be_nil
@@ -547,6 +549,7 @@ describe "String" do
     "  1234.56  ".to_f32?(whitespace: false).should be_nil
     expect_raises(ArgumentError) { "  1234.56foo".to_f32 }
     "  1234.56foo".to_f32?.should be_nil
+    "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f32.should eq(1234.56_f32)
     "123.45 x".to_f32(strict: false).should eq(123.45_f32)
     expect_raises(ArgumentError) { "x1.2".to_f32 }
     "x1.2".to_f32?.should be_nil
@@ -590,6 +593,7 @@ describe "String" do
     "  1234.56  ".to_f64?(whitespace: false).should be_nil
     expect_raises(ArgumentError) { "  1234.56foo".to_f64 }
     "  1234.56foo".to_f64?.should be_nil
+    "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f64.should eq(1234.56_f64)
     "123.45 x".to_f64(strict: false).should eq(123.45_f64)
     expect_raises(ArgumentError) { "x1.2".to_f64 }
     "x1.2".to_f64?.should be_nil

diff --git a/spec/support/number.cr b/spec/support/number.cr
@@ -94,3 +94,35 @@ macro hexfloat(str)
     ::Float64.parse_hexfloat({{ str }})
   {% end %}
 end
+
+# See also: https://github.com/crystal-lang/crystal/issues/15192
+#
+# Minimal `<fenv.h>` bindings for switching the hardware rounding mode in
+# specs. The `FE_*` values are not portable: they mirror the rounding-control
+# bits of the target's floating-point control register, so they must match the
+# C library headers of each platform.
+lib LibC
+  {% if flag?(:win32) %}
+    FE_TONEAREST  = 0x00000000
+    FE_DOWNWARD   = 0x00000100
+    FE_UPWARD     = 0x00000200
+    FE_TOWARDZERO = 0x00000300
+  {% elsif flag?(:aarch64) || flag?(:arm) %}
+    # the x86 values below are rejected by `fesetround` on these targets,
+    # which would leave every mode running as round-to-nearest
+    FE_TONEAREST  = 0x00000000
+    FE_UPWARD     = 0x00400000
+    FE_DOWNWARD   = 0x00800000
+    FE_TOWARDZERO = 0x00C00000
+  {% else %}
+    # x86 and x86-64 values; other architectures may need their own branch
+    FE_TONEAREST  = 0x00000000
+    FE_DOWNWARD   = 0x00000400
+    FE_UPWARD     = 0x00000800
+    FE_TOWARDZERO = 0x00000C00
+  {% end %}
+
+  fun fegetround : Int
+  fun fesetround(round : Int) : Int
+end
+
+# Yields with the hardware rounding mode set to *mode* (one of the
+# `LibC::FE_*` constants), then restores the mode that was active before the
+# call, even if the block raises.
+def with_hardware_rounding_mode(mode, &)
+  previous_mode = LibC.fegetround
+  LibC.fesetround(mode)
+
+  begin
+    yield
+  ensure
+    LibC.fesetround(previous_mode)
+  end
+end
+
+# Yields once per hardware rounding mode, with that mode active for the
+# duration of the block. The block receives the `LibC::FE_*` constant and its
+# name as a string.
+def each_hardware_rounding_mode(&)
+  {% for name in ["FE_TONEAREST", "FE_DOWNWARD", "FE_UPWARD", "FE_TOWARDZERO"] %}
+    with_hardware_rounding_mode(LibC::{{ name.id }}) { yield LibC::{{ name.id }}, {{ name }} }
+  {% end %}
+end
diff --git a/src/float/fast_float.cr b/src/float/fast_float.cr
@@ -0,0 +1,75 @@
+struct Float
+  # :nodoc:
+  # Source port of the floating-point part of fast_float for C++:
+  # https://github.com/fastfloat/fast_float
+  #
+  # fast_float implements the C++17 `std::from_chars`, which accepts a subset of
+  # the C `strtod` / `strtof`'s string format:
+  #
+  # - a leading plus sign is disallowed, but both fast_float and this port
+  #   accept it;
+  # - the exponent may be required or disallowed, depending on the format
+  #   argument (this port always allows both);
+  # - hexfloats are not enabled by default, and fast_float doesn't implement
+  #   them; (https://github.com/fastfloat/fast_float/issues/124)
+  # - hexfloats cannot start with `0x` or `0X`.
+  #
+  # The following is their license:
+  #
+  #   Licensed under either of Apache License, Version 2.0 or MIT license or
+  #   BOOST license.
+  #
+  #   Unless you explicitly state otherwise, any contribution intentionally
+  #   submitted for inclusion in this repository by you, as defined in the
+  #   Apache-2.0 license, shall be triple licensed as above, without any
+  #   additional terms or conditions.
+  #
+  # Main differences from the original fast_float:
+  #
+  # - Only `UC == UInt8` is implemented and tested, not the other wide chars;
+  # - No explicit SIMD (the original mainly uses this for wide char strings).
+  #
+  # The following compile-time configuration is assumed:
+  #
+  # - #define FASTFLOAT_ALLOWS_LEADING_PLUS
+  # - #define FLT_EVAL_METHOD 0
+  module FastFloat
+    # Current revision: https://github.com/fastfloat/fast_float/tree/v6.1.6
+
+    # Parses *str* as a `Float64`, returning `nil` on failure.
+    #
+    # If *whitespace* is true, the parsed byte range is narrowed on both ends
+    # by `String#calc_excess_left` and `String#calc_excess_right` (presumably
+    # the sizes of the leading and trailing whitespace; defined elsewhere).
+    # If *strict* is true, the parser must consume the whole remaining range.
+    def self.to_f64?(str : String, whitespace : Bool, strict : Bool) : Float64?
+      first = str.to_unsafe
+      last = first + str.bytesize
+
+      if whitespace
+        first += str.calc_excess_left
+        last -= str.calc_excess_right
+      end
+
+      opts = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
+      result = uninitialized Float64
+      ret = BinaryFormat_Float64.new.from_chars_advanced(first, last, pointerof(result), opts)
+
+      return unless ret.ec == Errno::NONE
+      # unparsed trailing bytes are only tolerated in non-strict mode
+      return if strict && ret.ptr != last
+      result
+    end
+
+    # Parses *str* as a `Float32`, returning `nil` on failure.
+    #
+    # If *whitespace* is true, the parsed byte range is narrowed on both ends
+    # by `String#calc_excess_left` and `String#calc_excess_right` (presumably
+    # the sizes of the leading and trailing whitespace; defined elsewhere).
+    # If *strict* is true, the parser must consume the whole remaining range.
+    def self.to_f32?(str : String, whitespace : Bool, strict : Bool) : Float32?
+      first = str.to_unsafe
+      last = first + str.bytesize
+
+      if whitespace
+        first += str.calc_excess_left
+        last -= str.calc_excess_right
+      end
+
+      opts = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
+      result = uninitialized Float32
+      ret = BinaryFormat_Float32.new.from_chars_advanced(first, last, pointerof(result), opts)
+
+      return unless ret.ec == Errno::NONE
+      # unparsed trailing bytes are only tolerated in non-strict mode
+      return if strict && ret.ptr != last
+      result
+    end
+  end
+end
+
+require "./fast_float/parse_number"