From 605a84ffc96efb50c1d9b45ec6e62fef947f6c45 Mon Sep 17 00:00:00 2001
From: Arne H Juul <arnej27959@users.noreply.github.com>
Date: Fri, 6 Sep 2024 07:00:48 +0200
Subject: [PATCH] remove unused and confusing float16 constants (#21999)

### Description
Remove unused and confusing special constants in MLFloat16 and BFloat16
types.

### Motivation and Context
While looking at adding a specialization for std::numeric_limits for the
16-bit floating point types, I found that there are various special
constants in those types that are confusing or just wrong.

MLFLoat16::Epsilon is not an epsilon at all, but approximates "e". Looks
like a copy-paste bug.
BFloat16::Epsilon does not correspond to `numeric_limits::epsilon()`,
nor even to the C# Float.Epsilon.
Instead, it corresponds to `numeric_limits::min()` which was really
confusing to me.

The "MinValue" constants does correspond to the C# `Float.MinValue`
constant, but this is C++ so it would be better renamed to "LowestValue"
since it corresponds to `numeric_limits::lowest()`. As it was unused
except for some unit tests I have replaced it with the equivalent
`MaxValue.Negate()` here.

There's also an unused `kSignaling_NaNBits` constant which is just wrong
(has the same value as `kPositiveInfinityBits` instead of a NaN).
---
 include/onnxruntime/core/framework/float16.h  |  4 ---
 .../core/session/onnxruntime_float16.h        |  5 ---
 onnxruntime/core/framework/data_types.cc      |  4 ---
 onnxruntime/test/framework/data_types_test.cc | 16 ++++-----
 .../test/shared_lib/test_nontensor_types.cc   | 34 +++++++------------
 5 files changed, 21 insertions(+), 42 deletions(-)
diff --git a/include/onnxruntime/core/framework/float16.h b/include/onnxruntime/core/framework/float16.h
index 2d289d6febb8d..1f2f175c6e691 100644
--- a/include/onnxruntime/core/framework/float16.h
+++ b/include/onnxruntime/core/framework/float16.h
@@ -45,8 +45,6 @@ struct MLFloat16 : onnxruntime_float16::Float16Impl<MLFloat16> {
   static const MLFloat16 NegativeNaN;
   static const MLFloat16 Infinity;
   static const MLFloat16 NegativeInfinity;
-  static const MLFloat16 Epsilon;
-  static const MLFloat16 MinValue;
   static const MLFloat16 MaxValue;
   static const MLFloat16 Zero;
   static const MLFloat16 One;
@@ -181,8 +179,6 @@ struct BFloat16 : onnxruntime_float16::BFloat16Impl<BFloat16> {
   static const BFloat16 NegativeNaN;
   static const BFloat16 Infinity;
   static const BFloat16 NegativeInfinity;
-  static const BFloat16 Epsilon;
-  static const BFloat16 MinValue;
   static const BFloat16 MaxValue;
   static const BFloat16 Zero;
   static const BFloat16 One;
diff --git a/include/onnxruntime/core/session/onnxruntime_float16.h b/include/onnxruntime/core/session/onnxruntime_float16.h
index 0b066a9cc9ad1..408d3ccfb2416 100644
--- a/include/onnxruntime/core/session/onnxruntime_float16.h
+++ b/include/onnxruntime/core/session/onnxruntime_float16.h
@@ -75,8 +75,6 @@ struct Float16Impl {
   static constexpr uint16_t kNegativeInfinityBits = 0xFC00U;
   static constexpr uint16_t kPositiveQNaNBits = 0x7E00U;
   static constexpr uint16_t kNegativeQNaNBits = 0xFE00U;
-  static constexpr uint16_t kEpsilonBits = 0x4170U;
-  static constexpr uint16_t kMinValueBits = 0xFBFFU;  // Minimum normal number
   static constexpr uint16_t kMaxValueBits = 0x7BFFU;  // Largest normal number
   static constexpr uint16_t kOneBits = 0x3C00U;
   static constexpr uint16_t kMinusOneBits = 0xBC00U;
@@ -364,9 +362,6 @@ struct BFloat16Impl {
   static constexpr uint16_t kNegativeInfinityBits = 0xFF80U;
   static constexpr uint16_t kPositiveQNaNBits = 0x7FC1U;
   static constexpr uint16_t kNegativeQNaNBits = 0xFFC1U;
-  static constexpr uint16_t kSignaling_NaNBits = 0x7F80U;
-  static constexpr uint16_t kEpsilonBits = 0x0080U;
-  static constexpr uint16_t kMinValueBits = 0xFF7FU;
   static constexpr uint16_t kMaxValueBits = 0x7F7FU;
   static constexpr uint16_t kRoundToNearest = 0x7FFFU;
   static constexpr uint16_t kOneBits = 0x3F80U;
diff --git a/onnxruntime/core/framework/data_types.cc b/onnxruntime/core/framework/data_types.cc
index 72ab5a9e898c7..322afcf384864 100644
--- a/onnxruntime/core/framework/data_types.cc
+++ b/onnxruntime/core/framework/data_types.cc
@@ -36,8 +36,6 @@ const MLFloat16 MLFloat16::NaN(MLFloat16::FromBits(MLFloat16::kPositiveQNaNBits)
 const MLFloat16 MLFloat16::NegativeNaN(MLFloat16::FromBits(MLFloat16::kNegativeQNaNBits));
 const MLFloat16 MLFloat16::Infinity(MLFloat16::FromBits(MLFloat16::kPositiveInfinityBits));
 const MLFloat16 MLFloat16::NegativeInfinity(MLFloat16::FromBits(MLFloat16::kNegativeInfinityBits));
-const MLFloat16 MLFloat16::Epsilon(MLFloat16::FromBits(MLFloat16::kEpsilonBits));
-const MLFloat16 MLFloat16::MinValue(MLFloat16::FromBits(MLFloat16::kMinValueBits));
 const MLFloat16 MLFloat16::MaxValue(MLFloat16::FromBits(MLFloat16::kMaxValueBits));
 const MLFloat16 MLFloat16::Zero(MLFloat16::FromBits(0));
 const MLFloat16 MLFloat16::One(MLFloat16::FromBits(MLFloat16::kOneBits));
@@ -47,8 +45,6 @@ const BFloat16 BFloat16::NaN(BFloat16::FromBits(BFloat16::kPositiveQNaNBits));
 const BFloat16 BFloat16::NegativeNaN(BFloat16::FromBits(BFloat16::kNegativeQNaNBits));
 const BFloat16 BFloat16::Infinity(BFloat16::FromBits(BFloat16::kPositiveInfinityBits));
 const BFloat16 BFloat16::NegativeInfinity(BFloat16::FromBits(BFloat16::kNegativeInfinityBits));
-const BFloat16 BFloat16::Epsilon(BFloat16::FromBits(BFloat16::kEpsilonBits));
-const BFloat16 BFloat16::MinValue(BFloat16::FromBits(BFloat16::kMinValueBits));
 const BFloat16 BFloat16::MaxValue(BFloat16::FromBits(BFloat16::kMaxValueBits));
 const BFloat16 BFloat16::Zero(BFloat16::FromBits(0));
 const BFloat16 BFloat16::One(BFloat16::FromBits(BFloat16::kOneBits));
diff --git a/onnxruntime/test/framework/data_types_test.cc b/onnxruntime/test/framework/data_types_test.cc
index 897bc71b50c51..871b255831029 100644
--- a/onnxruntime/test/framework/data_types_test.cc
+++ b/onnxruntime/test/framework/data_types_test.cc
@@ -480,7 +480,7 @@ TEST_F(DataTypeTest, MLFloat16Comparision) {
   const MLFloat16 right = MLFloat16(66.66f);
   const MLFloat16 right_same = MLFloat16(66.66f);
 
-  EXPECT_TRUE(MLFloat16::Epsilon < right);
+  EXPECT_TRUE(MLFloat16::One < right);
 
   EXPECT_EQ(left, left_same);
   EXPECT_NE(left, left_same.Negate());
@@ -513,14 +513,14 @@ TEST_F(DataTypeTest, MLFloat16NaNComparision) {
 
   EXPECT_FALSE(MLFloat16::MaxValue < MLFloat16::NaN);
   EXPECT_FALSE(MLFloat16::MaxValue == MLFloat16::NaN);
-  EXPECT_FALSE(MLFloat16::MinValue < MLFloat16::NaN);
+  EXPECT_FALSE(MLFloat16::MaxValue.Negate() < MLFloat16::NaN);
   EXPECT_FALSE(MLFloat16::NaN < MLFloat16::MaxValue);
 
-  EXPECT_TRUE(MLFloat16::MinValue < MLFloat16::MaxValue);
+  EXPECT_TRUE(MLFloat16::Zero < MLFloat16::MaxValue);
 }
 
 TEST_F(DataTypeTest, MLFloat16Infinity) {
-  EXPECT_FALSE(MLFloat16::MinValue.IsInfinity());
+  EXPECT_FALSE(MLFloat16::MaxValue.Negate().IsInfinity());
   EXPECT_FALSE(MLFloat16::MaxValue.IsInfinity());
   EXPECT_TRUE(MLFloat16::MaxValue.IsFinite());
 
@@ -625,7 +625,7 @@ TEST_F(DataTypeTest, BFloat16Comparision) {
   const BFloat16 right = BFloat16(66.66f);
   const BFloat16 right_same = BFloat16(66.66f);
 
-  EXPECT_TRUE(BFloat16::Epsilon < right);
+  EXPECT_TRUE(BFloat16::One < right);
 
   EXPECT_EQ(left, left_same);
   EXPECT_NE(left, left_same.Negate());
@@ -657,14 +657,14 @@ TEST_F(DataTypeTest, BFloat16NaNComparision) {
 
   EXPECT_FALSE(BFloat16::MaxValue < BFloat16::NaN);
   EXPECT_FALSE(BFloat16::MaxValue == BFloat16::NaN);
-  EXPECT_FALSE(BFloat16::MinValue < BFloat16::NaN);
+  EXPECT_FALSE(BFloat16::MaxValue.Negate() < BFloat16::NaN);
   EXPECT_FALSE(BFloat16::NaN < BFloat16::MaxValue);
 
-  EXPECT_TRUE(BFloat16::MinValue < BFloat16::MaxValue);
+  EXPECT_TRUE(BFloat16::Zero < BFloat16::MaxValue);
 }
 
 TEST_F(DataTypeTest, BFloat16Infinity) {
-  EXPECT_FALSE(BFloat16::MinValue.IsInfinity());
+  EXPECT_FALSE(BFloat16::MaxValue.Negate().IsInfinity());
   EXPECT_FALSE(BFloat16::MaxValue.IsInfinity());
   EXPECT_TRUE(BFloat16::MaxValue.IsFinite());
 
diff --git a/onnxruntime/test/shared_lib/test_nontensor_types.cc b/onnxruntime/test/shared_lib/test_nontensor_types.cc
index 5fa4fb31e1c91..8171a6eecc91d 100644
--- a/onnxruntime/test/shared_lib/test_nontensor_types.cc
+++ b/onnxruntime/test/shared_lib/test_nontensor_types.cc
@@ -370,11 +370,10 @@ TEST(CPPApi, Float16Zeros) {
 }
 
 namespace {
-const auto EpsilonFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kEpsilonBits);
 const auto NaNFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kPositiveQNaNBits);
-const auto MinValueFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kMinValueBits);
 const auto MaxValueFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kMaxValueBits);
 const auto InfinityFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kPositiveInfinityBits);
+const auto ZeroFl16 = Ort::Float16_t::FromBits(0x0000);
 }  // namespace
 
 TEST(CPPApi, Float16Comparision) {
@@ -383,8 +382,6 @@ TEST(CPPApi, Float16Comparision) {
   const auto right = Ort::Float16_t(66.66f);
   const auto right_same = Ort::Float16_t(66.66f);
 
-  EXPECT_LT(EpsilonFl16, right);
-
   EXPECT_EQ(left, left_same);
   EXPECT_NE(left, left_same.Negate());
 
@@ -416,13 +413,12 @@ TEST(CPPApi, Float16NaNComparision) {
 
   EXPECT_FALSE(MaxValueFl16 < NaNFl16);
   EXPECT_FALSE(MaxValueFl16 == NaNFl16);
-  EXPECT_FALSE(NaNFl16 < MinValueFl16);
+  EXPECT_FALSE(NaNFl16 < MaxValueFl16);
 
-  EXPECT_LT(MinValueFl16, MaxValueFl16);
+  EXPECT_LT(ZeroFl16, MaxValueFl16);
 }
 
 TEST(CPPApi, Float16Infinity) {
-  EXPECT_FALSE(MinValueFl16.IsInfinity());
   EXPECT_FALSE(MaxValueFl16.IsInfinity());
   EXPECT_TRUE(MaxValueFl16.IsFinite());
 
@@ -530,11 +526,10 @@ TEST(CPPApi, BFloat16Zeros) {
 }
 
 namespace {
-const auto EpsilonBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kEpsilonBits);
-const auto NaNBfl15 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveQNaNBits);
-const auto MinValueBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kMinValueBits);
+const auto NaNBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveQNaNBits);
 const auto MaxValueBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kMaxValueBits);
 const auto InfinityBFl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveInfinityBits);
+const auto ZeroBfl16 = Ort::BFloat16_t::FromBits(0x0000);
 }  // namespace
 
 TEST(CPPApi, BFloat16Comparision) {
@@ -543,8 +538,6 @@ TEST(CPPApi, BFloat16Comparision) {
   const auto right = Ort::BFloat16_t(66.66f);
   const auto right_same = Ort::BFloat16_t(66.66f);
 
-  EXPECT_LT(EpsilonBfl16, right);
-
   EXPECT_EQ(left, left_same);
   EXPECT_NE(left, left_same.Negate());
 
@@ -561,7 +554,7 @@ TEST(CPPApi, BFloat16TestNAN) {
   EXPECT_TRUE(fp16NANFromSingle.IsNaN());
 
   // NaN are not equal to each other
-  EXPECT_NE(NaNBfl15, fp16NANFromSingle);
+  EXPECT_NE(NaNBfl16, fp16NANFromSingle);
 
   const float NanFromBFloat16 = fp16NANFromSingle.ToFloat();
   EXPECT_TRUE(std::isnan(NanFromBFloat16));
@@ -570,19 +563,18 @@ TEST(CPPApi, BFloat16TestNAN) {
 }
 
 TEST(CPPApi, BFloat16NaNComparision) {
-  EXPECT_FALSE(NaNBfl15 < NaNBfl15);
-  EXPECT_TRUE(NaNBfl15 != NaNBfl15);
-  EXPECT_FALSE(NaNBfl15 == NaNBfl15);
+  EXPECT_FALSE(NaNBfl16 < NaNBfl16);
+  EXPECT_TRUE(NaNBfl16 != NaNBfl16);
+  EXPECT_FALSE(NaNBfl16 == NaNBfl16);
 
-  EXPECT_FALSE(MaxValueBfl16 < NaNBfl15);
-  EXPECT_FALSE(MaxValueBfl16 == NaNBfl15);
-  EXPECT_FALSE(NaNBfl15 < MinValueBfl16);
+  EXPECT_FALSE(MaxValueBfl16 < NaNBfl16);
+  EXPECT_FALSE(MaxValueBfl16 == NaNBfl16);
+  EXPECT_FALSE(NaNBfl16 < MaxValueBfl16);
 
-  EXPECT_LT(MinValueBfl16, MaxValueBfl16);
+  EXPECT_LT(ZeroBfl16, MaxValueBfl16);
 }
 
 TEST(CPPApi, BFloat16Infinity) {
-  EXPECT_FALSE(MinValueBfl16.IsInfinity());
   EXPECT_FALSE(MaxValueBfl16.IsInfinity());
   EXPECT_TRUE(MaxValueBfl16.IsFinite());