From 605a84ffc96efb50c1d9b45ec6e62fef947f6c45 Mon Sep 17 00:00:00 2001 From: Arne H Juul Date: Fri, 6 Sep 2024 07:00:48 +0200 Subject: [PATCH] remove unused and confusing float16 constants (#21999) ### Description Remove unused and confusing special constants in MLFloat16 and BFloat16 types. ### Motivation and Context While looking at adding a specialization for std::numeric_limits for the 16-bit floating point types, I found that there are various special constants in those types that are confusing or just wrong. MLFLoat16::Epsilon is not an epsilon at all, but approximates "e". Looks like a copy-paste bug. BFloat16::Epsilon does not correspond to `numeric_limits::epsilon()`, nor even to the C# Float.Epsilon. Instead, it corresponds to `numeric_limits::min()` which was really confusing to me. The "MinValue" constants does correspond to the C# `Float.MinValue` constant, but this is C++ so it would be better renamed to "LowestValue" since it corresponds to `numeric_limits::lowest()`. As it was unused except for some unit tests I have replaced it with the equivalent `MaxValue.Negate()` here. There's also an unused `kSignaling_NaNBits` constant which is just wrong (has the same value as `kPositiveInfinityBits` instead of a NaN). --- include/onnxruntime/core/framework/float16.h | 4 --- .../core/session/onnxruntime_float16.h | 5 --- onnxruntime/core/framework/data_types.cc | 4 --- onnxruntime/test/framework/data_types_test.cc | 16 ++++----- .../test/shared_lib/test_nontensor_types.cc | 34 +++++++------------ 5 files changed, 21 insertions(+), 42 deletions(-) diff --git a/include/onnxruntime/core/framework/float16.h b/include/onnxruntime/core/framework/float16.h index 2d289d6febb8d..1f2f175c6e691 100644 --- a/include/onnxruntime/core/framework/float16.h +++ b/include/onnxruntime/core/framework/float16.h @@ -45,8 +45,6 @@ struct MLFloat16 : onnxruntime_float16::Float16Impl { static const MLFloat16 NegativeNaN; static const MLFloat16 Infinity; static const MLFloat16 NegativeInfinity; - static const MLFloat16 Epsilon; - static const MLFloat16 MinValue; static const MLFloat16 MaxValue; static const MLFloat16 Zero; static const MLFloat16 One; @@ -181,8 +179,6 @@ struct BFloat16 : onnxruntime_float16::BFloat16Impl { static const BFloat16 NegativeNaN; static const BFloat16 Infinity; static const BFloat16 NegativeInfinity; - static const BFloat16 Epsilon; - static const BFloat16 MinValue; static const BFloat16 MaxValue; static const BFloat16 Zero; static const BFloat16 One; diff --git a/include/onnxruntime/core/session/onnxruntime_float16.h b/include/onnxruntime/core/session/onnxruntime_float16.h index 0b066a9cc9ad1..408d3ccfb2416 100644 --- a/include/onnxruntime/core/session/onnxruntime_float16.h +++ b/include/onnxruntime/core/session/onnxruntime_float16.h @@ -75,8 +75,6 @@ struct Float16Impl { static constexpr uint16_t kNegativeInfinityBits = 0xFC00U; static constexpr uint16_t kPositiveQNaNBits = 0x7E00U; static constexpr uint16_t kNegativeQNaNBits = 0xFE00U; - static constexpr uint16_t kEpsilonBits = 0x4170U; - static constexpr uint16_t kMinValueBits = 0xFBFFU; // Minimum normal number static constexpr uint16_t kMaxValueBits = 0x7BFFU; // Largest normal number static constexpr uint16_t kOneBits = 0x3C00U; static constexpr uint16_t kMinusOneBits = 0xBC00U; @@ -364,9 +362,6 @@ struct BFloat16Impl { static constexpr uint16_t kNegativeInfinityBits = 0xFF80U; static constexpr uint16_t kPositiveQNaNBits = 0x7FC1U; static constexpr uint16_t kNegativeQNaNBits = 0xFFC1U; - static constexpr uint16_t kSignaling_NaNBits = 0x7F80U; - static constexpr uint16_t kEpsilonBits = 0x0080U; - static constexpr uint16_t kMinValueBits = 0xFF7FU; static constexpr uint16_t kMaxValueBits = 0x7F7FU; static constexpr uint16_t kRoundToNearest = 0x7FFFU; static constexpr uint16_t kOneBits = 0x3F80U; diff --git a/onnxruntime/core/framework/data_types.cc b/onnxruntime/core/framework/data_types.cc index 72ab5a9e898c7..322afcf384864 100644 --- a/onnxruntime/core/framework/data_types.cc +++ b/onnxruntime/core/framework/data_types.cc @@ -36,8 +36,6 @@ const MLFloat16 MLFloat16::NaN(MLFloat16::FromBits(MLFloat16::kPositiveQNaNBits) const MLFloat16 MLFloat16::NegativeNaN(MLFloat16::FromBits(MLFloat16::kNegativeQNaNBits)); const MLFloat16 MLFloat16::Infinity(MLFloat16::FromBits(MLFloat16::kPositiveInfinityBits)); const MLFloat16 MLFloat16::NegativeInfinity(MLFloat16::FromBits(MLFloat16::kNegativeInfinityBits)); -const MLFloat16 MLFloat16::Epsilon(MLFloat16::FromBits(MLFloat16::kEpsilonBits)); -const MLFloat16 MLFloat16::MinValue(MLFloat16::FromBits(MLFloat16::kMinValueBits)); const MLFloat16 MLFloat16::MaxValue(MLFloat16::FromBits(MLFloat16::kMaxValueBits)); const MLFloat16 MLFloat16::Zero(MLFloat16::FromBits(0)); const MLFloat16 MLFloat16::One(MLFloat16::FromBits(MLFloat16::kOneBits)); @@ -47,8 +45,6 @@ const BFloat16 BFloat16::NaN(BFloat16::FromBits(BFloat16::kPositiveQNaNBits)); const BFloat16 BFloat16::NegativeNaN(BFloat16::FromBits(BFloat16::kNegativeQNaNBits)); const BFloat16 BFloat16::Infinity(BFloat16::FromBits(BFloat16::kPositiveInfinityBits)); const BFloat16 BFloat16::NegativeInfinity(BFloat16::FromBits(BFloat16::kNegativeInfinityBits)); -const BFloat16 BFloat16::Epsilon(BFloat16::FromBits(BFloat16::kEpsilonBits)); -const BFloat16 BFloat16::MinValue(BFloat16::FromBits(BFloat16::kMinValueBits)); const BFloat16 BFloat16::MaxValue(BFloat16::FromBits(BFloat16::kMaxValueBits)); const BFloat16 BFloat16::Zero(BFloat16::FromBits(0)); const BFloat16 BFloat16::One(BFloat16::FromBits(BFloat16::kOneBits)); diff --git a/onnxruntime/test/framework/data_types_test.cc b/onnxruntime/test/framework/data_types_test.cc index 897bc71b50c51..871b255831029 100644 --- a/onnxruntime/test/framework/data_types_test.cc +++ b/onnxruntime/test/framework/data_types_test.cc @@ -480,7 +480,7 @@ TEST_F(DataTypeTest, MLFloat16Comparision) { const MLFloat16 right = MLFloat16(66.66f); const MLFloat16 right_same = MLFloat16(66.66f); - EXPECT_TRUE(MLFloat16::Epsilon < right); + EXPECT_TRUE(MLFloat16::One < right); EXPECT_EQ(left, left_same); EXPECT_NE(left, left_same.Negate()); @@ -513,14 +513,14 @@ TEST_F(DataTypeTest, MLFloat16NaNComparision) { EXPECT_FALSE(MLFloat16::MaxValue < MLFloat16::NaN); EXPECT_FALSE(MLFloat16::MaxValue == MLFloat16::NaN); - EXPECT_FALSE(MLFloat16::MinValue < MLFloat16::NaN); + EXPECT_FALSE(MLFloat16::MaxValue.Negate() < MLFloat16::NaN); EXPECT_FALSE(MLFloat16::NaN < MLFloat16::MaxValue); - EXPECT_TRUE(MLFloat16::MinValue < MLFloat16::MaxValue); + EXPECT_TRUE(MLFloat16::Zero < MLFloat16::MaxValue); } TEST_F(DataTypeTest, MLFloat16Infinity) { - EXPECT_FALSE(MLFloat16::MinValue.IsInfinity()); + EXPECT_FALSE(MLFloat16::MaxValue.Negate().IsInfinity()); EXPECT_FALSE(MLFloat16::MaxValue.IsInfinity()); EXPECT_TRUE(MLFloat16::MaxValue.IsFinite()); @@ -625,7 +625,7 @@ TEST_F(DataTypeTest, BFloat16Comparision) { const BFloat16 right = BFloat16(66.66f); const BFloat16 right_same = BFloat16(66.66f); - EXPECT_TRUE(BFloat16::Epsilon < right); + EXPECT_TRUE(BFloat16::One < right); EXPECT_EQ(left, left_same); EXPECT_NE(left, left_same.Negate()); @@ -657,14 +657,14 @@ TEST_F(DataTypeTest, BFloat16NaNComparision) { EXPECT_FALSE(BFloat16::MaxValue < BFloat16::NaN); EXPECT_FALSE(BFloat16::MaxValue == BFloat16::NaN); - EXPECT_FALSE(BFloat16::MinValue < BFloat16::NaN); + EXPECT_FALSE(BFloat16::MaxValue.Negate() < BFloat16::NaN); EXPECT_FALSE(BFloat16::NaN < BFloat16::MaxValue); - EXPECT_TRUE(BFloat16::MinValue < BFloat16::MaxValue); + EXPECT_TRUE(BFloat16::Zero < BFloat16::MaxValue); } TEST_F(DataTypeTest, BFloat16Infinity) { - EXPECT_FALSE(BFloat16::MinValue.IsInfinity()); + EXPECT_FALSE(BFloat16::MaxValue.Negate().IsInfinity()); EXPECT_FALSE(BFloat16::MaxValue.IsInfinity()); EXPECT_TRUE(BFloat16::MaxValue.IsFinite()); diff --git a/onnxruntime/test/shared_lib/test_nontensor_types.cc b/onnxruntime/test/shared_lib/test_nontensor_types.cc index 5fa4fb31e1c91..8171a6eecc91d 100644 --- a/onnxruntime/test/shared_lib/test_nontensor_types.cc +++ b/onnxruntime/test/shared_lib/test_nontensor_types.cc @@ -370,11 +370,10 @@ TEST(CPPApi, Float16Zeros) { } namespace { -const auto EpsilonFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kEpsilonBits); const auto NaNFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kPositiveQNaNBits); -const auto MinValueFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kMinValueBits); const auto MaxValueFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kMaxValueBits); const auto InfinityFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kPositiveInfinityBits); +const auto ZeroFl16 = Ort::Float16_t::FromBits(0x0000); } // namespace TEST(CPPApi, Float16Comparision) { @@ -383,8 +382,6 @@ TEST(CPPApi, Float16Comparision) { const auto right = Ort::Float16_t(66.66f); const auto right_same = Ort::Float16_t(66.66f); - EXPECT_LT(EpsilonFl16, right); - EXPECT_EQ(left, left_same); EXPECT_NE(left, left_same.Negate()); @@ -416,13 +413,12 @@ TEST(CPPApi, Float16NaNComparision) { EXPECT_FALSE(MaxValueFl16 < NaNFl16); EXPECT_FALSE(MaxValueFl16 == NaNFl16); - EXPECT_FALSE(NaNFl16 < MinValueFl16); + EXPECT_FALSE(NaNFl16 < MaxValueFl16); - EXPECT_LT(MinValueFl16, MaxValueFl16); + EXPECT_LT(ZeroFl16, MaxValueFl16); } TEST(CPPApi, Float16Infinity) { - EXPECT_FALSE(MinValueFl16.IsInfinity()); EXPECT_FALSE(MaxValueFl16.IsInfinity()); EXPECT_TRUE(MaxValueFl16.IsFinite()); @@ -530,11 +526,10 @@ TEST(CPPApi, BFloat16Zeros) { } namespace { -const auto EpsilonBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kEpsilonBits); -const auto NaNBfl15 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveQNaNBits); -const auto MinValueBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kMinValueBits); +const auto NaNBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveQNaNBits); const auto MaxValueBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kMaxValueBits); const auto InfinityBFl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveInfinityBits); +const auto ZeroBfl16 = Ort::BFloat16_t::FromBits(0x0000); } // namespace TEST(CPPApi, BFloat16Comparision) { @@ -543,8 +538,6 @@ TEST(CPPApi, BFloat16Comparision) { const auto right = Ort::BFloat16_t(66.66f); const auto right_same = Ort::BFloat16_t(66.66f); - EXPECT_LT(EpsilonBfl16, right); - EXPECT_EQ(left, left_same); EXPECT_NE(left, left_same.Negate()); @@ -561,7 +554,7 @@ TEST(CPPApi, BFloat16TestNAN) { EXPECT_TRUE(fp16NANFromSingle.IsNaN()); // NaN are not equal to each other - EXPECT_NE(NaNBfl15, fp16NANFromSingle); + EXPECT_NE(NaNBfl16, fp16NANFromSingle); const float NanFromBFloat16 = fp16NANFromSingle.ToFloat(); EXPECT_TRUE(std::isnan(NanFromBFloat16)); @@ -570,19 +563,18 @@ TEST(CPPApi, BFloat16TestNAN) { } TEST(CPPApi, BFloat16NaNComparision) { - EXPECT_FALSE(NaNBfl15 < NaNBfl15); - EXPECT_TRUE(NaNBfl15 != NaNBfl15); - EXPECT_FALSE(NaNBfl15 == NaNBfl15); + EXPECT_FALSE(NaNBfl16 < NaNBfl16); + EXPECT_TRUE(NaNBfl16 != NaNBfl16); + EXPECT_FALSE(NaNBfl16 == NaNBfl16); - EXPECT_FALSE(MaxValueBfl16 < NaNBfl15); - EXPECT_FALSE(MaxValueBfl16 == NaNBfl15); - EXPECT_FALSE(NaNBfl15 < MinValueBfl16); + EXPECT_FALSE(MaxValueBfl16 < NaNBfl16); + EXPECT_FALSE(MaxValueBfl16 == NaNBfl16); + EXPECT_FALSE(NaNBfl16 < MaxValueBfl16); - EXPECT_LT(MinValueBfl16, MaxValueBfl16); + EXPECT_LT(ZeroBfl16, MaxValueBfl16); } TEST(CPPApi, BFloat16Infinity) { - EXPECT_FALSE(MinValueBfl16.IsInfinity()); EXPECT_FALSE(MaxValueBfl16.IsInfinity()); EXPECT_TRUE(MaxValueBfl16.IsFinite());