Skip to content

Commit

Permalink
remove unused and confusing float16 constants (#21999)
Browse files Browse the repository at this point in the history
### Description
Remove unused and confusing special constants in MLFloat16 and BFloat16
types.

### Motivation and Context
While looking at adding a specialization for std::numeric_limits for the
16-bit floating point types, I found that there are various special
constants in those types that are confusing or just wrong.

MLFLoat16::Epsilon is not an epsilon at all, but approximates "e". Looks
like a copy-paste bug.
BFloat16::Epsilon does not correspond to `numeric_limits::epsilon()`,
nor even to the C# Float.Epsilon.
Instead, it corresponds to `numeric_limits::min()` which was really
confusing to me.

The "MinValue" constants does correspond to the C# `Float.MinValue`
constant, but this is C++ so it would be better renamed to "LowestValue"
since it corresponds to `numeric_limits::lowest()`. As it was unused
except for some unit tests I have replaced it with the equivalent
`MaxValue.Negate()` here.

There's also an unused `kSignaling_NaNBits` constant which is just wrong
(has the same value as `kPositiveInfinityBits` instead of a NaN).
  • Loading branch information
arnej27959 authored Sep 6, 2024
1 parent 970ebc2 commit 605a84f
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 42 deletions.
4 changes: 0 additions & 4 deletions include/onnxruntime/core/framework/float16.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ struct MLFloat16 : onnxruntime_float16::Float16Impl<MLFloat16> {
static const MLFloat16 NegativeNaN;
static const MLFloat16 Infinity;
static const MLFloat16 NegativeInfinity;
static const MLFloat16 Epsilon;
static const MLFloat16 MinValue;
static const MLFloat16 MaxValue;
static const MLFloat16 Zero;
static const MLFloat16 One;
Expand Down Expand Up @@ -181,8 +179,6 @@ struct BFloat16 : onnxruntime_float16::BFloat16Impl<BFloat16> {
static const BFloat16 NegativeNaN;
static const BFloat16 Infinity;
static const BFloat16 NegativeInfinity;
static const BFloat16 Epsilon;
static const BFloat16 MinValue;
static const BFloat16 MaxValue;
static const BFloat16 Zero;
static const BFloat16 One;
Expand Down
5 changes: 0 additions & 5 deletions include/onnxruntime/core/session/onnxruntime_float16.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ struct Float16Impl {
static constexpr uint16_t kNegativeInfinityBits = 0xFC00U;
static constexpr uint16_t kPositiveQNaNBits = 0x7E00U;
static constexpr uint16_t kNegativeQNaNBits = 0xFE00U;
static constexpr uint16_t kEpsilonBits = 0x4170U;
static constexpr uint16_t kMinValueBits = 0xFBFFU; // Minimum normal number
static constexpr uint16_t kMaxValueBits = 0x7BFFU; // Largest normal number
static constexpr uint16_t kOneBits = 0x3C00U;
static constexpr uint16_t kMinusOneBits = 0xBC00U;
Expand Down Expand Up @@ -364,9 +362,6 @@ struct BFloat16Impl {
static constexpr uint16_t kNegativeInfinityBits = 0xFF80U;
static constexpr uint16_t kPositiveQNaNBits = 0x7FC1U;
static constexpr uint16_t kNegativeQNaNBits = 0xFFC1U;
static constexpr uint16_t kSignaling_NaNBits = 0x7F80U;
static constexpr uint16_t kEpsilonBits = 0x0080U;
static constexpr uint16_t kMinValueBits = 0xFF7FU;
static constexpr uint16_t kMaxValueBits = 0x7F7FU;
static constexpr uint16_t kRoundToNearest = 0x7FFFU;
static constexpr uint16_t kOneBits = 0x3F80U;
Expand Down
4 changes: 0 additions & 4 deletions onnxruntime/core/framework/data_types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ const MLFloat16 MLFloat16::NaN(MLFloat16::FromBits(MLFloat16::kPositiveQNaNBits)
const MLFloat16 MLFloat16::NegativeNaN(MLFloat16::FromBits(MLFloat16::kNegativeQNaNBits));
const MLFloat16 MLFloat16::Infinity(MLFloat16::FromBits(MLFloat16::kPositiveInfinityBits));
const MLFloat16 MLFloat16::NegativeInfinity(MLFloat16::FromBits(MLFloat16::kNegativeInfinityBits));
const MLFloat16 MLFloat16::Epsilon(MLFloat16::FromBits(MLFloat16::kEpsilonBits));
const MLFloat16 MLFloat16::MinValue(MLFloat16::FromBits(MLFloat16::kMinValueBits));
const MLFloat16 MLFloat16::MaxValue(MLFloat16::FromBits(MLFloat16::kMaxValueBits));
const MLFloat16 MLFloat16::Zero(MLFloat16::FromBits(0));
const MLFloat16 MLFloat16::One(MLFloat16::FromBits(MLFloat16::kOneBits));
Expand All @@ -47,8 +45,6 @@ const BFloat16 BFloat16::NaN(BFloat16::FromBits(BFloat16::kPositiveQNaNBits));
const BFloat16 BFloat16::NegativeNaN(BFloat16::FromBits(BFloat16::kNegativeQNaNBits));
const BFloat16 BFloat16::Infinity(BFloat16::FromBits(BFloat16::kPositiveInfinityBits));
const BFloat16 BFloat16::NegativeInfinity(BFloat16::FromBits(BFloat16::kNegativeInfinityBits));
const BFloat16 BFloat16::Epsilon(BFloat16::FromBits(BFloat16::kEpsilonBits));
const BFloat16 BFloat16::MinValue(BFloat16::FromBits(BFloat16::kMinValueBits));
const BFloat16 BFloat16::MaxValue(BFloat16::FromBits(BFloat16::kMaxValueBits));
const BFloat16 BFloat16::Zero(BFloat16::FromBits(0));
const BFloat16 BFloat16::One(BFloat16::FromBits(BFloat16::kOneBits));
Expand Down
16 changes: 8 additions & 8 deletions onnxruntime/test/framework/data_types_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ TEST_F(DataTypeTest, MLFloat16Comparision) {
const MLFloat16 right = MLFloat16(66.66f);
const MLFloat16 right_same = MLFloat16(66.66f);

EXPECT_TRUE(MLFloat16::Epsilon < right);
EXPECT_TRUE(MLFloat16::One < right);

EXPECT_EQ(left, left_same);
EXPECT_NE(left, left_same.Negate());
Expand Down Expand Up @@ -513,14 +513,14 @@ TEST_F(DataTypeTest, MLFloat16NaNComparision) {

EXPECT_FALSE(MLFloat16::MaxValue < MLFloat16::NaN);
EXPECT_FALSE(MLFloat16::MaxValue == MLFloat16::NaN);
EXPECT_FALSE(MLFloat16::MinValue < MLFloat16::NaN);
EXPECT_FALSE(MLFloat16::MaxValue.Negate() < MLFloat16::NaN);
EXPECT_FALSE(MLFloat16::NaN < MLFloat16::MaxValue);

EXPECT_TRUE(MLFloat16::MinValue < MLFloat16::MaxValue);
EXPECT_TRUE(MLFloat16::Zero < MLFloat16::MaxValue);
}

TEST_F(DataTypeTest, MLFloat16Infinity) {
EXPECT_FALSE(MLFloat16::MinValue.IsInfinity());
EXPECT_FALSE(MLFloat16::MaxValue.Negate().IsInfinity());
EXPECT_FALSE(MLFloat16::MaxValue.IsInfinity());
EXPECT_TRUE(MLFloat16::MaxValue.IsFinite());

Expand Down Expand Up @@ -625,7 +625,7 @@ TEST_F(DataTypeTest, BFloat16Comparision) {
const BFloat16 right = BFloat16(66.66f);
const BFloat16 right_same = BFloat16(66.66f);

EXPECT_TRUE(BFloat16::Epsilon < right);
EXPECT_TRUE(BFloat16::One < right);

EXPECT_EQ(left, left_same);
EXPECT_NE(left, left_same.Negate());
Expand Down Expand Up @@ -657,14 +657,14 @@ TEST_F(DataTypeTest, BFloat16NaNComparision) {

EXPECT_FALSE(BFloat16::MaxValue < BFloat16::NaN);
EXPECT_FALSE(BFloat16::MaxValue == BFloat16::NaN);
EXPECT_FALSE(BFloat16::MinValue < BFloat16::NaN);
EXPECT_FALSE(BFloat16::MaxValue.Negate() < BFloat16::NaN);
EXPECT_FALSE(BFloat16::NaN < BFloat16::MaxValue);

EXPECT_TRUE(BFloat16::MinValue < BFloat16::MaxValue);
EXPECT_TRUE(BFloat16::Zero < BFloat16::MaxValue);
}

TEST_F(DataTypeTest, BFloat16Infinity) {
EXPECT_FALSE(BFloat16::MinValue.IsInfinity());
EXPECT_FALSE(BFloat16::MaxValue.Negate().IsInfinity());
EXPECT_FALSE(BFloat16::MaxValue.IsInfinity());
EXPECT_TRUE(BFloat16::MaxValue.IsFinite());

Expand Down
34 changes: 13 additions & 21 deletions onnxruntime/test/shared_lib/test_nontensor_types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -370,11 +370,10 @@ TEST(CPPApi, Float16Zeros) {
}

namespace {
const auto EpsilonFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kEpsilonBits);
const auto NaNFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kPositiveQNaNBits);
const auto MinValueFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kMinValueBits);
const auto MaxValueFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kMaxValueBits);
const auto InfinityFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kPositiveInfinityBits);
const auto ZeroFl16 = Ort::Float16_t::FromBits(0x0000);
} // namespace

TEST(CPPApi, Float16Comparision) {
Expand All @@ -383,8 +382,6 @@ TEST(CPPApi, Float16Comparision) {
const auto right = Ort::Float16_t(66.66f);
const auto right_same = Ort::Float16_t(66.66f);

EXPECT_LT(EpsilonFl16, right);

EXPECT_EQ(left, left_same);
EXPECT_NE(left, left_same.Negate());

Expand Down Expand Up @@ -416,13 +413,12 @@ TEST(CPPApi, Float16NaNComparision) {

EXPECT_FALSE(MaxValueFl16 < NaNFl16);
EXPECT_FALSE(MaxValueFl16 == NaNFl16);
EXPECT_FALSE(NaNFl16 < MinValueFl16);
EXPECT_FALSE(NaNFl16 < MaxValueFl16);

EXPECT_LT(MinValueFl16, MaxValueFl16);
EXPECT_LT(ZeroFl16, MaxValueFl16);
}

TEST(CPPApi, Float16Infinity) {
EXPECT_FALSE(MinValueFl16.IsInfinity());
EXPECT_FALSE(MaxValueFl16.IsInfinity());
EXPECT_TRUE(MaxValueFl16.IsFinite());

Expand Down Expand Up @@ -530,11 +526,10 @@ TEST(CPPApi, BFloat16Zeros) {
}

namespace {
const auto EpsilonBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kEpsilonBits);
const auto NaNBfl15 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveQNaNBits);
const auto MinValueBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kMinValueBits);
const auto NaNBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveQNaNBits);
const auto MaxValueBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kMaxValueBits);
const auto InfinityBFl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveInfinityBits);
const auto ZeroBfl16 = Ort::BFloat16_t::FromBits(0x0000);
} // namespace

TEST(CPPApi, BFloat16Comparision) {
Expand All @@ -543,8 +538,6 @@ TEST(CPPApi, BFloat16Comparision) {
const auto right = Ort::BFloat16_t(66.66f);
const auto right_same = Ort::BFloat16_t(66.66f);

EXPECT_LT(EpsilonBfl16, right);

EXPECT_EQ(left, left_same);
EXPECT_NE(left, left_same.Negate());

Expand All @@ -561,7 +554,7 @@ TEST(CPPApi, BFloat16TestNAN) {
EXPECT_TRUE(fp16NANFromSingle.IsNaN());

// NaN are not equal to each other
EXPECT_NE(NaNBfl15, fp16NANFromSingle);
EXPECT_NE(NaNBfl16, fp16NANFromSingle);

const float NanFromBFloat16 = fp16NANFromSingle.ToFloat();
EXPECT_TRUE(std::isnan(NanFromBFloat16));
Expand All @@ -570,19 +563,18 @@ TEST(CPPApi, BFloat16TestNAN) {
}

TEST(CPPApi, BFloat16NaNComparision) {
EXPECT_FALSE(NaNBfl15 < NaNBfl15);
EXPECT_TRUE(NaNBfl15 != NaNBfl15);
EXPECT_FALSE(NaNBfl15 == NaNBfl15);
EXPECT_FALSE(NaNBfl16 < NaNBfl16);
EXPECT_TRUE(NaNBfl16 != NaNBfl16);
EXPECT_FALSE(NaNBfl16 == NaNBfl16);

EXPECT_FALSE(MaxValueBfl16 < NaNBfl15);
EXPECT_FALSE(MaxValueBfl16 == NaNBfl15);
EXPECT_FALSE(NaNBfl15 < MinValueBfl16);
EXPECT_FALSE(MaxValueBfl16 < NaNBfl16);
EXPECT_FALSE(MaxValueBfl16 == NaNBfl16);
EXPECT_FALSE(NaNBfl16 < MaxValueBfl16);

EXPECT_LT(MinValueBfl16, MaxValueBfl16);
EXPECT_LT(ZeroBfl16, MaxValueBfl16);
}

TEST(CPPApi, BFloat16Infinity) {
EXPECT_FALSE(MinValueBfl16.IsInfinity());
EXPECT_FALSE(MaxValueBfl16.IsInfinity());
EXPECT_TRUE(MaxValueBfl16.IsFinite());

Expand Down

0 comments on commit 605a84f

Please sign in to comment.