From 92d0834421d287a2579a423cc4a72576c1810375 Mon Sep 17 00:00:00 2001 From: Theodore Omtzigt Date: Fri, 24 Nov 2023 13:23:34 -0500 Subject: [PATCH] Fast specialized posits (#384) * enabling fast specialized posit regression test for missing standard posits * adding posit<8,2> skeleton * value conversion of fast posit<8,2> implementation * compilation fix for clang and gcc, those compilers do not support multi-valued case statements * unifying the specialized posit test benches --------- Signed-off-by: Theodore Omtzigt --- .github/workflows/cmake.yml | 2 +- .../number/posit/specializations.hpp | 4 + .../number/posit/specialized/posit_16_1.hpp | 38 +- .../number/posit/specialized/posit_16_2.hpp | 945 +++++++++++++++++- .../number/posit/specialized/posit_8_0.h | 2 +- .../number/posit/specialized/posit_8_0.hpp | 4 +- .../number/posit/specialized/posit_8_1.hpp | 4 +- .../number/posit/specialized/posit_8_2.hpp | 664 +++++++++++- static/posit/specialized/posit_128_2.cpp | 2 +- static/posit/specialized/posit_128_4.cpp | 10 +- static/posit/specialized/posit_16_1.cpp | 34 +- static/posit/specialized/posit_16_2.cpp | 32 +- static/posit/specialized/posit_256_2.cpp | 2 +- static/posit/specialized/posit_256_5.cpp | 10 +- static/posit/specialized/posit_2_0.cpp | 10 +- static/posit/specialized/posit_32_2.cpp | 10 +- static/posit/specialized/posit_3_0.cpp | 10 +- static/posit/specialized/posit_48_2.cpp | 10 +- static/posit/specialized/posit_4_0.cpp | 10 +- static/posit/specialized/posit_64_2.cpp | 3 +- static/posit/specialized/posit_64_3.cpp | 10 +- static/posit/specialized/posit_8_0.cpp | 10 +- static/posit/specialized/posit_8_1.cpp | 18 +- static/posit/specialized/posit_8_2.cpp | 49 +- static/posit/specialized/quire_32_2.cpp | 10 +- 25 files changed, 1747 insertions(+), 156 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 2f4bd0042..2df25fcc1 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -2,7 +2,7 @@ name: CMake on: push: - branches: [ v3.73, dev, main ] + branches: [ v3.74, dev, main ] pull_request: branches: [ main ] diff --git a/include/universal/number/posit/specializations.hpp b/include/universal/number/posit/specializations.hpp index f7607d6b0..052e4b183 100644 --- a/include/universal/number/posit/specializations.hpp +++ b/include/universal/number/posit/specializations.hpp @@ -18,12 +18,16 @@ #define POSIT_FAST_POSIT_3_1 1 #define POSIT_FAST_POSIT_4_0 1 #define POSIT_FAST_POSIT_8_0 1 +#define POSIT_FAST_POSIT_8_2 1 #define POSIT_FAST_POSIT_16_1 1 +#define POSIT_FAST_POSIT_16_2 1 #define POSIT_FAST_POSIT_32_2 1 #define POSIT_FAST_POSIT_48_2 0 #define POSIT_FAST_POSIT_64_2 0 #define POSIT_FAST_POSIT_64_3 0 +#define POSIT_FAST_POSIT_128_2 0 #define POSIT_FAST_POSIT_128_4 0 +#define POSIT_FAST_POSIT_256_2 0 #define POSIT_FAST_POSIT_256_5 0 #endif diff --git a/include/universal/number/posit/specialized/posit_16_1.hpp b/include/universal/number/posit/specialized/posit_16_1.hpp index 69f22dc33..495bc2e15 100644 --- a/include/universal/number/posit/specialized/posit_16_1.hpp +++ b/include/universal/number/posit/specialized/posit_16_1.hpp @@ -19,7 +19,7 @@ namespace sw { namespace universal { #if POSIT_FAST_POSIT_16_1 #ifdef _MSC_VER #pragma message("Fast specialization of posit<16,1>") -//#else +//#else some compile time message that indicates that we are using a specialization for non MS compilers //#warning("Fast specialization of posit<16,1>") #endif @@ -884,22 +884,6 @@ inline std::string to_string(const posit& p, std::streamsi return ss.str(); } -inline bool twosComplementLessThan(std::uint16_t lhs, std::uint16_t rhs) { - // comparison of the sign bit - uint32_t mask = 0x8000; - if ((lhs & mask) == 0 && (rhs & mask) == mask) return false; - if ((lhs & mask) == mask && (rhs & mask) == 0) return true; - // sign is equal, compare the remaining bits - mask >>= 1; - while (mask > 0) { - if ((lhs & mask) == 0 && (rhs & mask) == mask) return true; - if ((lhs & mask) == mask && (rhs & mask) == 0) return false; - mask >>= 1; - } - // numbers are equal - return false; -} - // posit - posit binary logic operators inline bool operator==(const posit& lhs, const posit& rhs) { return lhs._bits == rhs._bits; @@ -968,8 +952,26 @@ inline bool operator>=(int lhs, const posit& rhs) { return !operator<(posit(lhs), rhs); } +/* +inline bool twosComplementLessThan(std::uint16_t lhs, std::uint16_t rhs) { + // comparison of the sign bit + uint32_t mask = 0x8000; + if ((lhs & mask) == 0 && (rhs & mask) == mask) return false; + if ((lhs & mask) == mask && (rhs & mask) == 0) return true; + // sign is equal, compare the remaining bits + mask >>= 1; + while (mask > 0) { + if ((lhs & mask) == 0 && (rhs & mask) == mask) return true; + if ((lhs & mask) == mask && (rhs & mask) == 0) return false; + mask >>= 1; + } + // numbers are equal + return false; +} +*/ + inline bool operator< (const posit& lhs, double rhs) { - return twosComplementLessThan(lhs._bits, posit(rhs)._bits); + return int16_t(lhs._bits) < int16_t(posit(rhs)._bits); } #endif // POSIT_ENABLE_LITERALS diff --git a/include/universal/number/posit/specialized/posit_16_2.hpp b/include/universal/number/posit/specialized/posit_16_2.hpp index 5393df950..20b29f918 100644 --- a/include/universal/number/posit/specialized/posit_16_2.hpp +++ b/include/universal/number/posit/specialized/posit_16_2.hpp @@ -1,7 +1,7 @@ #pragma once // posit_16_2.hpp: specialized 16-bit posit using fast implementation specialized for posit<16,2> // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. @@ -11,13 +11,6 @@ #ifndef POSIT_FAST_POSIT_16_2 #define POSIT_FAST_POSIT_16_2 0 -#endif - - // guard for the fact that we don't have a specialization yet -#if POSIT_FAST_POSIT_16_2 -#undef POSIT_FAST_POSIT_16_2 -#define POSIT_FAST_POSIT_16_2 0 -#pragma message("Fast specialization of posit<16,2> requested but ignored as fast implemention is TBD") #endif namespace sw { namespace universal { @@ -26,12 +19,944 @@ namespace sw { namespace universal { #if POSIT_FAST_POSIT_16_2 #ifdef _MSC_VER #pragma message("Fast specialization of posit<16,2>") -//#else +//#else some compile time message that indicates that we are using a specialization for non MS compilers //#warning("Fast specialization of posit<16,2>") #endif // fast specialized posit<16,2> -// TODO +template<> +class posit { +public: + static constexpr unsigned nbits = NBITS_IS_16; + static constexpr unsigned es = ES_IS_2; + static constexpr unsigned sbits = 1; + static constexpr unsigned rbits = nbits - sbits; + static constexpr unsigned ebits = es; + static constexpr unsigned fbits = nbits - 3 - es; + static constexpr unsigned fhbits = fbits + 1; + static constexpr uint16_t sign_mask = 0x8000u; + + constexpr posit() : _bits(0) {} + posit(const posit&) = default; + posit(posit&&) = default; + posit& operator=(const posit&) = default; + posit& operator=(posit&&) = default; + + // specific value constructor + constexpr posit(const SpecificValue code) : _bits(0) { + switch (code) { + case SpecificValue::infpos: + case SpecificValue::maxpos: + maxpos(); + break; + case SpecificValue::minpos: + minpos(); + break; + case SpecificValue::zero: + default: + zero(); + break; + case SpecificValue::minneg: + minneg(); + break; + case SpecificValue::infneg: + case SpecificValue::maxneg: + maxneg(); + break; + case SpecificValue::qnan: + case SpecificValue::snan: + case SpecificValue::nar: + setnar(); + break; + } + } + + // initializers for native types + explicit constexpr posit(signed char initial_value) : _bits(0) { *this = initial_value; } + explicit constexpr posit(short initial_value) : _bits(0) { *this = initial_value; } + explicit constexpr posit(int initial_value) : _bits(0) { *this = initial_value; } + explicit constexpr posit(long initial_value) : _bits(0) { *this = initial_value; } + explicit constexpr posit(long long initial_value) : _bits(0) { *this = initial_value; } + explicit constexpr posit(char initial_value) : _bits(0) { *this = initial_value; } + explicit constexpr posit(unsigned short initial_value) : _bits(0) { *this = initial_value; } + explicit constexpr posit(unsigned int initial_value) : _bits(0) { *this = initial_value; } + explicit constexpr posit(unsigned long initial_value) : _bits(0) { *this = initial_value; } + explicit constexpr posit(unsigned long long initial_value) : _bits(0) { *this = initial_value; } + explicit posit(float initial_value) : _bits(0) { *this = initial_value; } + posit(double initial_value) : _bits(0) { *this = initial_value; } + explicit posit(long double initial_value) : _bits(0) { *this = initial_value; } + + // assignment operators for native types + constexpr posit& operator=(signed char rhs) { return integer_assign((long)rhs); } + constexpr posit& operator=(short rhs) { return integer_assign((long)rhs); } + constexpr posit& operator=(int rhs) { return integer_assign((long)rhs); } + constexpr posit& operator=(long rhs) { return integer_assign(rhs); } + constexpr posit& operator=(long long rhs) { return integer_assign((long)rhs); } + constexpr posit& operator=(char rhs) { return integer_assign((long)rhs); } + constexpr posit& operator=(unsigned short rhs) { return integer_assign((long)rhs); } + constexpr posit& operator=(unsigned int rhs) { return integer_assign((long)rhs); } + constexpr posit& operator=(unsigned long rhs) { return integer_assign((long)rhs); } + constexpr posit& operator=(unsigned long long rhs) { return integer_assign((long)rhs); } + posit& operator=(float rhs) { return float_assign(double(rhs)); } + posit& operator=(double rhs) { return float_assign(rhs); } + posit& operator=(long double rhs) { return float_assign(double(rhs)); } + + explicit operator long double() const { return to_long_double(); } + explicit operator double() const { return to_double(); } + explicit operator float() const { return to_float(); } + explicit operator long long() const { return to_long_long(); } + explicit operator long() const { return to_long(); } + explicit operator int() const { return to_int(); } + explicit operator unsigned long long() const { return to_long_long(); } + explicit operator unsigned long() const { return to_long(); } + explicit operator unsigned int() const { return to_int(); } + + posit& setBitblock(const bitblock& raw) { + _bits = uint16_t(raw.to_ulong()); + return *this; + } + constexpr posit& setbits(uint64_t value) { + _bits = uint16_t(value & 0xffffu); + return *this; + } + + // arithmetic assignment operators + constexpr posit operator-() const { + posit p; + return p.setbits((~_bits) + 1ul); + } + posit& operator+=(const posit& b) { + // process special cases +#if POSIT_THROW_ARITHMETIC_EXCEPTION + if (isnar() || b.isnar()) { + throw posit_operand_is_nar{}; + } +#else + if (isnar() || b.isnar()) { + setnar(); + return *this; + } +#endif + if (b.iszero()) return *this; + if (iszero()) { _bits = b._bits; return *this; } + if (isneg() != b.isneg()) return *this -= b.twosComplement(); + + uint16_t lhs = _bits; + uint16_t rhs = b._bits; + bool sign = bool(_bits & sign_mask); + if (sign) { + lhs = -lhs & 0xFFFF; + rhs = -rhs & 0xFFFF; + } + if (lhs < rhs) std::swap(lhs, rhs); + + // decode the regime of lhs + int8_t m = 0; // pattern length + uint16_t remaining = 0; + decode_regime(lhs, m, remaining); + + // extract the exponent + uint16_t exp = remaining >> 14; + + // extract remaining fraction bits + uint32_t lhs_fraction = (0x4000 | remaining) << 16; + int8_t shiftRight = m; + + // adjust shift and extract fraction bits of rhs + extractAddand(rhs, shiftRight, remaining); + uint32_t rhs_fraction = (0x4000 | remaining) << 16; + + // this is 2kZ + expZ; (where kZ=kA-kB and expZ=expA-expB) + shiftRight = (shiftRight << 1) + exp - (remaining >> 14); + + if (shiftRight == 0) { + lhs_fraction += rhs_fraction; // this will always product a carry + if (exp) ++m; + exp ^= 1; + lhs_fraction >>= 1; + } + else { + (shiftRight > 31) ? (rhs_fraction = 0) : (rhs_fraction >>= shiftRight); // frac32B >>= shiftRight + lhs_fraction += rhs_fraction; + + bool rcarry = 0x80000000 & lhs_fraction; // first left bit + if (rcarry) { + if (exp) ++m; + exp ^= 1; + lhs_fraction >>= 1; + } + } + + _bits = round(m, exp, lhs_fraction); + if (sign) _bits = -_bits & 0xFFFF; + return *this; + } + posit& operator-=(const posit& b) { + // process special cases +#if POSIT_THROW_ARITHMETIC_EXCEPTION + if (isnar() || b.isnar()) { + throw posit_operand_is_nar{}; + } +#else + if (isnar() || b.isnar()) { + setnar(); + return *this; + } +#endif + if (b.iszero()) return *this; + if (iszero()) { _bits = -int16_t(b._bits) & 0xFFFF; return *this; } + posit bComplement = b.twosComplement(); + if (isneg() != b.isneg()) return *this += bComplement; + + uint16_t lhs = _bits; + uint16_t rhs = bComplement._bits; + // Both operands are actually the same sign if rhs inherits sign of sub: Make both positive + bool sign = bool(lhs & sign_mask); + (sign) ? (lhs = (-lhs & 0xFFFF)) : (rhs = (-rhs & 0xFFFF)); + + if (lhs == rhs) { + _bits = 0x0; + return *this; + } + if (lhs < rhs) { + std::swap(lhs, rhs); + sign = !sign; + } + + // decode the regime of lhs + int8_t m = 0; // pattern length + uint16_t remaining = 0; + decode_regime(lhs, m, remaining); + + // extract the exponent + uint16_t exp = remaining >> 14; + + uint32_t lhs_fraction = (0x4000 | remaining) << 16; + int8_t shiftRight = m; + + // adjust shift and extract fraction bits of rhs + extractAddand(rhs, shiftRight, remaining); + uint32_t rhs_fraction = (0x4000 | remaining) << 16; + + // align the fractions for subtraction + shiftRight = (shiftRight << 1) + exp - (remaining >> 14); + if (shiftRight != 0) { + if (shiftRight >= 29) { + _bits = lhs; + if (sign) _bits = -_bits & 0xFFFF; + return *this; + } + else { + rhs_fraction >>= shiftRight; + } + } + else { + rhs_fraction >>= shiftRight; + } + lhs_fraction -= rhs_fraction; + + while ((lhs_fraction >> 29) == 0) { + --m; + lhs_fraction <<= 2; + } + bool ecarry = bool(0x40000000 & lhs_fraction); + if (!ecarry) { + if (exp == 0) --m; + exp ^= 1; + lhs_fraction <<= 1; + } + + _bits = round(m, exp, lhs_fraction); + if (sign) _bits = -_bits & 0xFFFF; + return *this; + } + posit& operator*=(const posit& b) { + // process special cases +#if POSIT_THROW_ARITHMETIC_EXCEPTION + if (isnar() || b.isnar()) { + throw posit_operand_is_nar{}; + } +#else + if (isnar() || b.isnar()) { + setnar(); + return *this; + } +#endif + if (iszero() || b.iszero()) { + _bits = 0x0000; + return *this; + } + uint16_t lhs = _bits; + uint16_t rhs = b._bits; + // calculate the sign of the result + bool sign = bool(lhs & sign_mask) ^ bool(rhs & sign_mask); + lhs = lhs & sign_mask ? -lhs : lhs; + rhs = rhs & sign_mask ? -rhs : rhs; + + // decode the regime of lhs + int8_t m = 0; // pattern length + uint16_t remaining = 0; + decode_regime(lhs, m, remaining); + + // extract the exponent + int32_t exp = remaining >> 14; + + // add the hidden bit + uint32_t lhs_fraction = (0x4000 | remaining); + // adjust shift and extract fraction bits of rhs + extractMultiplicand(rhs, m, remaining); + exp += (remaining >> 14); + uint32_t rhs_fraction = (0x4000 | remaining); + uint32_t result_fraction = lhs_fraction * rhs_fraction; + //std::cout << "fbits 0x" << std::hex << result_fraction << std::dec << std::endl; + + if (exp > 1) { + ++m; + exp ^= 0x2; + } + bool rcarry = bool(result_fraction & 0x20000000); + if (rcarry) { + if (exp) m++; + exp ^= 0x1; + result_fraction >>= 1; + } + + // round + _bits = adjustAndRound(m, exp, result_fraction); + if (sign) _bits = -_bits & 0xFFFF; + return *this; + } + posit& operator/=(const posit& b) { + // process special cases + // since we are encoding error conditions as NaR (Not a Real), we need to process that condition first +#if POSIT_THROW_ARITHMETIC_EXCEPTION + if (b.iszero()) { + throw posit_divide_by_zero{}; // not throwing is a quiet signalling NaR + } + if (b.isnar()) { + throw posit_divide_by_nar{}; + } + if (isnar()) { + throw posit_numerator_is_nar{}; + } +#else + if (isnar() || b.isnar() || b.iszero()) { + setnar(); + return *this; + } +#endif // POSIT_THROW_ARITHMETIC_EXCEPTION + + uint16_t lhs = _bits; + uint16_t rhs = b._bits; + if (iszero()) { + _bits = 0x0000; + return *this; + } + + // calculate the sign of the result + bool sign = bool(lhs & sign_mask) ^ bool(rhs & sign_mask); + lhs = lhs & sign_mask ? -lhs : lhs; + rhs = rhs & sign_mask ? -rhs : rhs; + + // decode the regime of lhs + int8_t m = 0; // pattern length + uint16_t remaining = 0; + decode_regime(lhs, m, remaining); + + // extract the exponent + int32_t exp = remaining >> 14; + + // extract the fraction + uint16_t lhs_fraction = (0x4000 | remaining); + uint32_t fraction = lhs_fraction << 14; + + // adjust shift and extract fraction bits of rhs + extractDividand(rhs, m, remaining); + exp -= remaining >> 14; + uint16_t rhs_fraction = (0x4000 | remaining); + + div_t result = div(fraction, rhs_fraction); + uint32_t result_fraction = result.quot; + uint32_t remainder = result.rem; + + // adjust the exponent if needed + if (exp < 0) { + exp = 0x01; + --m; + } + if (result_fraction != 0) { + bool rcarry = result_fraction >> 14; // this is the hidden bit (14th bit), extreme right bit is bit 0 + if (!rcarry) { + if (exp == 0) --m; + exp ^= 0x01; + result_fraction <<= 1; + } + } + + // round + _bits = divRound(m, exp, result_fraction, remainder != 0); + if (sign) _bits = -_bits & 0xFFFF; + + return *this; + } + // prefix/postfix operators + posit& operator++() { + ++_bits; + return *this; + } + posit operator++(int) { + posit tmp(*this); + operator++(); + return tmp; + } + posit& operator--() { + --_bits; + return *this; + } + posit operator--(int) { + posit tmp(*this); + operator--(); + return tmp; + } + + posit reciprocate() const { + posit p = 1.0 / *this; + return p; + } + posit abs() const { + if (isneg()) { + return posit(-*this); + } + return *this; + } + + // Selectors + inline bool sign() const { return (_bits & sign_mask); } + inline bool isnar() const { return (_bits == sign_mask); } + inline bool iszero() const { return (_bits == 0x0); } + inline bool isone() const { return (_bits == 0x4000); } // pattern 010000... + inline bool isminusone() const { return (_bits == 0xC000); } // pattern 110000... + inline bool isneg() const { return (_bits & sign_mask); } + inline bool ispos() const { return !isneg(); } + inline bool ispowerof2() const { return !(_bits & 0x1); } + + inline int sign_value() const { return (_bits & 0x8 ? -1 : 1); } + + bitblock get() const { bitblock bb; bb = int(_bits); return bb; } + unsigned long long encoding() const { return (unsigned long long)(_bits); } + + // Modifiers + inline void clear() { _bits = 0; } + inline void setzero() { clear(); } + inline void setnar() { _bits = sign_mask; } + inline posit& minpos() { + clear(); + return ++(*this); + } + inline posit& maxpos() { + setnar(); + return --(*this); + } + inline posit& zero() { + clear(); + return *this; + } + inline posit& minneg() { + clear(); + return --(*this); + } + inline posit& maxneg() { + setnar(); + return ++(*this); + } + inline posit twosComplement() const { + posit p; + return p.setbits(~_bits + 1ul); + } + + internal::value to_value() const { + bool _sign; + regime _regime; + exponent _exponent; + fraction _fraction; + bitblock _raw_bits; + _raw_bits.reset(); + uint64_t mask = 1; + for (unsigned i = 0; i < nbits; i++) { + _raw_bits.set(i, (_bits & mask)); + mask <<= 1; + } + decode(_raw_bits, _sign, _regime, _exponent, _fraction); + return internal::value(_sign, _regime.scale() + _exponent.scale(), _fraction.get(), iszero(), isnar()); + } + +private: + uint16_t _bits; + + // Conversion functions +#if POSIT_THROW_ARITHMETIC_EXCEPTION + int to_int() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return int(to_float()); + } + long to_long() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return long(to_double()); + } + long long to_long_long() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return long(to_long_double()); + } +#else + int to_int() const { + if (iszero()) return 0; + if (isnar()) return int(INFINITY); + return int(to_float()); + } + long to_long() const { + if (iszero()) return 0; + if (isnar()) return long(INFINITY); + return long(to_double()); + } + long long to_long_long() const { + if (iszero()) return 0; + if (isnar()) return (long long)(INFINITY); + return long(to_long_double()); + } +#endif + float to_float() const { + return (float)to_double(); + } + double to_double() const { + if (iszero()) return 0.0; + if (isnar()) return NAN; + bool _sign; + regime _regime; + exponent _exponent; + fraction _fraction; + bitblock _raw_bits; + _raw_bits.reset(); + uint64_t mask = 1; + for (unsigned i = 0; i < nbits; i++) { + _raw_bits.set(i, (_bits & mask)); + mask <<= 1; + } + decode(_raw_bits, _sign, _regime, _exponent, _fraction); + double s = (_sign ? -1.0 : 1.0); + double r = _regime.value(); + double e = _exponent.value(); + double f = (1.0 + _fraction.value()); + return s * r * e * f; + } + long double to_long_double() const { + if (iszero()) return 0.0; + if (isnar()) return NAN; + bool _sign; + regime _regime; + exponent _exponent; + fraction _fraction; + bitblock _raw_bits; + _raw_bits.reset(); + uint64_t mask = 1; + for (unsigned i = 0; i < nbits; i++) { + _raw_bits.set(i, (_bits & mask)); + mask <<= 1; + } + decode(_raw_bits, _sign, _regime, _exponent, _fraction); + long double s = (_sign ? -1.0 : 1.0); + long double r = _regime.value(); + long double e = _exponent.value(); + long double f = (1.0 + _fraction.value()); + return s * r * e * f; + } + + + // helper methods + constexpr posit& integer_assign(long rhs) { + // special case for speed as this is a common initialization + if (rhs == 0) { + _bits = 0x0; + return *this; + } + + bool sign = (rhs < 0); + uint32_t v = sign ? -rhs : rhs; // project to positve side of the projective reals + uint16_t raw = 0; + if (v > 0x08000000) { // v > 134,217,728 + raw = 0x7FFFu; // +-maxpos + } + else if (v > 0x02FFFFFF) { // 50,331,647 < v < 134,217,728 + raw = 0x7FFEu; // 0.5 of maxpos + } + else if (v < 2) { // v == 0 or v == 1 + raw = (v << 14); // generates 0x0000 if v is 0, or 0x4000 if 1 + } + else { + uint32_t mask = 0x02000000; + int8_t scale = 25; + uint32_t fraction_bits = v; + while (!(fraction_bits & mask)) { + --scale; + fraction_bits <<= 1; + } + int8_t k = scale >> 1; + uint16_t exp = (scale & 0x01) << (12 - k); // extract exponent and shift to correct location + fraction_bits = (fraction_bits ^ mask); + raw = (0x7FFF ^ (0x3FFF >> k)) | exp | (fraction_bits >> (k + 13)); + + mask = 0x1000 << k; // bitNPlusOne + if (mask & fraction_bits) { + if (((mask - 1) & fraction_bits) | ((mask << 1) & fraction_bits)) raw++; // increment by 1 + } + } + _bits = sign ? -raw : raw; + return *this; + } + + // convert a double precision IEEE floating point to a posit<16,1>. You need to use at least doubles to capture + // enough bits to correctly round mul/div and elementary function results. That is, if you use a single precision + // float, you will inject errors in the validation suites. + posit& float_assign(double rhs) { + constexpr int dfbits = std::numeric_limits::digits - 1; + internal::value v(rhs); + // special case processing + if (v.iszero()) { + setzero(); + return *this; + } + if (v.isinf() || v.isnan()) { // posit encode for FP_INFINITE and NaN as NaR (Not a Real) + setnar(); + return *this; + } + + bitblock ptt; + convert_to_bb(v.sign(), v.scale(), v.fraction(), ptt); // TODO: needs to be faster + _bits = uint16_t(ptt.to_ulong()); + return *this; + } + + // decode_regime takes the raw bits of the posit, and returns the regime run-length, m, and the remaining fraction bits in remainder + inline void decode_regime(const uint16_t bits, int8_t& m, uint16_t& remaining) const { + remaining = (bits << 2) & 0xFFFF; + if (bits & 0x4000) { // positive regimes + while (remaining >> 15) { + ++m; + remaining = (remaining << 1) & 0xFFFF; + } + } + else { // negative regimes + m = -1; + while (!(remaining >> 15)) { + --m; + remaining = (remaining << 1) & 0xFFFF; + } + remaining &= 0x7FFF; + } + } + inline void extractAddand(const uint16_t bits, int8_t& m, uint16_t& remaining) const { + remaining = (bits << 2) & 0xFFFF; + if (bits & 0x4000) { // positive regimes + while (remaining >> 15) { + --m; + remaining = (remaining << 1) & 0xFFFF; + } + } + else { // negative regimes + ++m; + while (!(remaining >> 15)) { + ++m; + remaining = (remaining << 1) & 0xFFFF; + } + remaining &= 0x7FFF; + } + } + inline void extractMultiplicand(const uint16_t bits, int8_t& m, uint16_t& remaining) const { + remaining = (bits << 2) & 0xFFFF; + if (bits & 0x4000) { // positive regimes + while (remaining >> 15) { + ++m; + remaining = (remaining << 1) & 0xFFFF; + } + } + else { // negative regimes + --m; + while (!(remaining >> 15)) { + --m; + remaining = (remaining << 1) & 0xFFFF; + } + remaining &= 0x7FFF; + } + } + inline void extractDividand(const uint16_t bits, int8_t& m, uint16_t& remaining) const { + remaining = (bits << 2) & 0xFFFF; + if (bits & 0x4000) { // positive regimes + while (remaining >> 15) { + --m; + remaining = (remaining << 1) & 0xFFFF; + } + } + else { // negative regimes + ++m; + while (!(remaining >> 15)) { + ++m; + remaining = (remaining << 1) & 0xFFFF; + } + remaining &= 0x7FFF; + } + } + inline uint16_t round(const int8_t m, uint16_t exp, uint32_t fraction) const { + uint16_t scale, regime, bits; + if (m < 0) { + scale = (-m & 0xFFFF); + regime = 0x4000 >> scale; + } + else { + scale = m + 1; + regime = 0x7FFF - (0x7FFF >> scale); + } + + if (scale > 14) { + bits = m < 0 ? 0x0001 : 0x7FFF; // minpos and maxpos + } + else { + fraction = (fraction & 0x3FFFFFFF) >> (scale + 1); // remove both carry bits + uint16_t final_fbits = uint16_t(fraction >> 16); + bool bitNPlusOne = false; + if (scale != 14) { + bitNPlusOne = bool(0x8000 & fraction); + } + else if (final_fbits > 0) { + final_fbits = 0; + } + if (scale == 14 && exp != 0) { + bitNPlusOne = true; + exp = 0; + } + else { + exp <<= (13 - scale); + } + bits = uint16_t(regime) + uint16_t(exp) + uint16_t(final_fbits); + // n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + if (bitNPlusOne) { + uint16_t moreBits = (0x7FFF & fraction) ? 0x0001 : 0x0000; + bits += (bits & 0x0001) | moreBits; + } + } + return bits; + } + inline uint16_t divRound(const int8_t m, uint16_t exp, uint32_t fraction, bool nonZeroRemainder) const { + uint16_t scale, regime, bits; + if (m < 0) { + scale = (-m & 0xFFFF); + regime = 0x4000 >> scale; + } + else { + scale = m + 1; + regime = 0x7FFF - (0x7FFF >> scale); + } + + if (scale > 14) { + bits = m < 0 ? 0x0001 : 0x7FFF; // minpos and maxpos + } + else { + fraction &= 0x3FFF; // remove both carry bits + uint16_t final_fbits = uint16_t(fraction >> (scale + 1)); + bool bitNPlusOne = false; + if (scale != 14) { + bitNPlusOne = bool((fraction >> scale) & 0x1); + } + else if (final_fbits > 0) { + final_fbits = 0; + } + if (scale == 14 && exp != 0) { + bitNPlusOne = true; + exp = 0; + } + else { + exp <<= (13 - scale); + } + bits = uint16_t(regime) + uint16_t(exp) + uint16_t(final_fbits); + + if (bitNPlusOne) { + uint16_t moreBits = (fraction & ((1 << scale) - 1)) ? 0x0001 : 0x0000; + if (nonZeroRemainder) moreBits = 0x0001; + // n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + bits += (bits & 0x0001) | moreBits; + } + } + return bits; + } + inline uint16_t adjustAndRound(const int8_t m, uint16_t exp, uint32_t fraction) const { + uint16_t scale, regime, bits; + if (m < 0) { + scale = (-m & 0xFFFF); + regime = 0x4000 >> scale; + } + else { + scale = m + 1; + regime = 0x7FFF - (0x7FFF >> scale); + } + + if (scale > 14) { + bits = m < 0 ? 0x0001 : 0x7FFF; // minpos and maxpos + } + else { + fraction = (fraction & 0x0FFFFFFF) >> (scale - 1); // remove both carry bits + uint16_t final_fbits = uint16_t(fraction >> 16); + bool bitNPlusOne = false; + if (scale != 14) { + bitNPlusOne = bool(0x8000 & fraction); + } + else if (final_fbits > 0) { + final_fbits = 0; + } + if (scale == 14 && exp != 0) { + bitNPlusOne = true; + exp = 0; + } + else { + exp <<= (13 - scale); + } + + bits = uint16_t(regime) + uint16_t(exp) + uint16_t(final_fbits); + // n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + if (bitNPlusOne) { + uint16_t moreBits = (0x7FFF & fraction) ? 0x0001 : 0x0000; + bits += (bits & 0x0001) | moreBits; + } + } + return bits; + } + // I/O operators + friend std::ostream& operator<< (std::ostream& ostr, const posit& p); + friend std::istream& operator>> (std::istream& istr, posit& p); + + // posit - posit logic functions + friend bool operator==(const posit& lhs, const posit& rhs); + friend bool operator!=(const posit& lhs, const posit& rhs); + friend bool operator< (const posit& lhs, const posit& rhs); + friend bool operator> (const posit& lhs, const posit& rhs); + friend bool operator<=(const posit& lhs, const posit& rhs); + friend bool operator>=(const posit& lhs, const posit& rhs); + + friend bool operator< (const posit& lhs, double rhs); +}; + +// posit I/O operators +// generate a posit format ASCII format nbits.esxNN...NNp +inline std::ostream& operator<<(std::ostream& ostr, const posit& p) { + // to make certain that setw and left/right operators work properly + // we need to transform the posit into a string + std::stringstream ss; +#if POSIT_ERROR_FREE_IO_FORMAT + ss << NBITS_IS_16 << '.' << ES_IS_2 << 'x' << to_hex(p.get()) << 'p'; +#else + std::streamsize prec = ostr.precision(); + std::streamsize width = ostr.width(); + std::ios_base::fmtflags ff; + ff = ostr.flags(); + ss.flags(ff); + ss << std::setw(width) << std::setprecision(prec) << to_string(p, prec); // TODO: we need a true native serialization function +#endif + return ostr << ss.str(); +} + +// read an ASCII float or posit format: nbits.esxNN...NNp, for example: 32.2x80000000p +inline std::istream& operator>> (std::istream& istr, posit& p) { + std::string txt; + istr >> txt; + if (!parse(txt, p)) { + std::cerr << "unable to parse -" << txt << "- into a posit value\n"; + } + return istr; +} + +// convert a posit value to a string using "nar" as designation of NaR +inline std::string to_string(const posit& p, std::streamsize precision) { + if (p.isnar()) { + return std::string("nar"); + } + std::stringstream ss; + ss << std::setprecision(precision) << float(p); + return ss.str(); +} + +// posit - posit binary logic operators +inline bool operator==(const posit& lhs, const posit& rhs) { + return lhs._bits == rhs._bits; +} +inline bool operator!=(const posit& lhs, const posit& rhs) { + return !operator==(lhs, rhs); +} +inline bool operator< (const posit& lhs, const posit& rhs) { + return int16_t(lhs._bits) < int16_t(rhs._bits); +} +inline bool operator> (const posit& lhs, const posit& rhs) { + return operator< (rhs, lhs); +} +inline bool operator<=(const posit& lhs, const posit& rhs) { + return operator< (lhs, rhs) || operator==(lhs, rhs); +} +inline bool operator>=(const posit& lhs, const posit& rhs) { + return !operator< (lhs, rhs); +} + +// binary operator+() is provided by generic function +// binary operator-() is provided by generic function +// binary operator*() is provided by generic function +// binary operator/() is provided by generic function + +#if POSIT_ENABLE_LITERALS +// posit - literal logic functions + +// posit - int logic operators +inline bool operator==(const posit& lhs, int rhs) { + return operator==(lhs, posit(rhs)); +} +inline bool operator!=(const posit& lhs, int rhs) { + return !operator==(lhs, posit(rhs)); +} +inline bool operator< (const posit& lhs, int rhs) { + return operator<(lhs, posit(rhs)); +} +inline bool operator> (const posit& lhs, int rhs) { + return operator< (posit(rhs), lhs); +} +inline bool operator<=(const posit& lhs, int rhs) { + return operator< (lhs, posit(rhs)) || operator==(lhs, posit(rhs)); +} +inline bool operator>=(const posit& lhs, int rhs) { + return !operator<(lhs, posit(rhs)); +} + +// int - posit logic operators +inline bool operator==(int lhs, const posit& rhs) { + return posit(lhs) == rhs; +} +inline bool operator!=(int lhs, const posit& rhs) { + return !operator==(posit(lhs), rhs); +} +inline bool operator< (int lhs, const posit& rhs) { + return operator<(posit(lhs), rhs); +} +inline bool operator> (int lhs, const posit& rhs) { + return operator< (posit(rhs), lhs); +} +inline bool operator<=(int lhs, const posit& rhs) { + return operator< (posit(lhs), rhs) || operator==(posit(lhs), rhs); +} +inline bool operator>=(int lhs, const posit& rhs) { + return !operator<(posit(lhs), rhs); +} + +inline bool operator< (const posit& lhs, double rhs) { + return int16_t(lhs._bits) < int16_t(posit(rhs)._bits); +} + +#endif // POSIT_ENABLE_LITERALS #endif // POSIT_FAST_POSIT_16_2 diff --git a/include/universal/number/posit/specialized/posit_8_0.h b/include/universal/number/posit/specialized/posit_8_0.h index aac75fcce..9dfc1fab3 100644 --- a/include/universal/number/posit/specialized/posit_8_0.h +++ b/include/universal/number/posit/specialized/posit_8_0.h @@ -22,7 +22,7 @@ inline bool posit8_isneg(posit8_t p) { return (p.v & 0x80); } inline bool posit8_ispos(posit8_t p) { return !(p.v & 0x80); } inline bool posit8_ispowerof2(posit8_t p) { return !(p.v & 0x1); } -// decode takes the raw bits of the posit, and returns the regime, m, and returns the fraction bits in 'remainder' +// decode takes the raw bits of the posit, and returns the regime, m, and returns the remaining bits in 'remaining' inline int8_t posit8_decode_regime(const uint8_t bits, uint8_t* remaining) { int8_t m = 0; *remaining = (bits << 2) & 0xFF; diff --git a/include/universal/number/posit/specialized/posit_8_0.hpp b/include/universal/number/posit/specialized/posit_8_0.hpp index 968759bdc..2994632ed 100644 --- a/include/universal/number/posit/specialized/posit_8_0.hpp +++ b/include/universal/number/posit/specialized/posit_8_0.hpp @@ -1,7 +1,7 @@ #pragma once // posit_8_0.hpp: specialized 8-bit posit using fast compute specialized for posit<8,0> // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. @@ -19,7 +19,7 @@ namespace sw { namespace universal { #if POSIT_FAST_POSIT_8_0 #ifdef _MSC_VER #pragma message("Fast specialization of posit<8,0>") -//#else +//#else some compile time message that indicates that we are using a specialization for non MS compilers //#warning("Fast specialization of posit<8,0>") #endif diff --git a/include/universal/number/posit/specialized/posit_8_1.hpp b/include/universal/number/posit/specialized/posit_8_1.hpp index d0c49a2fe..ac4dac790 100644 --- a/include/universal/number/posit/specialized/posit_8_1.hpp +++ b/include/universal/number/posit/specialized/posit_8_1.hpp @@ -1,7 +1,7 @@ #pragma once // posit_8_1.hpp: specialized 8-bit posit using fast implementation specialized for posit<8,1> // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. @@ -26,7 +26,7 @@ namespace sw { namespace universal { #if POSIT_FAST_POSIT_8_1 #ifdef _MSC_VER #pragma message("Fast specialization of posit<8,1>") -//#else +//#else some compile time message that indicates that we are using a specialization for non MS compilers //#warning("Fast specialization of posit<8,1>") #endif diff --git a/include/universal/number/posit/specialized/posit_8_2.hpp b/include/universal/number/posit/specialized/posit_8_2.hpp index 46d788aa3..5691fe3c7 100644 --- a/include/universal/number/posit/specialized/posit_8_2.hpp +++ b/include/universal/number/posit/specialized/posit_8_2.hpp @@ -1,23 +1,18 @@ #pragma once // posit_8_2.hpp: specialized 8-bit posit using fast implementation specialized for posit<8,2> // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. // DO NOT USE DIRECTLY! // the compile guards in this file are only valid in the context of the specialization logic -// configured in the main +// configured in the main -#ifndef POSIT_FAST_POSIT_8_2 -#define POSIT_FAST_POSIT_8_2 0 -#endif +#include - // guard for the fact that we don't have a specialization yet -#if POSIT_FAST_POSIT_8_2 -#undef POSIT_FAST_POSIT_8_2 +#ifndef POSIT_FAST_POSIT_8_2 #define POSIT_FAST_POSIT_8_2 0 -#pragma message("Fast specialization of posit<8,2> requested but ignored as fast implemention is TBD") #endif namespace sw { namespace universal { @@ -26,12 +21,659 @@ namespace sw { namespace universal { #if POSIT_FAST_POSIT_8_2 #ifdef _MSC_VER #pragma message("Fast specialization of posit<8,2>") -//#else +//#else some compile time message that indicates that we are using a specialization for non MS compilers //#warning("Fast specialization of posit<8,2>") #endif // fast specialized posit<8,2> -// TODO +template<> +class posit { +public: + static constexpr unsigned nbits = NBITS_IS_8; + static constexpr unsigned es = ES_IS_2; + static constexpr unsigned sbits = 1; + static constexpr unsigned rbits = nbits - sbits; + static constexpr unsigned ebits = es; + static constexpr unsigned fbits = nbits - 3 - es; + static constexpr unsigned fhbits = fbits + 1; + static constexpr uint8_t sign_mask = 0x80; + + constexpr posit() : _bits(0) {} + posit(const posit&) = default; + posit(posit&&) = default; + posit& operator=(const posit&) = default; + posit& operator=(posit&&) = default; + + // specific value constructor + constexpr posit(const SpecificValue code) : _bits(0) { + switch (code) { + case SpecificValue::infpos: + case SpecificValue::maxpos: + maxpos(); + break; + case SpecificValue::minpos: + minpos(); + break; + case SpecificValue::zero: + default: + zero(); + break; + case SpecificValue::minneg: + minneg(); + break; + case SpecificValue::infneg: + case SpecificValue::maxneg: + maxneg(); + break; + case SpecificValue::qnan: + case SpecificValue::snan: + case SpecificValue::nar: + setnar(); + break; + } + } + + // initializers for native types + constexpr explicit posit(signed char initial_value) : _bits(0) { *this = initial_value; } + constexpr explicit posit(short initial_value) : _bits(0) { *this = initial_value; } + constexpr explicit posit(int initial_value) : _bits(0) { *this = initial_value; } + constexpr explicit posit(long initial_value) : _bits(0) { *this = initial_value; } + constexpr explicit posit(long long initial_value) : _bits(0) { *this = initial_value; } + constexpr explicit posit(char initial_value) : _bits(0) { *this = initial_value; } + constexpr explicit posit(unsigned short initial_value) : _bits(0) { *this = initial_value; } + constexpr explicit posit(unsigned int initial_value) : _bits(0) { *this = initial_value; } + constexpr explicit posit(unsigned long initial_value) : _bits(0) { *this = initial_value; } + constexpr explicit posit(unsigned long long initial_value) : _bits(0) { *this = initial_value; } + explicit posit(float initial_value) : _bits(0) { *this = initial_value; } + posit(double initial_value) : _bits(0) { *this = initial_value; } + explicit posit(long double initial_value) : _bits(0) { *this = initial_value; } + + // assignment operators for native types + constexpr posit& operator=(signed char rhs) { return operator=((int)(rhs)); } + constexpr posit& operator=(short rhs) { return operator=((int)(rhs)); } + constexpr posit& operator=(int rhs) { return integer_assign(rhs); } + constexpr posit& operator=(long rhs) { return operator=((int)(rhs)); } + constexpr posit& operator=(long long rhs) { return operator=((int)(rhs)); } + constexpr posit& operator=(char rhs) { return operator=((int)(rhs)); } + constexpr posit& operator=(unsigned short rhs) { return operator=((int)(rhs)); } + constexpr posit& operator=(unsigned int rhs) { return operator=((int)(rhs)); } + constexpr posit& operator=(unsigned long rhs) { return operator=((int)(rhs)); } + constexpr posit& operator=(unsigned long long rhs) { return operator=((int)(rhs)); } + posit& operator=(float rhs) { return float_assign(rhs); } + posit& operator=(double rhs) { return float_assign(float(rhs)); } + posit& operator=(long double rhs) { return float_assign(float(rhs)); } + + explicit operator long double() const { return to_long_double(); } + explicit operator double() const { return to_double(); } + explicit operator float() const { return to_float(); } + explicit operator long long() const { return to_long_long(); } + explicit operator long() const { return to_long(); } + explicit operator int() const { return to_int(); } + explicit operator unsigned long long() const { return to_long_long(); } + explicit operator unsigned long() const { return to_long(); } + explicit operator unsigned int() const { return to_int(); } + + posit& setBitblock(const sw::universal::bitblock& raw) { + _bits = uint8_t(raw.to_ulong()); + return *this; + } + constexpr posit& setbits(uint64_t value) { + _bits = uint8_t(value & 0xffu); + return *this; + } + constexpr posit operator-() const { + posit p; + return p.setbits((~_bits) + 1ul); + } + // arithmetic assignment operators + posit& operator+=(const posit& b) { + + return *this; + } + posit& operator-=(const posit& b) { + + return *this; + } + posit& operator*=(const posit& b) { + + return *this; + } + posit& operator/=(const posit& b) { + + return *this; + } + + // prefix/postfix operators + posit& operator++() { + ++_bits; + return *this; + } + posit operator++(int) { + posit tmp(*this); + operator++(); + return tmp; + } + posit& operator--() { + --_bits; + return *this; + } + posit operator--(int) { + posit tmp(*this); + operator--(); + return tmp; + } + + posit reciprocate() const { + posit p = 1.0 / *this; + return p; + } + posit abs() const { + if (isneg()) { + return posit(-*this); + } + return *this; + } + + // Selelctors + inline bool sign() const { return (_bits & sign_mask); } + inline bool isnar() const { return (_bits == sign_mask); } + inline bool iszero() const { return (_bits == 0x00); } + inline bool isone() const { return (_bits == 0x40); } // pattern 010000... + inline bool isminusone() const { return (_bits == 0xC0); } // pattern 110000... + inline bool isneg() const { return (_bits & sign_mask); } + inline bool ispos() const { return !isneg(); } + inline bool ispowerof2() const { return !(_bits & 0x1); } + + inline int sign_value() const { return (_bits & sign_mask ? -1 : 1); } + + bitblock get() const { bitblock bb; bb = int(_bits); return bb; } + unsigned long long encoding() const { return (unsigned long long)(_bits); } + + // Modifiers + inline void clear() { _bits = 0; } + inline void setzero() { clear(); } + inline void setnar() { _bits = 0x80; } + inline posit& minpos() { + clear(); + return ++(*this); + } + inline posit& maxpos() { + setnar(); + return --(*this); + } + inline posit& zero() { + clear(); + return *this; + } + inline posit& minneg() { + clear(); + return --(*this); + } + inline posit& maxneg() { + setnar(); + return ++(*this); + } + inline posit twosComplement() const { + posit p; + int8_t v = -*(int8_t*)&_bits; + p.setbits(v); + return p; + } + +private: + uint8_t _bits; + + // decode takes the raw bits of the posit, + // returns the regime, m, and leaves the remaining bits in 'remainder' + int8_t decode_regime(uint8_t bits, uint8_t* remaining) const { + int8_t m = 0; + *remaining = (bits << 2) & 0xFF; + if (bits & 0x40) { // positive regimes + while (*remaining >> 7) { + ++m; + *remaining = (*remaining << 1) & 0xFF; + } + } + else { // negative regimes + m = -1; + while (!(*remaining >> 7)) { + --m; + *remaining = (*remaining << 1) & 0xFF; + } + *remaining &= 0x7F; + } + return m; + } + + // extract_exponent takes the regime, and the remaining bits + // returns the exponent value, and updates remaining to hold just the fraction bits + uint8_t extract_exponent(int8_t m, uint8_t* remaining) const { + // 765 432 10 + // 0.01.00.000 m = -1 2 ebits #00.000.-- >> 5 + // 0.001.00.00 m = -2 2 ebits #00.00.--- >> 5 + // 0.0001.00.0 m = -3 2 ebits #00.0.---- >> 5 + // 0.00001.00. m = -4 2 ebits #00.------ >> 5 + // 0.000001.0. m = -5 1 ebit #0-.------ >> 6 + // 0.0000001.. m = -6 0 ebits #. >> 7 = 0 + // 0.0000000.. m = -7 0 ebits #. >> 7 = 0 + + // 0.10.00.000 m = 0 2 ebits #00.000.-- >> 5 + // 0.110.00.00 m = 1 2 ebits #00.00.--- >> 5 + // 0.1110.00.0 m = 2 2 ebits #00.0.---- >> 5 + // 0.11110.00. m = 3 2 ebits #00..----- >> 5 + // 0.111110.0. m = 4 1 ebit #0.------- >> 6 + // 0.1111110.. m = 5 0 ebits #. >> 7 = 0 + // 0.1111111.. m = 6 0 ebits #. >> 7 = 0 + uint8_t ebits{ 0 }; + switch (m) { + case -5: case 4: + ebits = (*remaining >> 5); + *remaining <<= 1; + break; + case -7: case -6: case 5: case 6: + ebits = 0; + *remaining = 0; + default: + ebits = (*remaining >> 5); + *remaining <<= 2; + break; + } + return ebits; + } + + float fraction_value(uint8_t fraction) const { + float v = 0.0f; + float scale = 1.0f; + uint8_t mask = 0x80; + for (int i = 5; i >= 0; i--) { + if (fraction & mask) v += scale; + scale *= 0.5f; + mask >>= 1; + if (scale == 0.0) break; + } + return v; + } + + void checkExtraTwoBits(float f, float temp, bool* bitsNPlusOne, bool* bitsMore) { + temp /= 2.0; + if (temp <= f) { + *bitsNPlusOne = 1; + f -= temp; + } + if (f > 0) + *bitsMore = 1; + } + uint16_t convertFraction(float f, uint8_t fracLength, bool* bitsNPlusOne, bool* bitsMore) { + + uint_fast8_t frac = 0; + + if (f == 0) return 0; + else if (f == INFINITY) return 0x80; + + f -= 1; //remove hidden bit + if (fracLength == 0) { + checkExtraTwoBits(f, 1.0, bitsNPlusOne, bitsMore); + } + else { + float temp = 1; + while (true) { + temp /= 2; + if (temp <= f) { + f -= temp; + fracLength--; + frac = (frac << 1) + 1; //shift in one + if (f == 0) { + //put in the rest of the bits + frac <<= (uint_fast8_t)fracLength; + break; + } + + if (fracLength == 0) { + checkExtraTwoBits(f, temp, bitsNPlusOne, bitsMore); + + break; + } + } + else { + frac <<= 1; //shift in a zero + fracLength--; + if (fracLength == 0) { + checkExtraTwoBits(f, temp, bitsNPlusOne, bitsMore); + break; + } + } + } + } + //printf("convertfloat: frac:%d bitsNPlusOne: %d, bitsMore: %d\n", frac, bitsNPlusOne, bitsMore); + return frac; + } + + // Conversion functions +#if POSIT_THROW_ARITHMETIC_EXCEPTION + int to_int() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return int(to_float()); + } + long to_long() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return long(to_double()); + } + long long to_long_long() const { + if (iszero()) return 0; + if (isnar()) throw posit_nar{}; + return long(to_long_double()); + } +#else + int to_int() const { + if (iszero()) return 0; + if (isnar()) return int(INFINITY); + return int(to_float()); + } + long to_long() const { + if (iszero()) return 0; + if (isnar()) return long(INFINITY); + return long(to_double()); + } + long long to_long_long() const { + if (iszero()) return 0; + if (isnar()) return (long long)(INFINITY); + return long(to_long_double()); + } +#endif + + float to_float() const { + if (iszero()) return 0.0f; + if (isnar()) return NAN; // INFINITY is not semantically correct. NaR is Not a Real and thus is more closely related to a NAN, or Not a Number + + uint8_t bits = ((_bits & 0x80) ? -_bits : _bits); + uint8_t remaining = 0; + int8_t m = decode_regime(bits, &remaining); +// std::cout << to_binary(bits, 8) << " : " << to_binary(remaining, 8) << " : "; + int regimeScale = (1 << es) * m; + float s = (float)(sign_value()); + float r = (m > 0 ? (float)(1 << regimeScale) : (1.0f / (float)(1 << -regimeScale))); + uint8_t ebits = extract_exponent(m, &remaining); +// std::cout << to_binary(ebits, 2) << " : " << to_binary(remaining, 8) << '\n'; + float e = float((uint32_t(1) << ebits)); + remaining |= 0x80; // set hidden bit + float f = fraction_value(remaining); +// std::cout << "regime value : " << r << '\n'; +// std::cout << "exponent value : " << e << '\n'; +// std::cout << "fraction value : " << f << '\n'; + + return s * r * e * f; + } + double to_double() const { + return (double)to_float(); + } + long double to_long_double() const { + return (long double)to_float(); + } + + // helper methods + constexpr posit& integer_assign(int rhs) { + // special case for speed as this is a common initialization + if (rhs == 0) { + _bits = 0x0; + return *this; + } + bool sign = (rhs < 0) ? true : false; + int v = sign ? -rhs : rhs; // project to positive side of the projective reals + uint8_t raw = 0; + if (v > 48) { // +-maxpos + raw = 0x7F; + } + else if (v < 2) { + raw = (v << 6); + } + else { + uint8_t mask = 0x40; + int8_t k = 6; + uint8_t fraction_bits = (v & 0xff); + while (!(fraction_bits & mask)) { + k--; + fraction_bits <<= 1; + } + fraction_bits = (fraction_bits ^ mask); + raw = (0x7F ^ (0x3F >> k)) | (fraction_bits >> (k + 1)); + + mask = 0x1 << k; //bitNPlusOne + if (mask & fraction_bits) { + if (((mask - 1) & fraction_bits) | ((mask << 1) & fraction_bits)) raw++; + } + } + + _bits = sign ? -raw : raw; + return *this; + } + posit& float_assign(float rhs) { + bool sign = false; + bool bitNPlusOne = 0, bitsMore = 0; + constexpr float _minpos = 5.9604644775390625e-08f; + constexpr float _maxpos = 16777216.0f; + + sign = (rhs < 0.0) ? true : false; + + constexpr int spfbits = std::numeric_limits::digits - 1; + internal::value v(rhs); + if (v.isinf() || v.isnan()) { + _bits = 0x80; + } + else if (rhs == 0) { + _bits = 0; + } + else if (rhs == 1.0f) { + _bits = 0x40; + } + else if (rhs == -1.0f) { + _bits = 0xC0; + } + else if (rhs >= _maxpos) { + _bits = 0x7F; + } + else if (rhs <= -_maxpos) { + _bits = 0x81; + } + else if (rhs <= _minpos && !sign) { + _bits = 0x01; + } + else if (rhs >= -_minpos && sign) { + _bits = 0xFF; + } + else if (rhs < -1 || rhs > 1) { + if (sign) { + rhs = -rhs; // project to positive reals to simplify computation + } + + if (rhs <= _minpos) { + _bits = 0x01; + } + else { // determine the regime + unsigned k = 1; //because k = m-1, we need to add back 1 + while (rhs >= 2) { + rhs *= 0.5; + k++; + } + + // rounding off regime bits + if (k > 6) { + _bits = 0x7F; + } + else { + int8_t fracLength = 6 - k; + uint8_t frac = (uint8_t)convertFraction(rhs, fracLength, &bitNPlusOne, &bitsMore); + uint_fast8_t regime = 0x7F - (0x7F >> k); + _bits = (regime + frac); + if (bitNPlusOne) _bits += ((_bits & 0x01) | bitsMore); + } + _bits = sign ? -_bits : _bits; + } + } + else if (rhs > -1 && rhs < 1) { + if (sign) { + rhs = -rhs; + } + unsigned k = 0; + while (rhs < 1) { + rhs *= 2; + k++; + } + // rounding off regime bits + if (k > 6) + _bits = 0x1; + else { + int8_t fracLength = 6 - k; + uint8_t frac = (uint8_t)convertFraction(rhs, fracLength, &bitNPlusOne, &bitsMore); + uint8_t regime = 0x40 >> k; + _bits = (regime + frac); + if (bitNPlusOne) _bits += ((_bits & 0x01) | bitsMore); + } + _bits = sign ? -_bits : _bits; + } + else { + //NaR - for NaN, INF and all other combinations + _bits = 0x80; + } + return *this; + } + + // I/O operators + friend std::ostream& operator<< (std::ostream& ostr, const posit& p); + friend std::istream& operator>> (std::istream& istr, posit& p); + + // posit - posit logic functions + friend bool operator==(const posit& lhs, const posit& rhs); + friend bool operator!=(const posit& lhs, const posit& rhs); + friend bool operator< (const posit& lhs, const posit& rhs); + friend bool operator> (const posit& lhs, const posit& rhs); + friend bool operator<=(const posit& lhs, const posit& rhs); + friend bool operator>=(const posit& lhs, const posit& rhs); + + friend bool operator< (const posit& lhs, double rhs); + friend bool operator< (double lhs, const posit& rhs); +}; + +// posit I/O operators +// generate a posit format ASCII format nbits.esxNN...NNp +inline std::ostream& operator<<(std::ostream& ostr, const posit& p) { + // to make certain that setw and left/right operators work properly + // we need to transform the posit into a string + std::stringstream ss; +#if POSIT_ERROR_FREE_IO_FORMAT + ss << NBITS_IS_8 << '.' << ES_IS_2 << 'x' << to_hex(p.get()) << 'p'; +#else + std::streamsize prec = ostr.precision(); + std::streamsize width = ostr.width(); + std::ios_base::fmtflags ff; + ff = ostr.flags(); + ss.flags(ff); + ss << std::setw(width) << std::setprecision(prec) << to_string(p, prec); // TODO: we need a true native serialization function +#endif + return ostr << ss.str(); +} + +// read an ASCII float or posit format: nbits.esxNN...NNp, for example: 32.2x80000000p +inline std::istream& operator>> (std::istream& istr, posit& p) { + std::string txt; + istr >> txt; + if (!parse(txt, p)) { + std::cerr << "unable to parse -" << txt << "- into a posit value\n"; + } + return istr; +} + +// convert a posit value to a string using "nar" as designation of NaR +inline std::string to_string(const posit& p, std::streamsize precision) { + if (p.isnar()) { + return std::string("nar"); + } + std::stringstream ss; + ss << std::setprecision(precision) << float(p); + return ss.str(); +} + +// posit - posit binary logic operators +inline bool operator==(const posit& lhs, const posit& rhs) { + return lhs._bits == rhs._bits; +} +inline bool operator!=(const posit& lhs, const posit& rhs) { + return !operator==(lhs, rhs); +} +inline bool operator< (const posit& lhs, const posit& rhs) { + return int8_t(lhs._bits) < int8_t(rhs._bits); +} +inline bool operator> (const posit& lhs, const posit& rhs) { + return operator< (rhs, lhs); +} +inline bool operator<=(const posit& lhs, const posit& rhs) { + return operator< (lhs, rhs) || operator==(lhs, rhs); +} +inline bool operator>=(const posit& lhs, const posit& rhs) { + return !operator< (lhs, rhs); +} + +/* base class has these operators: no need to specialize */ +inline posit operator+(const posit& lhs, const posit& rhs) { + posit result = lhs; + return result += rhs; +} +inline posit operator-(const posit& lhs, const posit& rhs) { + posit result = lhs; + return result -= rhs; +} + +// binary operator*() is provided by generic class + +#if POSIT_ENABLE_LITERALS +// posit - literal logic functions + +// posit - int logic operators +inline bool operator==(const posit& lhs, int rhs) { + return operator==(lhs, posit(rhs)); +} +inline bool operator!=(const posit& lhs, int rhs) { + return !operator==(lhs, posit(rhs)); +} +inline bool operator< (const posit& lhs, int rhs) { + return operator<(lhs, posit(rhs)); +} +inline bool operator> (const posit& lhs, int rhs) { + return operator< (posit(rhs), lhs); +} +inline bool operator<=(const posit& lhs, int rhs) { + return operator< (lhs, posit(rhs)) || operator==(lhs, posit(rhs)); +} +inline bool operator>=(const posit& lhs, int rhs) { + return !operator<(lhs, posit(rhs)); +} + +// int - posit logic operators +inline bool operator==(int lhs, const posit& rhs) { + return posit(lhs) == rhs; +} +inline bool operator!=(int lhs, const posit& rhs) { + return !operator==(posit(lhs), rhs); +} +inline bool operator< (int lhs, const posit& rhs) { + return operator<(posit(lhs), rhs); +} +inline bool operator> (int lhs, const posit& rhs) { + return operator< (posit(rhs), lhs); +} +inline bool operator<=(int lhs, const posit& rhs) { + return operator< (posit(lhs), rhs) || operator==(posit(lhs), rhs); +} +inline bool operator>=(int lhs, const posit& rhs) { + return !operator<(posit(lhs), rhs); +} + +inline bool operator< (const posit& lhs, double rhs) { + return int8_t(lhs._bits) < int8_t(posit(rhs)._bits); +} + +#endif // POSIT_ENABLE_LITERALS #endif // POSIT_FAST_POSIT_8_2 diff --git a/static/posit/specialized/posit_128_2.cpp b/static/posit/specialized/posit_128_2.cpp index 7f4d94735..f0d2769c5 100644 --- a/static/posit/specialized/posit_128_2.cpp +++ b/static/posit/specialized/posit_128_2.cpp @@ -1,6 +1,6 @@ // posit_128_2.cpp: test suite runner for specialized 128-bit posit<128,2> // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. #include diff --git a/static/posit/specialized/posit_128_4.cpp b/static/posit/specialized/posit_128_4.cpp index c05ceca58..ff0935c53 100644 --- a/static/posit/specialized/posit_128_4.cpp +++ b/static/posit/specialized/posit_128_4.cpp @@ -1,14 +1,10 @@ // posit_128_4.cpp: test suite runner for specialized 128-bit posit<128,4> // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#if defined(_MSC_VER) -#pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4820) // bytes padding added after data member -#pragma warning(disable : 4710) // function not inlined -#endif +#include + // Configure the posit template environment // first: enable fast specialized posit<128,4> //#define POSIT_FAST_SPECIALIZATION // turns on all fast specializations diff --git a/static/posit/specialized/posit_16_1.cpp b/static/posit/specialized/posit_16_1.cpp index ef7001338..64a9d9734 100644 --- a/static/posit/specialized/posit_16_1.cpp +++ b/static/posit/specialized/posit_16_1.cpp @@ -1,14 +1,10 @@ // posit_16_1.cpp: test suite runner for specialized posit<16,1> // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#if defined(_MSC_VER) -#pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4820) // bytes padding added after data member -#pragma warning(disable : 4710) // function not inlined -#endif +#include + // Configure the posit template environment // first: enable fast specialized posit<16,1> //#define POSIT_FAST_SPECIALIZATION @@ -16,7 +12,6 @@ // second: enable posit arithmetic exceptions #define POSIT_THROW_ARITHMETIC_EXCEPTION 1 #include -//#include #include #include @@ -28,6 +23,10 @@ // It is the responsibility of the regression test to organize the tests in a quartile progression. //#undef REGRESSION_LEVEL_OVERRIDE #ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 #define REGRESSION_LEVEL_1 1 #define REGRESSION_LEVEL_2 1 #define REGRESSION_LEVEL_3 1 @@ -62,16 +61,16 @@ try { posit a, b, c; fa = 2; fb = 1; a = fa; b = fb; c = a; c += b; - cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << endl; + std::cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << '\n'; fa = 2; fb = -1; a = fa; b = fb; c = a; c += b; - cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << endl; + std::cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << '\n'; fa = -2; fb = 1; a = fa; b = fb; c = a; c += b; - cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << endl; + std::cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << '\n'; fa = -2; fb = -1; a = fa; b = fb; c = a; c += b; - cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << endl; + std::cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << '\n'; nrOfFailedTestCases += ReportTestResult(VerifyBinaryOperatorThroughRandoms(bReportIndividualTestCases, OPCODE_IPA, 100), tag, "+= (native) "); nrOfFailedTestCases += ReportTestResult(VerifyBinaryOperatorThroughRandoms(bReportIndividualTestCases, OPCODE_IPS, 100), tag, "-= (native) "); @@ -90,19 +89,18 @@ try { a = -1; b = -1; ++b; testLogicOperators(a, b); - a.set_raw_bits(0xfffd); b.set_raw_bits(0xfffe); + a.setbits(0xfffd); b.setbits(0xfffe); testLogicOperators(a, b); uint16_t v1 = 0x7fff; uint16_t v2 = 0x8001; - cout << v1 << " vs " << int16_t(v1) << endl; - cout << v2 << " vs " << int16_t(v2) << endl; - a.set_raw_bits(v1); b.set_raw_bits(v2); + std::cout << v1 << " vs " << int16_t(v1) << '\n'; + std::cout << v2 << " vs " << int16_t(v2) << '\n'; + a.setbits(v1); b.setbits(v2); testLogicOperators(a, b); testLogicOperators(b, a); - - cout << nrOfFailedTestCases << " number of failures\n"; + std::cout << nrOfFailedTestCases << " number of failures\n"; nrOfFailedTestCases = 0; // ignore failures in manual testing #else diff --git a/static/posit/specialized/posit_16_2.cpp b/static/posit/specialized/posit_16_2.cpp index 98355c90f..8a563c72b 100644 --- a/static/posit/specialized/posit_16_2.cpp +++ b/static/posit/specialized/posit_16_2.cpp @@ -1,6 +1,6 @@ // posit_16_2.cpp: test suite runner for specialized posit<16,2> // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. #include @@ -12,7 +12,6 @@ // second: enable posit arithmetic exceptions #define POSIT_THROW_ARITHMETIC_EXCEPTION 1 #include -//#include #include #include @@ -24,6 +23,10 @@ // It is the responsibility of the regression test to organize the tests in a quartile progression. //#undef REGRESSION_LEVEL_OVERRIDE #ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 #define REGRESSION_LEVEL_1 1 #define REGRESSION_LEVEL_2 1 #define REGRESSION_LEVEL_3 1 @@ -35,14 +38,14 @@ try { using namespace sw::universal; // configure a posit<16,2> - constexpr size_t nbits = 16; - constexpr size_t es = 2; + constexpr size_t nbits = NBITS_IS_16; + constexpr size_t es = ES_IS_2; int nrOfFailedTestCases = 0; bool bReportIndividualTestCases = false; size_t RND_TEST_CASES = 10000; -#if POSIT_FAST_POSIT_16_1 +#if POSIT_FAST_POSIT_16_2 std::cout << "Fast specialization posit<16,2> configuration tests\n"; #else std::cout << "Standard posit<16,2> configuration tests\n"; @@ -58,16 +61,16 @@ try { posit a, b, c; fa = 2; fb = 1; a = fa; b = fb; c = a; c += b; - cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << endl; + std::cout << to_binary(a) << " + " << to_binary(b) << " = " << to_binary(a + b) << "(" << (fa + fb) << ") " << to_binary(c) << "(" << c << ")" << '\n'; fa = 2; fb = -1; a = fa; b = fb; c = a; c += b; - cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << endl; + std::cout << to_binary(a) << " + " << to_binary(b) << " = " << to_binary(a + b) << "(" << (fa + fb) << ") " << to_binary(c) << "(" << c << ")" << '\n'; fa = -2; fb = 1; a = fa; b = fb; c = a; c += b; - cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << endl; + std::cout << to_binary(a) << " + " << to_binary(b) << " = " << to_binary(a + b) << "(" << (fa + fb) << ") " << to_binary(c) << "(" << c << ")" << '\n'; fa = -2; fb = -1; a = fa; b = fb; c = a; c += b; - cout << hex_format(a) << " + " << hex_format(b) << " = " << hex_format(a + b) << "(" << (fa + fb) << ") " << hex_format(c) << "(" << c << ")" << endl; + std::cout << to_binary(a) << " + " << to_binary(b) << " = " << to_binary(a + b) << "(" << (fa + fb) << ") " << to_binary(c) << "(" << c << ")" << '\n'; nrOfFailedTestCases += ReportTestResult(VerifyBinaryOperatorThroughRandoms(bReportIndividualTestCases, OPCODE_IPA, 100), tag, "+= (native) "); nrOfFailedTestCases += ReportTestResult(VerifyBinaryOperatorThroughRandoms(bReportIndividualTestCases, OPCODE_IPS, 100), tag, "-= (native) "); @@ -86,19 +89,18 @@ try { a = -1; b = -1; ++b; testLogicOperators(a, b); - a.set_raw_bits(0xfffd); b.set_raw_bits(0xfffe); + a.setbits(0xfffd); b.setbits(0xfffe); testLogicOperators(a, b); uint16_t v1 = 0x7fff; uint16_t v2 = 0x8001; - cout << v1 << " vs " << int16_t(v1) << endl; - cout << v2 << " vs " << int16_t(v2) << endl; - a.set_raw_bits(v1); b.set_raw_bits(v2); + std::cout << v1 << " vs " << int16_t(v1) << '\n'; + std::cout << v2 << " vs " << int16_t(v2) << '\n'; + a.setbits(v1); b.setbits(v2); testLogicOperators(a, b); testLogicOperators(b, a); - - cout << nrOfFailedTestCases << " number of failures\n"; + std::cout << nrOfFailedTestCases << " number of failures\n"; nrOfFailedTestCases = 0; // ignore failures in manual testing #else diff --git a/static/posit/specialized/posit_256_2.cpp b/static/posit/specialized/posit_256_2.cpp index 0c5f107e7..fa0401738 100644 --- a/static/posit/specialized/posit_256_2.cpp +++ b/static/posit/specialized/posit_256_2.cpp @@ -1,6 +1,6 @@ // posit_256_2.cpp: test suite runner for fast specialized 256-bit posit<256,2> // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. #include diff --git a/static/posit/specialized/posit_256_5.cpp b/static/posit/specialized/posit_256_5.cpp index 94d4b64ac..0457228dd 100644 --- a/static/posit/specialized/posit_256_5.cpp +++ b/static/posit/specialized/posit_256_5.cpp @@ -1,14 +1,10 @@ // posit_256_5.cpp: test suite runner for fast specialized 256-bit posit<256,5> // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#if defined(_MSC_VER) -#pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4820) // bytes padding added after data member -#pragma warning(disable : 4710) // function not inlined -#endif +#include + // Configure the posit template environment // first: enable fast specialized posit<256,5> //#define POSIT_FAST_SPECIALIZATION // turns on all fast specializations diff --git a/static/posit/specialized/posit_2_0.cpp b/static/posit/specialized/posit_2_0.cpp index 186a8f843..7ea86dada 100644 --- a/static/posit/specialized/posit_2_0.cpp +++ b/static/posit/specialized/posit_2_0.cpp @@ -1,14 +1,10 @@ // posit_2_0.cpp: test suite runner for specialized 2-bit posits based on look-up tables // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#if defined(_MSC_VER) -#pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4820) // bytes padding added after data member -#pragma warning(disable : 4710) // function not inlined -#endif +#include + // enable fast specialized posit<2,0> //#define POSIT_FAST_SPECIALIZATION #define POSIT_FAST_POSIT_2_0 1 diff --git a/static/posit/specialized/posit_32_2.cpp b/static/posit/specialized/posit_32_2.cpp index 2c85042e3..3ed5dd463 100644 --- a/static/posit/specialized/posit_32_2.cpp +++ b/static/posit/specialized/posit_32_2.cpp @@ -1,14 +1,10 @@ // posit_32_2.cpp: test suite runner for fast specialized posit<32,2> // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#if defined(_MSC_VER) -#pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4820) // bytes padding added after data member -#pragma warning(disable : 4710) // function not inlined -#endif +#include + // Configure the posit template environment // first: enable fast specialized posit<32,2> //#define POSIT_FAST_SPECIALIZATION // turns on all fast specializations diff --git a/static/posit/specialized/posit_3_0.cpp b/static/posit/specialized/posit_3_0.cpp index c7a2be8c9..a8946660c 100644 --- a/static/posit/specialized/posit_3_0.cpp +++ b/static/posit/specialized/posit_3_0.cpp @@ -1,14 +1,10 @@ // posit_3_0.cpp: test suite runner for specialized 3-bit posits based on look-up tables // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#if defined(_MSC_VER) -#pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4820) // bytes padding added after data member -#pragma warning(disable : 4710) // function not inlined -#endif +#include + // enable fast specialized posit<3,0> //#define POSIT_FAST_SPECIALIZATION #define POSIT_FAST_POSIT_3_0 1 diff --git a/static/posit/specialized/posit_48_2.cpp b/static/posit/specialized/posit_48_2.cpp index cc010342f..37d87558a 100644 --- a/static/posit/specialized/posit_48_2.cpp +++ b/static/posit/specialized/posit_48_2.cpp @@ -1,14 +1,10 @@ // posit_48_2.cpp: test suite runner for specialized extended standard 48-bit posit<48,2> // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#if defined(_MSC_VER) -#pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4820) // bytes padding added after data member -#pragma warning(disable : 4710) // function not inlined -#endif +#include + // Configure the posit template environment // first: enable fast specialized posit<48,2> #define POSIT_FAST_POSIT_48_2 1 // TODO: fast posit<48,2> not implemented yet diff --git a/static/posit/specialized/posit_4_0.cpp b/static/posit/specialized/posit_4_0.cpp index 766f89c17..d1f75f243 100644 --- a/static/posit/specialized/posit_4_0.cpp +++ b/static/posit/specialized/posit_4_0.cpp @@ -1,14 +1,10 @@ // posit_4_0.cpp: test suite runner for specialized 4-bit posits based on look-up tables // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#if defined(_MSC_VER) -#pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4820) // bytes padding added after data member -#pragma warning(disable : 4710) // function not inlined -#endif +#include + // enable fast specialized posit<4,0> //#define POSIT_FAST_SPECIALIZATION #define POSIT_FAST_POSIT_4_0 1 diff --git a/static/posit/specialized/posit_64_2.cpp b/static/posit/specialized/posit_64_2.cpp index 3349dd378..837271024 100644 --- a/static/posit/specialized/posit_64_2.cpp +++ b/static/posit/specialized/posit_64_2.cpp @@ -1,9 +1,10 @@ // posit_64_2.cpp: test suite runner for fast specialized posit<64,2> // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. #include + // Configure the posit template environment // first: enable fast specialized posit<64,2> #define POSIT_FAST_POSIT_64_2 1 // TODO: fast posit<64,2> not implemented yet diff --git a/static/posit/specialized/posit_64_3.cpp b/static/posit/specialized/posit_64_3.cpp index bd09fcebd..f73f54aee 100644 --- a/static/posit/specialized/posit_64_3.cpp +++ b/static/posit/specialized/posit_64_3.cpp @@ -1,14 +1,10 @@ // posit_64_3.cpp: test suite runner for fast specialized posit<64,3> // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#if defined(_MSC_VER) -#pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4820) // bytes padding added after data member -#pragma warning(disable : 4710) // function not inlined -#endif +#include + // Configure the posit template environment // first: enable fast specialized posit<64,3> #define POSIT_FAST_POSIT_64_3 1 // TODO: fast posit<64,3> not implemented yet diff --git a/static/posit/specialized/posit_8_0.cpp b/static/posit/specialized/posit_8_0.cpp index e3dd84114..8c73787dd 100644 --- a/static/posit/specialized/posit_8_0.cpp +++ b/static/posit/specialized/posit_8_0.cpp @@ -1,14 +1,10 @@ // posit_8_0.cpp: test suite runner for fast specialized posit<8,0> // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#if defined(_MSC_VER) -#pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4820) // bytes padding added after data member -#pragma warning(disable : 4710) // function not inlined -#endif +#include + // Configure the posit template environment // first: enable fast specialized posit<8,0> #define POSIT_FAST_POSIT_8_0 1 diff --git a/static/posit/specialized/posit_8_1.cpp b/static/posit/specialized/posit_8_1.cpp index 6d4465db5..b6af3acbf 100644 --- a/static/posit/specialized/posit_8_1.cpp +++ b/static/posit/specialized/posit_8_1.cpp @@ -1,6 +1,6 @@ // posit_8_1.cpp: test suite runner for fast specialized posit<8,1> // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. #include @@ -29,6 +29,22 @@ void GenerateValues() { } } +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 0 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. +//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + int main() try { using namespace sw::universal; diff --git a/static/posit/specialized/posit_8_2.cpp b/static/posit/specialized/posit_8_2.cpp index 5068a7250..866fb26ef 100644 --- a/static/posit/specialized/posit_8_2.cpp +++ b/static/posit/specialized/posit_8_2.cpp @@ -1,6 +1,6 @@ // posit_8_2.cpp: test suite runner for fast specialized posit<8,2> // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. #include @@ -18,6 +18,22 @@ specialized small standard 8-bit posit with es = 2 */ +// Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override +#define MANUAL_TESTING 1 +// REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity +// It is the responsibility of the regression test to organize the tests in a quartile progression. +//#undef REGRESSION_LEVEL_OVERRIDE +#ifndef REGRESSION_LEVEL_OVERRIDE +#undef REGRESSION_LEVEL_1 +#undef REGRESSION_LEVEL_2 +#undef REGRESSION_LEVEL_3 +#undef REGRESSION_LEVEL_4 +#define REGRESSION_LEVEL_1 1 +#define REGRESSION_LEVEL_2 1 +#define REGRESSION_LEVEL_3 1 +#define REGRESSION_LEVEL_4 1 +#endif + void GenerateValues() { using namespace sw::universal; constexpr unsigned int NR_POSITS = 256; @@ -35,14 +51,14 @@ try { // no randoms, 8-bit posits can be done exhaustively - constexpr size_t nbits = 8; - constexpr size_t es = 2; + constexpr size_t nbits = NBITS_IS_8; + constexpr size_t es = ES_IS_2; int nrOfFailedTestCases = 0; bool bReportIndividualTestCases = false; std::string tag = " posit<8,2>"; -#if POSIT_FAST_POSIT_8_1 +#if POSIT_FAST_POSIT_8_2 std::cout << "Fast specialization posit<8,2> configuration tests\n"; #else std::cout << "Standard posit<8,2> configuration tests\n"; @@ -73,6 +89,30 @@ try { test = "is positive"; nrOfFailedTestCases += ReportCheck(tag, test, p.ispos()); + p.setbits(0x64); + std::cout << std::setw(4) << 0x64 << " : " << color_print(p) << " : " << p << '\n'; + p.setbits(0x65); + std::cout << std::setw(4) << 0x65 << " : " << color_print(p) << " : " << p << '\n'; + p.setbits(0x66); + std::cout << std::setw(4) << 0x66 << " : " << color_print(p) << " : " << p << '\n'; + + // 124: 01111100 01111100 4 0 16 111110- 0- --- 65536 8.2x7Cp + // 125: 01111101 01111101 4 0 18 111110 - 1 - -- - 262144 + p.setbits(0x7C); + std::cout << std::setw(4) << 0x7C << " : " << color_print(p) << " : " << p << '\n'; + p.setbits(0x7D); + std::cout << std::setw(4) << 0x7D << " : " << color_print(p) << " : " << p << '\n'; + float f = float(p); + +// goto epilog; + + for (unsigned i = 0; i < 128; ++i) { + p.setbits(i); + std::cout << std::setw(4) << i << " : " << color_print(p) << " : " << p << '\n'; + } + + goto epilog; + // logic tests std::cout << "Logic operator tests\n"; nrOfFailedTestCases += ReportTestResult( VerifyPositLogicEqual (), tag, " == (native) "); @@ -119,6 +159,7 @@ try { nrOfFailedTestCases += ReportTestResult( VerifyPowerFunction (bReportIndividualTestCases), tag, "pow "); +epilog: return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS); } catch (char const* msg) { diff --git a/static/posit/specialized/quire_32_2.cpp b/static/posit/specialized/quire_32_2.cpp index a09ab79c7..7348abcac 100644 --- a/static/posit/specialized/quire_32_2.cpp +++ b/static/posit/specialized/quire_32_2.cpp @@ -1,14 +1,10 @@ // quire_32_2.cpp: test suite runner for dot product and fused dot product functionality tests for fast specialized posit<32,2> // -// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. -#if defined(_MSC_VER) -#pragma warning(disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified -#pragma warning(disable : 4514) // unreferenced inline function has been removed -#pragma warning(disable : 4820) // bytes padding added after data member -#pragma warning(disable : 4710) // function not inlined -#endif +#include + // Configure the posit template environment // first: enable fast specialized posit<32,2> //#define POSIT_FAST_SPECIALIZATION // turns on all fast specializations