From 7c420ecd8400c36c8e02e7344fc6f356f0a791c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mallas=C3=A9n=20Quintana?= Date: Fri, 22 Dec 2023 00:37:28 +0100 Subject: [PATCH] Implement specialized posit<16,2> sub, mul, div (#404) Div is not working 100% yet --- .../number/posit/specialized/posit_16_2.hpp | 253 +++++++++--------- static/posit/specialized/posit_16_2.cpp | 90 ++++--- 2 files changed, 181 insertions(+), 162 deletions(-) diff --git a/include/universal/number/posit/specialized/posit_16_2.hpp b/include/universal/number/posit/specialized/posit_16_2.hpp index fb06650ac..cc8f7df76 100644 --- a/include/universal/number/posit/specialized/posit_16_2.hpp +++ b/include/universal/number/posit/specialized/posit_16_2.hpp @@ -162,18 +162,13 @@ class posit { // extract remaining fraction bits uint32_t lhs_fraction = ((0x4000 | remaining << 1) & 0x7FFF) << 16; // 0x4000 is the hidden bit int8_t shiftRight = k; - // std::cout << "lhs frac: " << to_binary(lhs_fraction, 32) << '\n'; // adjust shift and extract fraction bits of rhs - // std::cout << "shiftRight: " << int(shiftRight) << '\n'; extractAddand(rhs, shiftRight, remaining); - // std::cout << "extractAddand shiftRight: " << int(shiftRight) << " remaining: " << to_binary(remaining, 16, true) << '\n'; uint32_t rhs_fraction = ((0x4000 | remaining << 1) & 0x7FFF) << 16; // 0x4000 is the hidden bit - // std::cout << "rhs frac : " << to_binary(rhs_fraction, 32) << '\n'; // This is 4kZ + expZ; (where kZ=kA-kB and expZ=expA-expB) shiftRight = (shiftRight << 2) + exp - (remaining >> 13); - // std::cout << "shiftRight : " << int(shiftRight) << '\n'; if (shiftRight == 0) { // we can simplify the computation lhs_fraction += rhs_fraction; // this will always produce a carry @@ -186,9 +181,7 @@ class posit { } else { (shiftRight > 15) ? (rhs_fraction = 0) : (rhs_fraction >>= shiftRight); // frac16B >>= shiftRight - // std::cout << "rhs frac : " << to_binary(rhs_fraction, 32) << '\n'; lhs_fraction += rhs_fraction; - // std::cout << "lhs frac : " << to_binary(lhs_fraction, 32) << '\n'; bool rcarry = (0x8000'0000 & lhs_fraction) != 0; // first left bit if (rcarry) { @@ -201,8 +194,6 @@ class posit { } } - // std::cout << "lhs frac : " << to_binary(lhs_fraction, 32) << '\n'; - _bits = round(k, exp, lhs_fraction); if (sign) _bits = -_bits & 0xFFFF; return *this; @@ -240,49 +231,53 @@ class posit { } // decode the regime of lhs - int8_t m = 0; // pattern length - uint16_t remaining = 0; - decode_regime(lhs, m, remaining); + int8_t k; // regime numerical value + uint16_t remaining; // Remaining bits after the regime: 00..0 + decode_regime(lhs, k, remaining); // extract the exponent - uint16_t exp = remaining >> 13; + uint16_t exp = remaining >> 13; // 16 - 1(sign) - 2(exponent) - uint32_t lhs_fraction = (0x4000 | remaining << 1) << 16; - int8_t shiftRight = m; + uint32_t lhs_fraction = ((0x4000 | remaining << 1) & 0x7FFF) << 16; // 0x4000 is the hidden bit + int8_t shiftRight = k; // adjust shift and extract fraction bits of rhs extractAddand(rhs, shiftRight, remaining); - uint32_t rhs_fraction = (0x4000 | remaining << 1) << 16; + uint32_t rhs_fraction = ((0x4000 | remaining << 1) & 0x7FFF) << 16; // 0x4000 is the hidden bit // align the fractions for subtraction + // This is 4kZ + expZ; (where kZ=kA-kB and expZ=expA-expB) shiftRight = (shiftRight << 2) + exp - (remaining >> 13); - if (shiftRight != 0) { - if (shiftRight >= 29) { - _bits = lhs; - if (sign) _bits = -_bits & 0xFFFF; - return *this; - } - else { - rhs_fraction >>= shiftRight; - } + if (shiftRight > 31) { + _bits = lhs; + if (sign) _bits = -_bits & 0xFFFF; + return *this; } else { rhs_fraction >>= shiftRight; } + lhs_fraction -= rhs_fraction; - while ((lhs_fraction >> 29) == 0) { - --m; - lhs_fraction <<= 2; + while ((lhs_fraction >> 27) == 0) { + --k; + lhs_fraction <<= 4; } + bool ecarry = (0x4000'0000 & lhs_fraction) != 0; - if (!ecarry) { - if (exp == 0) --m; - exp ^= 1; + while (!ecarry) { + if (exp == 0) { + --k; + exp = 3; + } + else { + --exp; + } lhs_fraction <<= 1; + ecarry = (0x4000'0000 & lhs_fraction) != 0; } - _bits = round(m, exp, lhs_fraction); + _bits = round(k, exp, lhs_fraction); if (sign) _bits = -_bits & 0xFFFF; return *this; } @@ -310,35 +305,38 @@ class posit { rhs = rhs & sign_mask ? -rhs : rhs; // decode the regime of lhs - int8_t m = 0; // pattern length - uint16_t remaining = 0; - decode_regime(lhs, m, remaining); + int8_t k = 0; // regime numerical value + uint16_t remaining; // Remaining bits after the regime: 00..0 + decode_regime(lhs, k, remaining); // extract the exponent - int32_t exp = remaining >> 14; + int32_t exp = remaining >> 13; // 16 - 1(sign) - 2(exponent) - // add the hidden bit - uint32_t lhs_fraction = (0x4000 | remaining); + // extract remaining fraction bits + uint16_t lhs_fraction = (0x4000 | remaining << 1) & 0x7FFF; // 0x4000 is the hidden bit // adjust shift and extract fraction bits of rhs - extractMultiplicand(rhs, m, remaining); - exp += (remaining >> 14); - uint32_t rhs_fraction = (0x4000 | remaining); - uint32_t result_fraction = lhs_fraction * rhs_fraction; - //std::cout << "fbits 0x" << std::hex << result_fraction << std::dec << std::endl; + extractMultiplicand(rhs, k, remaining); + exp += (remaining >> 13); + uint16_t rhs_fraction = (0x4000 | remaining << 1) & 0x7FFF; // 0x4000 is the hidden bit + uint32_t result_fraction = (uint32_t) lhs_fraction * rhs_fraction; - if (exp > 1) { - ++m; - exp ^= 0x2; + if (exp > 3) { + ++k; + exp &= 0x3; } + bool rcarry = bool(result_fraction & 0x20000000); if (rcarry) { - if (exp) m++; - exp ^= 0x1; + exp++; + if (exp > 3) { + ++k; + exp &= 0x3; + } result_fraction >>= 1; } // round - _bits = adjustAndRound(m, exp, result_fraction); + _bits = adjustAndRound(k, exp, result_fraction); if (sign) _bits = -_bits & 0xFFFF; return *this; } @@ -375,21 +373,21 @@ class posit { rhs = rhs & sign_mask ? -rhs : rhs; // decode the regime of lhs - int8_t m = 0; // pattern length - uint16_t remaining = 0; - decode_regime(lhs, m, remaining); + int8_t k; // regime numerical value + uint16_t remaining; // Remaining bits after the regime: 00..0 + decode_regime(lhs, k, remaining); // extract the exponent - int32_t exp = remaining >> 14; + int32_t exp = remaining >> 13; // 16 - 1(sign) - 2(exponent) - // extract the fraction - uint16_t lhs_fraction = (0x4000 | remaining); - uint32_t fraction = lhs_fraction << 14; + // extract remaining fraction bits + uint16_t lhs_fraction = (0x4000 | remaining << 1) & 0x7FFF; // 0x4000 is the hidden bit + uint32_t fraction = (uint32_t) lhs_fraction << 14; // adjust shift and extract fraction bits of rhs - extractDividand(rhs, m, remaining); - exp -= remaining >> 14; - uint16_t rhs_fraction = (0x4000 | remaining); + extractDividand(rhs, k, remaining); + exp -= remaining >> 13; + uint16_t rhs_fraction = (0x4000 | remaining << 1) & 0x7FFF; // 0x4000 is the hidden bit div_t result = div(fraction, rhs_fraction); uint32_t result_fraction = result.quot; @@ -397,20 +395,25 @@ class posit { // adjust the exponent if needed if (exp < 0) { - exp = 0x01; - --m; + exp += 4; + --k; } if (result_fraction != 0) { bool rcarry = result_fraction >> 14; // this is the hidden bit (14th bit), extreme right bit is bit 0 if (!rcarry) { - if (exp == 0) --m; - exp ^= 0x01; + if (exp == 0) { + --k; + exp = 3; + } + else { + --exp; + } result_fraction <<= 1; } } // round - _bits = divRound(m, exp, result_fraction, remainder != 0); + _bits = divRound(k, exp, result_fraction, remainder != 0); if (sign) _bits = -_bits & 0xFFFF; return *this; @@ -698,35 +701,37 @@ class posit { remaining &= 0x7FFF; } } - inline void extractMultiplicand(const uint16_t bits, int8_t& m, uint16_t& remaining) const { + // TODO: This is the same as decode_regime except the initialization of k. Can we combine them? + inline void extractMultiplicand(const uint16_t bits, int8_t& k, uint16_t& remaining) const { remaining = (bits << 2) & 0xFFFF; if (bits & 0x4000) { // positive regimes while (remaining >> 15) { - ++m; + ++k; remaining = (remaining << 1) & 0xFFFF; } } else { // negative regimes - --m; + --k; while (!(remaining >> 15)) { - --m; + --k; remaining = (remaining << 1) & 0xFFFF; } remaining &= 0x7FFF; } } - inline void extractDividand(const uint16_t bits, int8_t& m, uint16_t& remaining) const { + // TODO: This is the same as extractAddand. Can we combine them? + inline void extractDividand(const uint16_t bits, int8_t& k, uint16_t& remaining) const { remaining = (bits << 2) & 0xFFFF; if (bits & 0x4000) { // positive regimes while (remaining >> 15) { - --m; + --k; remaining = (remaining << 1) & 0xFFFF; } } else { // negative regimes - ++m; + ++k; while (!(remaining >> 15)) { - ++m; + ++k; remaining = (remaining << 1) & 0xFFFF; } remaining &= 0x7FFF; @@ -743,20 +748,13 @@ class posit { scale = int16_t(k) + 1; regime = 0x7FFF - (0x7FFF >> scale); } - //std::cout << "scale : " << scale << '\n'; - //std::cout << "regime : " << to_binary(regime, 16) << '\n'; - //std::cout << "exponent : " << to_binary(exp, 16) << '\n'; if (scale > 14) { bits = k < 0 ? 0x0001 : 0x7FFF; // minpos and maxpos. exp and frac do not matter } else { - //std::cout << "fraction in : " << to_binary(fraction, 32) << '\n'; - fraction = (fraction & 0x3FFFFFFF) >> (scale + 2); // remove both carry bits, 2 bits exp - //std::cout << "fraction out : " << to_binary(fraction, 32) << '\n'; uint16_t final_fbits = uint16_t(fraction >> 16); - //std::cout << "fraction bits: " << to_binary(final_fbits, 16) << '\n'; bool bitNPlusOne = false; uint16_t moreBits = 0; if (scale <= 12) { @@ -779,10 +777,6 @@ class posit { } } - // std::cout << "composite regime : " << to_binary(regime, 16) << '\n'; - // std::cout << "composite exponent : " << to_binary(exp) << '\n'; - // std::cout << "composite fraction : " << to_binary(final_fbits) << '\n'; - bits = uint16_t(regime) + uint16_t(exp) + uint16_t(final_fbits); // n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even if (bitNPlusOne) { @@ -792,84 +786,103 @@ class posit { } return bits; } - inline uint16_t divRound(const int8_t m, uint16_t exp, uint32_t fraction, bool nonZeroRemainder) const { - uint16_t scale, regime, bits; - if (m < 0) { - scale = (-m & 0xFFFF); + inline uint16_t divRound(const int8_t k, uint16_t exp, uint32_t fraction, bool nonZeroRemainder) const { + int16_t scale; + uint16_t regime, bits; + if (k < 0) { + scale = (-k & 0xFFFF); regime = 0x4000 >> scale; } else { - scale = m + 1; + scale = int16_t(k) + 1; regime = 0x7FFF - (0x7FFF >> scale); } if (scale > 14) { - bits = m < 0 ? 0x0001 : 0x7FFF; // minpos and maxpos + bits = k < 0 ? 0x0001 : 0x7FFF; // minpos and maxpos. exp and frac do not matter } else { - fraction &= 0x3FFF; // remove both carry bits - uint16_t final_fbits = uint16_t(fraction >> (scale + 1)); + fraction &= 0x3FFF; //remove carry and rcarry bits and shift to correct position + uint16_t final_fbits = uint16_t(fraction >> (scale + 2)); bool bitNPlusOne = false; - if (scale != 14) { - bitNPlusOne = bool((fraction >> scale) & 0x1); - } - else if (final_fbits > 0) { - final_fbits = 0; - } - if (scale == 14 && exp != 0) { - bitNPlusOne = true; - exp = 0; + uint16_t moreBits = 0; + if (scale <= 12) { + bitNPlusOne = bool((fraction >> (scale + 1)) & 0x1); + exp <<= (12 - scale); + if (bitNPlusOne && (((1 << (scale + 1)) - 1) & fraction)) { + moreBits = 0x0001; + } } else { - exp <<= (13 - scale); + if (scale == 14) { + bitNPlusOne = bool(exp & 0x2); + moreBits = exp & 0x1; + exp = 0; + } + else if (scale == 13) { + bitNPlusOne = bool(exp & 0x1); + exp >>= 1; + } + if (final_fbits > 0) { + final_fbits = 0; + moreBits = 0x0001; + } } + bits = uint16_t(regime) + uint16_t(exp) + uint16_t(final_fbits); if (bitNPlusOne) { - uint16_t moreBits = (fraction & ((1 << scale) - 1)) ? 0x0001 : 0x0000; if (nonZeroRemainder) moreBits = 0x0001; - // n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even bits += (bits & 0x0001) | moreBits; } } return bits; } - inline uint16_t adjustAndRound(const int8_t m, uint16_t exp, uint32_t fraction) const { - uint16_t scale, regime, bits; - if (m < 0) { - scale = (-m & 0xFFFF); + inline uint16_t adjustAndRound(const int8_t k, uint16_t exp, uint32_t fraction) const { + int16_t scale; + uint16_t regime, bits; + if (k < 0) { + scale = (-k & 0xFFFF); regime = 0x4000 >> scale; } else { - scale = m + 1; + scale = int16_t(k) + 1; regime = 0x7FFF - (0x7FFF >> scale); } if (scale > 14) { - bits = m < 0 ? 0x0001 : 0x7FFF; // minpos and maxpos + bits = k < 0 ? 0x0001 : 0x7FFF; // minpos and maxpos. exp and frac do not matter } else { - fraction = (fraction & 0x0FFFFFFF) >> (scale - 1); // remove both carry bits + // remove carry and rcarry bits and shift to correct position + fraction = (fraction & 0x0FFFFFFF) >> scale; uint16_t final_fbits = uint16_t(fraction >> 16); bool bitNPlusOne = false; - if (scale != 14) { + uint16_t moreBits = 0; + if (scale <= 12) { bitNPlusOne = bool(0x8000 & fraction); - } - else if (final_fbits > 0) { - final_fbits = 0; - } - if (scale == 14 && exp != 0) { - bitNPlusOne = true; - exp = 0; + exp <<= (12 - scale); } else { - exp <<= (13 - scale); + if (scale == 14) { + bitNPlusOne = bool(exp & 0x2); + moreBits = exp & 0x1; + exp = 0; + } + else if (scale == 13) { + bitNPlusOne = bool(exp & 0x1); + exp >>= 1; + } + if (final_fbits > 0) { + final_fbits = 0; + moreBits = 1; + } } bits = uint16_t(regime) + uint16_t(exp) + uint16_t(final_fbits); // n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even if (bitNPlusOne) { - uint16_t moreBits = (0x7FFF & fraction) ? 0x0001 : 0x0000; + if (0x7FFF & fraction) moreBits = 1; bits += (bits & 0x0001) | moreBits; } } diff --git a/static/posit/specialized/posit_16_2.cpp b/static/posit/specialized/posit_16_2.cpp index 5e9ea8220..77b76c1b2 100644 --- a/static/posit/specialized/posit_16_2.cpp +++ b/static/posit/specialized/posit_16_2.cpp @@ -79,49 +79,55 @@ try { float fa, fb; posit<16, 1> a, b, c; - { - posit<16, 2> a, b, c, cref; - float fa, fb, fc; - /* - FAIL - 1.3877787807814456755e-17 + -3.9990234375 != -1.99951171875 golden reference is -3.9990234375 - 0b0.000000000000001.. + 0b1.10.01.11111111111 != 0b1.10.00.11111111111 golden reference is 0b1.10.01.11111111111 - - 1.3877787807814456755e-17 + -7.3341652750968933105e-09 != -5.4715201258659362793e-09 golden reference is -7.3341652750968933105e-09 - 0b0.000000000000001.. + 0b1.00000001.00.11111 != 0b1.00000001.00.01111 golden reference is 0b1.00000001.00.11111 - - 1.3877787807814456755e-17 - 2.2204460492503130808e-16 != -1.3877787807814456755e-17 golden reference is -2.2204460492503130808e-16 - 0b0.000000000000001.. - 0b0.00000000000001.0. != 0b1.000000000000001.. golden reference is 0b1.00000000000001.0. - */ - a.setbits(0x0001); - //b.setbits(0xCFFF); // 0b1.10.0'1.111'1111'1111 - b.setbits(0x809F); // 0b1000'0000'1001'1111 - b.setbits(0x0002); // 0b0000'0000'0000'0010 - //a = 1; - //b = -1.5f; - c = a - b; - fa = float(a); - fb = float(b); - fc = fa + fb; - cref = fc; - std::cout - << std::setw(NUMBER_COLUMN_WIDTH) << a << " + " - << std::setw(NUMBER_COLUMN_WIDTH) << b << " = " - << std::setw(NUMBER_COLUMN_WIDTH) << c << '\n'; - std::cout - << std::setw(NUMBER_COLUMN_WIDTH) << fa << " + " - << std::setw(NUMBER_COLUMN_WIDTH) << fb << " = " - << std::setw(NUMBER_COLUMN_WIDTH) << fc << '\n'; - std::cout - << std::setw(NUMBER_COLUMN_WIDTH) << to_binary(fa) << " + " - << std::setw(NUMBER_COLUMN_WIDTH) << to_binary(fb) << " = " - << std::setw(NUMBER_COLUMN_WIDTH) << to_binary(fc) << '\n'; - ReportBinaryOperation(a, "+", b, c); - ReportBinaryArithmeticError("bla", "+", a, b, c, cref); - } + // { + // posit<16, 2> a, b, c, cref; + // float fa, fb, fc; + // /* + // FAIL + // 1.3877787807814456755e-17 + -3.9990234375 != -1.99951171875 golden reference is -3.9990234375 + // 0b0.000000000000001.. + 0b1.10.01.11111111111 != 0b1.10.00.11111111111 golden reference is 0b1.10.01.11111111111 + + // 1.3877787807814456755e-17 + -7.3341652750968933105e-09 != -5.4715201258659362793e-09 golden reference is -7.3341652750968933105e-09 + // 0b0.000000000000001.. + 0b1.00000001.00.11111 != 0b1.00000001.00.01111 golden reference is 0b1.00000001.00.11111 + + // 1.3877787807814456755e-17 - 2.2204460492503130808e-16 != -1.3877787807814456755e-17 golden reference is -2.2204460492503130808e-16 + // 0b0.000000000000001.. - 0b0.00000000000001.0. != 0b1.000000000000001.. golden reference is 0b1.00000000000001.0. + // */ + // a.setbits(0x0001); + // //b.setbits(0xCFFF); // 0b1.10.0'1.111'1111'1111 + // b.setbits(0x809F); // 0b1000'0000'1001'1111 + // // b.setbits(0x0002); // 0b0000'0000'0000'0010 + // //a = 1; + // //b = -1.5f; + // c = a - b; + // fa = float(a); + // fb = float(b); + // fc = fa - fb; + // cref = fc; + // std::cout + // << std::setw(NUMBER_COLUMN_WIDTH) << a << " - " + // << std::setw(NUMBER_COLUMN_WIDTH) << b << " = " + // << std::setw(NUMBER_COLUMN_WIDTH) << c << '\n'; + // std::cout + // << std::setw(NUMBER_COLUMN_WIDTH) << fa << " - " + // << std::setw(NUMBER_COLUMN_WIDTH) << fb << " = " + // << std::setw(NUMBER_COLUMN_WIDTH) << fc << '\n'; + // std::cout + // << std::setw(NUMBER_COLUMN_WIDTH) << to_binary(fa) << " - " + // << std::setw(NUMBER_COLUMN_WIDTH) << to_binary(fb) << " = " + // << std::setw(NUMBER_COLUMN_WIDTH) << to_binary(fc) << '\n'; + // ReportBinaryOperation(a, "-", b, c); + // ReportBinaryArithmeticError("bla", "-", a, b, c, cref); + // } - //nrOfFailedTestCases += ReportTestResult(VerifySubtraction(reportTestCases), tag, "sub (native) "); - //nrOfFailedTestCases += ReportTestResult(VerifyAddition(reportTestCases), tag, "add (native) "); + std::cout << "Manual exhaustive div" << std::endl; + nrOfFailedTestCases += ReportTestResult(VerifyDivision(reportTestCases), tag, "div (native) "); + std::cout << "Manual exhaustive mul" << std::endl; + nrOfFailedTestCases += ReportTestResult(VerifyMultiplication(reportTestCases), tag, "mul (native) "); + std::cout << "Manual exhaustive sub" << std::endl; + nrOfFailedTestCases += ReportTestResult(VerifySubtraction(reportTestCases), tag, "sub (native) "); + std::cout << "Manual exhaustive add" << std::endl; + nrOfFailedTestCases += ReportTestResult(VerifyAddition(reportTestCases), tag, "add (native) "); return 0; /* {