diff --git a/src/deemon/objects/int_logic.c b/src/deemon/objects/int_logic.c index 342af28ae..698e927b7 100644 --- a/src/deemon/objects/int_logic.c +++ b/src/deemon/objects/int_logic.c @@ -1948,6 +1948,14 @@ int_pow(DeeIntObject *a, DeeObject *b_ob) { * 3. This notice may not be removed or altered from any source distribution. * */ +#undef HAVE_int_pext_impl +#undef HAVE_int_pdep_impl +#if !defined(__OPTIMIZE_SIZE__) && 1 /* digit-wise fast paths are left out of size-optimized builds */ +#define HAVE_int_pext_impl +#define HAVE_int_pdep_impl +#endif + +#ifdef HAVE_int_pext_impl PRIVATE WUNUSED NONNULL((1, 2)) DREF DeeIntObject *DCALL int_pext_impl(digit const *self, digit const *mask, size_t common_size) { DREF DeeIntObject *result; @@ -2001,12 +2009,13 @@ int_pext_impl(digit const *self, digit const *mask, size_t common_size) { err: return NULL; } +#endif /* HAVE_int_pext_impl */ PRIVATE ATTR_NOINLINE WUNUSED NONNULL((1, 2)) DREF DeeIntObject *DCALL int_pext_ex_impl(DeeIntObject *self, DeeIntObject *mask) { - size_t self_size = (size_t)ABS(self->ob_size); + size_t self_size = (size_t)ABS(self->ob_size); size_t mask_size = (size_t)ABS(mask->ob_size); - bool self_neg = self->ob_size < 0; + bool self_neg = self->ob_size < 0; bool mask_neg = mask->ob_size < 0; DREF DeeIntObject *result; size_t i, result_nbits, result_digits; @@ -2207,20 +2216,82 @@ int_pext(DeeIntObject *self, DeeIntObject *mask) { DREF DeeIntObject *result; /* A negative self/mask makes this algorithm extremely complicated. 
* A (correct, but unoptimized) reference implementation can be found at: - * >> /util/test/deemon-int-pdep.dee:correctPExt */ + * >> /util/test/deemon-int-pext.dee:correctPExt */ +#ifdef HAVE_int_pext_impl if likely(self->ob_size >= 0 && mask->ob_size >= 0) { size_t common_size = (size_t)mask->ob_size; if (common_size > (size_t)self->ob_size) common_size = (size_t)self->ob_size; result = int_pext_impl(self->ob_digit, mask->ob_digit, common_size); - } else { + } else +#endif /* HAVE_int_pext_impl */ + { result = int_pext_ex_impl(self, mask); } return result; } -INTERN WUNUSED NONNULL((1, 2)) DREF DeeIntObject *DCALL -int_pdep(DeeIntObject *self, DeeIntObject *mask) { + +/* Fast path of int_pdep(): operates directly on the digit vectors of a non-negative self and mask. */ +#ifdef HAVE_int_pdep_impl +PRIVATE WUNUSED NONNULL((1, 3)) DREF DeeIntObject *DCALL +int_pdep_impl(digit const *self, size_t common_size, + digit const *mask, size_t mask_size) { + DREF DeeIntObject *result; + size_t i, self_index; + shift_t self_shift; + result = DeeInt_Alloc(mask_size); /* the loop below stores at most one digit per mask digit */ + if unlikely(!result) + goto err; + self_index = 0; + self_shift = 0; + for (i = 0; i < mask_size && self_index < common_size; ++i) { /* ends when the mask is exhausted, or once all "common_size" digits of "self" were consumed */ + shift_t m_nbits; + digit v, m = mask[i]; + if likely(m) { + m_nbits = POPCOUNT(m); /* # of bits of "self" consumed by this mask digit */ + v = self[self_index] >> self_shift; /* v_nbits = DIGIT_BITS - self_shift */ + self_shift += m_nbits; /* v_nbits = DIGIT_BITS - self_shift + m_nbits */ + if (self_shift >= DIGIT_BITS) { + self_shift -= DIGIT_BITS; /* v_nbits = DIGIT_BITS - self_shift + m_nbits - DIGIT_BITS */ + ++self_index; /* current digit of "self" is used up; continue with the next one */ + if (self_index < common_size) { + shift_t v_nbits; /* # of bits already stored in "v" */ + /* >> v_nbits = DIGIT_BITS - self_shift + m_nbits - DIGIT_BITS + * >> v_nbits = -self_shift + m_nbits + * >> v_nbits = m_nbits - self_shift */ + v_nbits = m_nbits - self_shift; + v |= self[self_index] << v_nbits; + } + } + v = PDEP(v, m); /* presumably deposits the low bits of "v" at the 1-bits of "m" -- TODO confirm against the PDEP macro */ + } else { + v = 0; /* a zero mask digit consumes no bits of "self" */ + } + result->ob_digit[i] = v; + } +#if 1 + result->ob_size = (Dee_ssize_t)i; /* digits at index >= i were never written; cut them off rather than zero-fill */ +#else + bzeroc(result->ob_digit + i, mask_size - i, sizeof(Dee_digit_t)); +#endif + 
result = int_normalize(result); + return result; +err: + return NULL; +} +#endif /* HAVE_int_pdep_impl */ +/* Fallback of int_pdep() for operands the fast path does not handle; the body is still a TODO. */ +PRIVATE ATTR_NOINLINE WUNUSED NONNULL((1, 2)) DREF DeeIntObject *DCALL +int_pdep_ex_impl(DeeIntObject *self, DeeIntObject *mask) { +#if 0 + size_t self_size = (size_t)ABS(self->ob_size); + size_t mask_size = (size_t)ABS(mask->ob_size); + bool self_neg = self->ob_size < 0; + bool mask_neg = mask->ob_size < 0; + DREF DeeIntObject *result; +#endif + /* TODO */ (void)self; (void)mask; @@ -2228,6 +2299,27 @@ int_pdep(DeeIntObject *self, DeeIntObject *mask) { return NULL; } +INTERN WUNUSED NONNULL((1, 2)) DREF DeeIntObject *DCALL +int_pdep(DeeIntObject *self, DeeIntObject *mask) { + DREF DeeIntObject *result; + /* A negative self/mask makes this algorithm extremely complicated. + * A (correct, but unoptimized) reference implementation can be found at: + * >> /util/test/deemon-int-pdep.dee:correctPDep */ +#ifdef HAVE_int_pdep_impl + if likely(self->ob_size >= 0 && mask->ob_size >= 0) { + size_t common_size = (size_t)mask->ob_size; + if (common_size > (size_t)self->ob_size) + common_size = (size_t)self->ob_size; /* common_size = min(mask->ob_size, self->ob_size) */ + result = int_pdep_impl(self->ob_digit, common_size, + mask->ob_digit, (size_t)mask->ob_size); + } else +#endif /* HAVE_int_pdep_impl */ + { + result = int_pdep_ex_impl(self, mask); /* reached when self or mask is negative, or when HAVE_int_pdep_impl is not defined */ + } + return result; +} + DECL_END diff --git a/util/test/deemon-int-pdep.dee b/util/test/deemon-int-pdep.dee index 3df53aa46..b01b75941 100644 --- a/util/test/deemon-int-pdep.dee +++ b/util/test/deemon-int-pdep.dee @@ -23,7 +23,24 @@ import * from deemon; @@Reference implementation for @int.pdep function correctPDep(self: int, mask: int): int { - // TODO + local result = 0; + local resultShift = 0; + while (mask) { + local nSkip = mask.ctz; + resultShift += nSkip; + mask >>= nSkip; + if (mask == -1) { + result |= self << resultShift; + break; + } + local nCopy = mask.ct1; + local addend = self & nCopy.bitmask; + result |= addend << resultShift; + mask >>= nCopy; + self >>= 
nCopy; + resultShift += nCopy; + } + return result; } function assertPDep(self: int, mask: int) { @@ -35,7 +52,6 @@ function assertPDep(self: int, mask: int) { f"\nActual: {actual.hex()}" f"\nself: {self.hex()}" f"\nmask: {mask.hex()}"; - assert expected == correctPDep(self, mask); } function getWideRangeOfIntegers(): {int...} { @@ -58,20 +74,28 @@ function getWideRangeOfIntegers(): {int...} { } -//TODO:assert -1 == correctPDep(-1, -1); -//TODO:assert 0x1234 == correctPDep(0x1234, -1); -//TODO:assert "0x13" == correctPDep(0x1234, 0xf0f0).hex(); -//TODO:assert "0x13579bdf" == correctPDep(0x123456789ABCDEF0, 0xf0f0f0f0f0f0f0f0).hex(); -//TODO:assert "0x1357" == correctPDep(0x12345678, 0xf0f0f0f0).hex(); -//TODO:assert "0x33" == correctPDep(0x11223344, 0xff00).hex(); -//TODO: -//TODO:assertPDep(0x1234, 0xf0f0); -//TODO:assertPDep(0x123456789ABCDEF0, 0xf0f0f0f0f0f0f0f0); -//TODO:assertPDep(0x12345678, 0xf0f0f0f0); -//TODO:assertPDep(0x11223344, 0xff00); -//TODO: -//TODO:for (local self: getWideRangeOfIntegers()) { -//TODO: for (local mask: getWideRangeOfIntegers()) { -//TODO: assertPDep(self, mask); -//TODO: } -//TODO:} +assert -1 == correctPDep(-1, -1); +assert 0x1234 == correctPDep(0x1234, -1); +assert "0x3040" == correctPDep(0x1234, 0xf0f0).hex(); +assert "0x3040" == correctPDep(0x34, 0xf0f0).hex(); +assert "0x90a0b0c0d0e0f000" == correctPDep(0x9ABCDEF0, 0xf0f0f0f0f0f0f0f0).hex(); +assert "0x90a0b0c0d0e0f000" == correctPDep(0x9ABCDEF0, 0xfffffffffffffffff0f0f0f0f0f0f0f0).hex(); +assert "0x90a0b0c0d0e0f000" == correctPDep(0x123456789ABCDEF0, 0xf0f0f0f0f0f0f0f0).hex(); +assert "0x102030405060708090a0b0c0d0e0f000" == correctPDep( + 0x123456789ABCDEF0, 0xf0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0).hex(); +assert "0x50607080" == correctPDep(0x5678, 0xf0f0f0f0).hex(); +assert "0x50607080" == correctPDep(0x12345678, 0xf0f0f0f0).hex(); +assert "0x4400" == correctPDep(0x44, 0xff00).hex(); +assert "0x4400" == correctPDep(0x11223344, 0xff00).hex(); + +assert 0x10305070 == 
correctPDep(0x00001357, 0xf0f0f0f0); +assert 0x00003300 == correctPDep(0x00000033, 0x0000ff00); +assertPDep(0x00001357, 0xf0f0f0f0); +assertPDep(0x00000033, 0x0000ff00); + +for (local self: getWideRangeOfIntegers()) { + for (local mask: getWideRangeOfIntegers()) { + if (self >= 0 && mask >= 0) // TODO: REMOVE ME + assertPDep(self, mask); + } +}