Skip to content

Commit

Permalink
Simplified 2-value search, removed 8-bit expansion
Browse files Browse the repository at this point in the history
  • Loading branch information
cwoffenden committed Sep 5, 2022
1 parent c07f344 commit 72b8ea0
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 72 deletions.
36 changes: 11 additions & 25 deletions rgbcx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2792,29 +2792,19 @@ namespace rgbcx
return 0;
}

// if we only have two values (min and max) the search radius can be set to zero (setting the endpoints directly)
bool has_two_values = true;
for (uint32_t i = 0; i < 16; i++) {
uint32_t val = pPixels[i * stride];
if (val != min_val && val != max_val) {
has_two_values = false;
break;
}
}
if (has_two_values) {
search_rad = 0;
}

uint32_t best_err = UINT32_MAX;
for (uint32_t mode = 0; mode < 2; mode++)
{
if ((mode_flag & (1 << mode)) == 0)
continue;

for (int lo_delta = -(int)search_rad; lo_delta <= (int)search_rad; lo_delta++)
// the deltas go 0, -1, 1, -2, 2, -3, 3, etc., meaning 2-colour blocks are found first
for (int lo_count = 0; lo_count <= (int)search_rad << 1; lo_count++)
{
for (int hi_delta = -(int)search_rad; hi_delta <= (int)search_rad; hi_delta++)
int lo_delta = ((lo_count & 1) ? -lo_count : lo_count) >> 1;
for (int hi_count = 0; hi_count <= (int)search_rad << 1; hi_count++)
{
int hi_delta = ((hi_count & 1) ? -hi_count : hi_count) >> 1;
bc4_block trial_block;
trial_block.m_endpoints[0] = (uint8_t)clamp<int>(max_val + hi_delta, 0, 255);
trial_block.m_endpoints[1] = (uint8_t)clamp<int>(min_val + lo_delta, 0, 255);
Expand All @@ -2830,10 +2820,8 @@ namespace rgbcx
else if (!trial_block.is_alpha6_block())
std::swap(trial_block.m_endpoints[0], trial_block.m_endpoints[1]);

// note: block vals are expanded to 8:6 fixed point, as is the error,
// with 8:6 able to accumulate 16x the worst-case error (255.98 ^ 2)
uint16_t block_vals14[8];
trial_block.get_block_values(block_vals14, trial_block.m_endpoints[0], trial_block.m_endpoints[1]);
uint8_t block_vals[8];
trial_block.get_block_values(block_vals, trial_block.m_endpoints[0], trial_block.m_endpoints[1]);

uint32_t trial_err = 0;
uint8_t trial_sels[16];
Expand All @@ -2842,9 +2830,8 @@ namespace rgbcx
{
memcpy(trial_sels, pForce_selectors, 16);

for (uint32_t i = 0; i < 16; i++) {
trial_err += squarei(block_vals14[pForce_selectors[i]] - bc4_block::expand8to14(pPixels[i * stride]));
}
for (uint32_t i = 0; i < 16; i++)
trial_err += squarei(block_vals[pForce_selectors[i]] - pPixels[i * stride]);
}
else
{
Expand All @@ -2854,7 +2841,7 @@ namespace rgbcx
uint32_t best_index = 0;
for (uint32_t j = 0; j < 8; j++)
{
uint32_t err = squarei(block_vals14[j] - bc4_block::expand8to14(pPixels[i * stride]));
uint32_t err = squarei(block_vals[j] - pPixels[i * stride]);
if (err < best_index_err)
{
best_index_err = err;
Expand Down Expand Up @@ -2901,8 +2888,7 @@ namespace rgbcx
} // mode
error_reached_zero:

// scale the error back to 8-bit from 8:6 fixed point (to match what was previously returned)
return (best_err + 63) >> 12;
return best_err;
}

void encode_bc3(void* pDst, const uint8_t* pPixels, uint32_t flags, uint32_t total_orderings_to_try)
Expand Down
47 changes: 0 additions & 47 deletions rgbcx.h
Original file line number Diff line number Diff line change
Expand Up @@ -388,12 +388,6 @@ namespace rgbcx
return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1);
}

// Widens an 8-bit value to 14 bits. The input is shifted up by six places and,
// for inputs in 0..255, its upper six bits are repeated in the vacated low bits
// (so 0 maps to 0 and 255 maps to 16383).
static inline uint32_t expand8to14(uint32_t val) {
    const uint32_t shifted_up = val << 6;
    const uint32_t replicated = val >> 2;
    return shifted_up | replicated;
}

// Interpolated values as 8-bit (as per BC3 alpha)
static inline uint32_t get_block_values6(uint8_t* pDst, uint32_t l, uint32_t h)
{
pDst[0] = static_cast<uint8_t>(l);
Expand All @@ -407,23 +401,6 @@ namespace rgbcx
return 6;
}

// 6-value table with entries expanded to 14-bit (for BC4/5).
// Writes eight entries to pDst: the expanded endpoints l and h, four values
// interpolated between them in fifths, then the fixed entries 0 and the
// expansion of 255. Always returns 6.
static inline uint32_t get_block_values6(uint16_t* pDst, uint32_t l, uint32_t h)
{
    const uint32_t lo14 = expand8to14(l);
    const uint32_t hi14 = expand8to14(h);

    pDst[0] = static_cast<uint16_t>(lo14);
    pDst[1] = static_cast<uint16_t>(hi14);

    // Entries 2..5: the weight moves from (4 low, 1 high) to (1 low, 4 high).
    for (uint32_t step = 1; step <= 4; step++)
        pDst[step + 1] = static_cast<uint16_t>((lo14 * (5 - step) + hi14 * step) / 5);

    // The final two entries do not depend on the endpoints.
    pDst[6] = 0;
    pDst[7] = static_cast<uint16_t>(expand8to14(255));
    return 6;
}

// Interpolated values as 8-bit (as per BC3 alpha)
static inline uint32_t get_block_values8(uint8_t* pDst, uint32_t l, uint32_t h)
{
pDst[0] = static_cast<uint8_t>(l);
Expand All @@ -437,37 +414,13 @@ namespace rgbcx
return 8;
}

// 8-value table with entries expanded to 14-bit (for BC4/5).
// Writes eight entries to pDst: the expanded endpoints l and h followed by six
// values interpolated between them in sevenths. Always returns 8.
static inline uint32_t get_block_values8(uint16_t* pDst, uint32_t l, uint32_t h)
{
    const uint32_t lo14 = expand8to14(l);
    const uint32_t hi14 = expand8to14(h);

    pDst[0] = static_cast<uint16_t>(lo14);
    pDst[1] = static_cast<uint16_t>(hi14);

    // Entries 2..7: the weight moves from (6 low, 1 high) to (1 low, 6 high).
    for (uint32_t step = 1; step <= 6; step++)
        pDst[step + 1] = static_cast<uint16_t>((lo14 * (7 - step) + hi14 * step) / 7);

    return 8;
}

// Fills pDst with the 8-bit value table for endpoints l and h and returns the
// count reported by the helper used: the 8-value table when l is strictly
// greater than h, otherwise the 6-value table.
static inline uint32_t get_block_values(uint8_t* pDst, uint32_t l, uint32_t h)
{
    return (l > h) ? get_block_values8(pDst, l, h)
                   : get_block_values6(pDst, l, h);
}

// Fills pDst with the 16-bit-storage value table for endpoints l and h and
// returns the count reported by the helper used. Endpoints that are equal or
// ascending (l <= h) select the 6-value table; l > h selects the 8-value table.
static inline uint32_t get_block_values(uint16_t* pDst, uint32_t l, uint32_t h)
{
    if (!(l > h))
        return get_block_values6(pDst, l, h);

    return get_block_values8(pDst, l, h);
}
};

}
Expand Down

0 comments on commit 72b8ea0

Please sign in to comment.