Skip to content

Commit

Permalink
vertexcodec: Optimize encoding selection of zero groups
Browse files Browse the repository at this point in the history
When checking if a byte group can be encoded as zero, we need to check
16 bytes; to reduce branch mispredictions we can load the byte group into
two 64-bit registers and check whether their bitwise OR is zero. This results
in slightly suboptimal codegen for gcc, but is optimal for clang/MSVC.

The resulting helper (encodeBytesGroupZero) can also be used to determine if
a given vertex block can use zero encoding as a control mode. For cases when
the zero encoding is selected, this scans the bytes faster and does not rely
on auto-vectorization, which sometimes synthesizes rather poor code in this
case.

This change makes encoding ~5-10% faster depending on the data.
  • Loading branch information
zeux committed Jan 13, 2025
1 parent 8e716e4 commit ebf6416
Showing 1 changed file with 20 additions and 13 deletions.
33 changes: 20 additions & 13 deletions src/vertexcodec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,9 @@ const int kEncodeDefaultLevel = 2;

static size_t getVertexBlockSize(size_t vertex_size)
{
// make sure the entire block fits into the scratch buffer
size_t result = kVertexBlockSizeBytes / vertex_size;

// align to byte group size; we encode each byte as a byte group
// if vertex block is misaligned, it results in wasted bytes, so just truncate the block size
result &= ~(kByteGroupSize - 1);
// make sure the entire block fits into the scratch buffer and is aligned to byte group size
// note: the block size is implicitly part of the format, so we can't change it without breaking compatibility
size_t result = (kVertexBlockSizeBytes / vertex_size) & ~(kByteGroupSize - 1);

return (result < kVertexBlockMaxSize) ? result : kVertexBlockMaxSize;
}
Expand Down Expand Up @@ -179,21 +176,22 @@ static Stats* bytestats = NULL;
static Stats vertexstats[256];
#endif

static bool canEncodeZero(const unsigned char* buffer, size_t buffer_size)
static bool encodeBytesGroupZero(const unsigned char* buffer)
{
for (size_t i = 0; i < buffer_size; ++i)
if (buffer[i])
return false;
assert(kByteGroupSize == sizeof(unsigned long long) * 2);

return true;
unsigned long long v[2];
memcpy(v, buffer, sizeof(v));

return (v[0] | v[1]) == 0;
}

static size_t encodeBytesGroupMeasure(const unsigned char* buffer, int bits)
{
assert(bits >= 0 && bits <= 8);

if (bits == 0)
return canEncodeZero(buffer, kByteGroupSize) ? 0 : size_t(-1);
return encodeBytesGroupZero(buffer) ? 0 : size_t(-1);

if (bits == 8)
return kByteGroupSize;
Expand Down Expand Up @@ -455,9 +453,18 @@ static int estimateChannel(const unsigned char* vertex_data, size_t vertex_count
return best_channel == 2 ? best_channel | (xor_rot << 4) : best_channel;
}

// returns true when every byte group within the first vertex_count_aligned bytes of buffer is all zeros
// each iteration hands one group starting at buffer + i to encodeBytesGroupZero, which reads kByteGroupSize bytes
// note: presumably vertex_count_aligned is a multiple of kByteGroupSize and buffer is readable that far - confirm against the caller
static bool estimateControlZero(const unsigned char* buffer, size_t vertex_count_aligned)
{
// stop at the first group that contains any non-zero byte
for (size_t i = 0; i < vertex_count_aligned; i += kByteGroupSize)
if (!encodeBytesGroupZero(buffer + i))
return false;

// no group had a non-zero byte
return true;
}

static int estimateControl(const unsigned char* buffer, size_t vertex_count, size_t vertex_count_aligned, int level)
{
if (canEncodeZero(buffer, vertex_count))
if (estimateControlZero(buffer, vertex_count_aligned))
return 2; // zero encoding

if (level == 0)
Expand Down

0 comments on commit ebf6416

Please sign in to comment.