Simplified 2-value search, removed 8-bit expansion

richgel999 · Sep 5, 2022 · 72b8ea0 · 72b8ea0
1 parent c07f344
commit 72b8ea0
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 72 deletions.
diff --git a/rgbcx.cpp b/rgbcx.cpp
@@ -2792,29 +2792,19 @@ namespace rgbcx
 			return 0;
 		}
 
-		// if we only have two values (min and max) the search radius can be set to zero (setting the endpoints directly)
-		bool has_two_values = true;
-		for (uint32_t i = 0; i < 16; i++) {
-			uint32_t val = pPixels[i * stride];
-			if (val != min_val && val != max_val) {
-				has_two_values = false;
-				break;
-			}
-		}
-		if (has_two_values) {
-			search_rad = 0;
-		}
-
 		uint32_t best_err = UINT32_MAX;
 		for (uint32_t mode = 0; mode < 2; mode++)
 		{
 			if ((mode_flag & (1 << mode)) == 0)
 				continue;
 
-			for (int lo_delta = -(int)search_rad; lo_delta <= (int)search_rad; lo_delta++)
+			// the deltas go 0, -1, 1, -2, 2, -3, 3, etc., meaning 2-colour blocks are found first
+			for (int lo_count = 0; lo_count <= (int)search_rad << 1; lo_count++)
 			{
-				for (int hi_delta = -(int)search_rad; hi_delta <= (int)search_rad; hi_delta++)
+				int lo_delta = ((lo_count & 1) ? -lo_count : lo_count) >> 1;
+				for (int hi_count = 0; hi_count <= (int)search_rad << 1; hi_count++)
 				{
+					int hi_delta = ((hi_count & 1) ? -hi_count : hi_count) >> 1;
 					bc4_block trial_block;
 					trial_block.m_endpoints[0] = (uint8_t)clamp<int>(max_val + hi_delta, 0, 255);
 					trial_block.m_endpoints[1] = (uint8_t)clamp<int>(min_val + lo_delta, 0, 255);
@@ -2830,10 +2820,8 @@ namespace rgbcx
 					else if (!trial_block.is_alpha6_block())
 						std::swap(trial_block.m_endpoints[0], trial_block.m_endpoints[1]);
 
-					// note: block vals are expanded to 8:6 fixed point, as is the error,
-					// with 8:6 able to accumulate 16x the worse-case error (255.98 ^ 2)
-					uint16_t block_vals14[8];
-					trial_block.get_block_values(block_vals14, trial_block.m_endpoints[0], trial_block.m_endpoints[1]);
+					uint8_t block_vals[8];
+					trial_block.get_block_values(block_vals, trial_block.m_endpoints[0], trial_block.m_endpoints[1]);
 
 					uint32_t trial_err = 0;
 					uint8_t trial_sels[16];
@@ -2842,9 +2830,8 @@ namespace rgbcx
 					{
 						memcpy(trial_sels, pForce_selectors, 16);
 
-						for (uint32_t i = 0; i < 16; i++) {
-							trial_err += squarei(block_vals14[pForce_selectors[i]] - bc4_block::expand8to14(pPixels[i * stride]));
-						}
+						for (uint32_t i = 0; i < 16; i++) 
+							trial_err += squarei(block_vals[pForce_selectors[i]] - pPixels[i * stride]);
 					}
 					else
 					{
@@ -2854,7 +2841,7 @@ namespace rgbcx
 							uint32_t best_index = 0;
 							for (uint32_t j = 0; j < 8; j++)
 							{
-								uint32_t err = squarei(block_vals14[j] - bc4_block::expand8to14(pPixels[i * stride]));
+								uint32_t err = squarei(block_vals[j] - pPixels[i * stride]);
 								if (err < best_index_err)
 								{
 									best_index_err = err;
@@ -2901,8 +2888,7 @@ namespace rgbcx
 		} // mode
 	error_reached_zero:
 
-		// scale the error back to 8-bit from 8:6 fixed point (to match what was previously returned)
-		return (best_err + 63) >> 12;
+		return best_err;
 	}
 
 	void encode_bc3(void* pDst, const uint8_t* pPixels, uint32_t flags, uint32_t total_orderings_to_try)

diff --git a/rgbcx.h b/rgbcx.h
@@ -388,12 +388,6 @@ namespace rgbcx
 			return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1);
 		}
 
-		// Expands an 8-bit value to 14-bit
-		static inline uint32_t expand8to14(uint32_t val) {
-			return (val << 6) | (val >> 2);
-		}
-
-		// Interpolated values as 8-bit (as per BC3 alpha)
 		static inline uint32_t get_block_values6(uint8_t* pDst, uint32_t l, uint32_t h)
 		{
 			pDst[0] = static_cast<uint8_t>(l);
@@ -407,23 +401,6 @@ namespace rgbcx
 			return 6;
 		}
 
-		// Interpolated values expanded to 14-bit (for BC4/5)
-		static inline uint32_t get_block_values6(uint16_t* pDst, uint32_t l, uint32_t h)
-		{
-			uint32_t l14 = expand8to14(l);
-			uint32_t h14 = expand8to14(h);
-			pDst[0] = static_cast<uint16_t>(l14);
-			pDst[1] = static_cast<uint16_t>(h14);
-			pDst[2] = static_cast<uint16_t>((l14 * 4 + h14    ) / 5);
-			pDst[3] = static_cast<uint16_t>((l14 * 3 + h14 * 2) / 5);
-			pDst[4] = static_cast<uint16_t>((l14 * 2 + h14 * 3) / 5);
-			pDst[5] = static_cast<uint16_t>((l14     + h14 * 4) / 5);
-			pDst[6] = 0;
-			pDst[7] = static_cast<uint16_t>(expand8to14(255));
-			return 6;
-		}
-
-		// Interpolated values as 8-bit (as per BC3 alpha)
 		static inline uint32_t get_block_values8(uint8_t* pDst, uint32_t l, uint32_t h)
 		{
 			pDst[0] = static_cast<uint8_t>(l);
@@ -437,37 +414,13 @@ namespace rgbcx
 			return 8;
 		}
 
-		// Interpolated values expanded to 14-bit (for BC4/5)
-		static inline uint32_t get_block_values8(uint16_t* pDst, uint32_t l, uint32_t h)
-		{
-			uint32_t l14 = expand8to14(l);
-			uint32_t h14 = expand8to14(h);
-			pDst[0] = static_cast<uint16_t>(l14);
-			pDst[1] = static_cast<uint16_t>(h14);
-			pDst[2] = static_cast<uint16_t>((l14 * 6 + h14    ) / 7);
-			pDst[3] = static_cast<uint16_t>((l14 * 5 + h14 * 2) / 7);
-			pDst[4] = static_cast<uint16_t>((l14 * 4 + h14 * 3) / 7);
-			pDst[5] = static_cast<uint16_t>((l14 * 3 + h14 * 4) / 7);
-			pDst[6] = static_cast<uint16_t>((l14 * 2 + h14 * 5) / 7);
-			pDst[7] = static_cast<uint16_t>((l14     + h14 * 6) / 7);
-			return 8;
-		}
-
 		static inline uint32_t get_block_values(uint8_t* pDst, uint32_t l, uint32_t h)
 		{
 			if (l > h)
 				return get_block_values8(pDst, l, h);
 			else
 				return get_block_values6(pDst, l, h);
 		}
-
-		static inline uint32_t get_block_values(uint16_t* pDst, uint32_t l, uint32_t h)
-		{
-			if (l > h)
-				return get_block_values8(pDst, l, h);
-			else
-				return get_block_values6(pDst, l, h);
-		}
 	};
 
 }