From 26058cffd84cf9249dad9982e311ba5fa54b544c Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Sat, 7 Sep 2024 11:34:28 +0200 Subject: [PATCH] vulkan: argsort: barriers must be under uniform control flow A return before a barrier leads to undefined behavior (UB). While the old code actually works on some devices, it fails on others (i.e. "smaller" GPUs). BTW, I think it would be better if it were possible to set specialization constants when the graph is built; that way the local workgroup could be sized appropriately. Signed-off-by: Salvatore Mesoraca --- src/vulkan-shaders/argsort.comp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/vulkan-shaders/argsort.comp b/src/vulkan-shaders/argsort.comp index e55414b03..d4fa45b1e 100644 --- a/src/vulkan-shaders/argsort.comp +++ b/src/vulkan-shaders/argsort.comp @@ -29,20 +29,18 @@ void main() { const int col = int(gl_LocalInvocationID.x); const uint row = gl_WorkGroupID.y; - if (col >= p.ncols_pad) { - return; - } - const uint row_offset = row * p.ncols; // initialize indices - dst_row[col] = col; + if (col < p.ncols_pad) { + dst_row[col] = col; + } barrier(); for (uint k = 2; k <= p.ncols_pad; k *= 2) { for (uint j = k / 2; j > 0; j /= 2) { const uint ixj = col ^ j; - if (ixj > col) { + if (col < p.ncols_pad && ixj > col) { if ((col & k) == 0) { if (dst_row[col] >= p.ncols || (dst_row[ixj] < p.ncols && (p.order == ASC ?