From 5c8639ae868132198aa3ad49eed89f32af556174 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 16 Sep 2024 17:43:37 -0700 Subject: [PATCH 1/3] simplify: Improve pass collapse tracing code We now record the reason for each pass termination and adjust edge collapse goal logic to match the code we are actually using for rejection; previously, goal in the log was used as an "ideal" error assuming no collapses are locked in progress but that is not realistic. We now display the actual goal used or, if the collapse list is exhausted, the error limit, which should be more clear. --- src/simplifier.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/simplifier.cpp b/src/simplifier.cpp index 10839173fe..6edcf3724a 100644 --- a/src/simplifier.cpp +++ b/src/simplifier.cpp @@ -1102,7 +1102,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* size_t edge_collapse_goal = triangle_collapse_goal / 2; #if TRACE - size_t stats[4] = {}; + size_t stats[7] = {}; #endif for (size_t i = 0; i < collapse_count; ++i) @@ -1112,10 +1112,16 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* TRACESTATS(0); if (c.error > error_limit) + { + TRACESTATS(4); break; + } if (triangle_collapses >= triangle_collapse_goal) + { + TRACESTATS(5); break; + } // we limit the error in each pass based on the error of optimal last collapse; since many collapses will be locked // as they will share vertices with other successfull collapses, we need to increase the acceptable error by some factor @@ -1124,7 +1130,10 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* // on average, each collapse is expected to lock 6 other collapses; to avoid degenerate passes on meshes with odd // topology, we only abort if we got over 1/6 collapses accordingly. 
if (c.error > error_goal && triangle_collapses > triangle_collapse_goal / 6) + { + TRACESTATS(6); break; + } unsigned int i0 = c.v0; unsigned int i1 = c.v1; @@ -1216,11 +1225,13 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* } #if TRACE - float error_goal_perfect = edge_collapse_goal < collapse_count ? collapses[collapse_order[edge_collapse_goal]].error : 0.f; + float error_goal_last = edge_collapse_goal < collapse_count ? 1.5f * collapses[collapse_order[edge_collapse_goal]].error : FLT_MAX; + float error_goal_limit = error_goal_last < error_limit ? error_goal_last : error_limit; - printf("removed %d triangles, error %e (goal %e); evaluated %d/%d collapses (done %d, skipped %d, invalid %d)\n", - int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_perfect), - int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2])); + printf("removed %d triangles, error %e (goal %e); evaluated %d/%d collapses (done %d, skipped %d, invalid %d); %s\n", + int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_limit), + int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2]), + stats[4] ? "error limit" : (stats[5] ? "count limit" : (stats[6] ? "error goal" : "out of collapses"))); #endif return edge_collapses; From 17e02c49d040d370ef4d8bd6b8c8083cb11c7c7d Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Mon, 16 Sep 2024 18:15:06 -0700 Subject: [PATCH 2/3] simplify: Avoid early termination of passes due to error_goal In large meshes with complex topology we occasionally hit a case where error_goal cutoff is not monotonic: a previous pass would simplify up to a certain goal, but a subsequent pass would arrive at a smaller goal (for example as invalid collapses get eliminated) and hit it before an error that it already encountered. If we already collapsed an edge with a given error it is always correct to collapse other edges with a smaller error. 
This change tweaks the conditional, which results in fewer passes on some meshes (e.g. thai_buddha simplifies ~10% faster), 2-3% smaller error on some meshes, and occasionally 1-2% larger error; most meshes, however, simplify as they did before. --- src/simplifier.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simplifier.cpp b/src/simplifier.cpp index 6edcf3724a..b6f8221259 100644 --- a/src/simplifier.cpp +++ b/src/simplifier.cpp @@ -1129,7 +1129,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* // on average, each collapse is expected to lock 6 other collapses; to avoid degenerate passes on meshes with odd // topology, we only abort if we got over 1/6 collapses accordingly. - if (c.error > error_goal && triangle_collapses > triangle_collapse_goal / 6) + if (c.error > error_goal && c.error > result_error && triangle_collapses > triangle_collapse_goal / 6) { TRACESTATS(6); break; From 9dddd2f866c196c19406a3e0284ebb21d964b624 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Tue, 17 Sep 2024 12:33:03 -0700 Subject: [PATCH 3/3] simplify: Improve precision of collapse error sorting We use a subset of bits of collapse error to produce an approximate ordering of collapses; to keep stack usage and cache utilization reasonable, we used 11-bit counting sort which uses 8 bits of exponent and 3 bits of mantissa. 3 bits of mantissa may not be enough and can result in choosing a suboptimal collapse order. Ideally we should probably use 5 bits here, but that needs much more stack space. For now, switch to 8+4 bits but to avoid doubling the stack usage, constrain the exponent range on the high end so that excessively high errors (>2^32) are bucketed together, as they are not useful. In the future there's an opportunity to similarly constrain the exponent space on the low end by clamping errors that are too low to zero.
In addition to improving the error selection in some cases, this can also sometimes reduce the number of simplification passes, as eligible collapses in the error-limited regime get sorted better and can be processed earlier. --- demo/tests.cpp | 2 +- src/simplifier.cpp | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/demo/tests.cpp b/demo/tests.cpp index f07919f07a..aef1aae55d 100644 --- a/demo/tests.cpp +++ b/demo/tests.cpp @@ -1151,7 +1151,7 @@ static void simplifyAttr(bool skip_g) { vb[y * 3 + x][0] = float(x); vb[y * 3 + x][1] = float(y); - vb[y * 3 + x][2] = 0.03f * x + 0.03f * (y % 2); + vb[y * 3 + x][2] = 0.03f * x + 0.03f * (y % 2) + (x == 2 && y == 7) * 0.03f; vb[y * 3 + x][3] = r; vb[y * 3 + x][4] = g; vb[y * 3 + x][5] = b; diff --git a/src/simplifier.cpp b/src/simplifier.cpp index b6f8221259..6db2a17041 100644 --- a/src/simplifier.cpp +++ b/src/simplifier.cpp @@ -1056,16 +1056,22 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapses, size_t collapse_count) { - const int sort_bits = 11; + // we use counting sort to order collapses by error; since the exact sort order is not as critical, + // only top 12 bits of exponent+mantissa (8 bits of exponent and 4 bits of mantissa) are used. + // to avoid excessive stack usage, we clamp the exponent range as collapses with errors much higher than 1 are not useful.
+ const unsigned int sort_bits = 12; + const unsigned int sort_bins = 2048 + 512; // exponent range [-127, 32) // fill histogram for counting sort - unsigned int histogram[1 << sort_bits]; + unsigned int histogram[sort_bins]; memset(histogram, 0, sizeof(histogram)); for (size_t i = 0; i < collapse_count; ++i) { // skip sign bit since error is non-negative - unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits); + unsigned int error = collapses[i].errorui; + unsigned int key = (error << 1) >> (32 - sort_bits); + key = key < sort_bins ? key : sort_bins - 1; histogram[key]++; } @@ -1073,7 +1079,7 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse // compute offsets based on histogram data size_t histogram_sum = 0; - for (size_t i = 0; i < 1 << sort_bits; ++i) + for (size_t i = 0; i < sort_bins; ++i) { size_t count = histogram[i]; histogram[i] = unsigned(histogram_sum); @@ -1086,7 +1092,9 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse for (size_t i = 0; i < collapse_count; ++i) { // skip sign bit since error is non-negative - unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits); + unsigned int error = collapses[i].errorui; + unsigned int key = (error << 1) >> (32 - sort_bits); + key = key < sort_bins ? key : sort_bins - 1; sort_order[histogram[key]++] = unsigned(i); }