Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collider optimization #978

Merged
merged 13 commits into from
Oct 9, 2024
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ if(MANIFOLD_FUZZ)
# enable fuzztest fuzzing mode
set(FUZZTEST_FUZZING_MODE ON)
# address sanitizer required
list(APPEND CMAKE_CXX_FLAGS -fsanitize=address)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
endif()

if(TRACY_ENABLE)
Expand All @@ -118,7 +118,7 @@ if(TRACY_ENABLE)
list(APPEND MANIFOLD_FLAGS -DTRACY_MEMORY_USAGE)
endif()
if(NOT MSVC)
list(APPEND CMAKE_CXX_FLAGS -fno-omit-frame-pointer)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
endif()
else()
option(CMAKE_BUILD_TYPE "Build type" Release)
Expand Down
41 changes: 39 additions & 2 deletions include/manifold/vec_view.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#pragma once

#include <cstddef>
#include <limits>
#include <type_traits>
#include <vector>

#include "manifold/optional_assert.h"

Expand All @@ -31,8 +34,13 @@ class VecView {
using Iter = T *;
using IterC = const T *;

VecView() : ptr_(nullptr), size_(0) {}

VecView(T *ptr, size_t size) : ptr_(ptr), size_(size) {}

VecView(const std::vector<std::remove_cv_t<T>> &v)
: ptr_(v.data()), size_(v.size()) {}

VecView(const VecView &other) {
ptr_ = other.ptr_;
size_ = other.size_;
Expand Down Expand Up @@ -94,6 +102,37 @@ class VecView {

bool empty() const { return size_ == 0; }

VecView<T> view(size_t offset = 0,
size_t length = std::numeric_limits<size_t>::max()) {
if (length == std::numeric_limits<size_t>::max())
length = this->size_ - offset;
ASSERT(length >= 0, std::out_of_range("Vec::view out of range"));
ASSERT(offset + length <= this->size_ && offset >= 0,
std::out_of_range("Vec::view out of range"));
return VecView<T>(this->ptr_ + offset, length);
}

VecView<const T> cview(
size_t offset = 0,
size_t length = std::numeric_limits<size_t>::max()) const {
if (length == std::numeric_limits<size_t>::max())
length = this->size_ - offset;
ASSERT(length >= 0, std::out_of_range("Vec::cview out of range"));
ASSERT(offset + length <= this->size_ && offset >= 0,
std::out_of_range("Vec::cview out of range"));
return VecView<const T>(this->ptr_ + offset, length);
}

VecView<const T> view(
size_t offset = 0,
size_t length = std::numeric_limits<size_t>::max()) const {
return cview(offset, length);
}

T *data() { return this->ptr_; }

const T *data() const { return this->ptr_; }

#ifdef MANIFOLD_DEBUG
void Dump() const {
std::cout << "Vec = " << std::endl;
Expand All @@ -107,8 +146,6 @@ class VecView {
protected:
T *ptr_ = nullptr;
size_t size_ = 0;

VecView() = default;
};

} // namespace manifold
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -141,5 +141,5 @@ if(TRACY_ENABLE)
GIT_PROGRESS TRUE
)
FetchContent_MakeAvailable(tracy)
target_link_libraries(manifold INTERFACE TracyClient)
target_link_libraries(manifold PUBLIC TracyClient)
endif()
118 changes: 52 additions & 66 deletions src/collider.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
#include <intrin.h>
#endif

#ifdef MANIFOLD_PAR
#include <tbb/combinable.h>
#endif

namespace manifold {

namespace collider_internal {
Expand Down Expand Up @@ -155,18 +159,18 @@ struct CreateRadixTree {
};

template <typename T, const bool selfCollision, typename Recorder>
struct FindCollisions {
struct FindCollision {
VecView<const T> queries;
VecView<const Box> nodeBBox_;
VecView<const std::pair<int, int>> internalChildren_;
Recorder recorder;

int RecordCollision(int node, const int queryIdx) {
inline int RecordCollision(int node, const int queryIdx, SparseIndices& ind) {
bool overlaps = nodeBBox_[node].DoesOverlap(queries[queryIdx]);
if (overlaps && IsLeaf(node)) {
int leafIdx = Node2Leaf(node);
if (!selfCollision || leafIdx != queryIdx) {
recorder.record(queryIdx, leafIdx);
recorder.record(queryIdx, leafIdx, ind);
}
}
return overlaps && IsInternal(node); // Should traverse into node
Expand All @@ -179,15 +183,14 @@ struct FindCollisions {
int top = -1;
// Depth-first search
int node = kRoot;
// same implies that this query do not have any collision
if (recorder.earlyexit(queryIdx)) return;
SparseIndices& ind = recorder.local();
while (1) {
int internal = Node2Internal(node);
int child1 = internalChildren_[internal].first;
int child2 = internalChildren_[internal].second;

int traverse1 = RecordCollision(child1, queryIdx);
int traverse2 = RecordCollision(child2, queryIdx);
int traverse1 = RecordCollision(child1, queryIdx, ind);
int traverse2 = RecordCollision(child2, queryIdx, ind);

if (!traverse1 && !traverse2) {
if (top < 0) break; // done
Expand All @@ -199,48 +202,37 @@ struct FindCollisions {
}
}
}
recorder.end(queryIdx);
}
};

struct CountCollisions {
VecView<int> counts;
VecView<char> empty;
void record(int queryIdx, int _leafIdx) { counts[queryIdx]++; }
bool earlyexit(int _queryIdx) { return false; }
void end(int queryIdx) {
if (counts[queryIdx] == 0) empty[queryIdx] = 1;
}
};

template <const bool inverted>
struct SeqCollisionRecorder {
SparseIndices& queryTri_;
void record(int queryIdx, int leafIdx) const {
inline void record(int queryIdx, int leafIdx, SparseIndices& ind) const {
if (inverted)
queryTri_.Add(leafIdx, queryIdx);
ind.Add(leafIdx, queryIdx);
else
queryTri_.Add(queryIdx, leafIdx);
ind.Add(queryIdx, leafIdx);
}
bool earlyexit(int queryIdx) const { return false; }
void end(int queryIdx) const {}
SparseIndices& local() { return queryTri_; }
};

#ifdef MANIFOLD_PAR
template <const bool inverted>
struct ParCollisionRecorder {
SparseIndices& queryTri;
VecView<int> counts;
VecView<char> empty;
void record(int queryIdx, int leafIdx) {
int pos = counts[queryIdx]++;
tbb::combinable<SparseIndices>& store;
inline void record(int queryIdx, int leafIdx, SparseIndices& ind) const {
// Add may invoke something in parallel, and it may return in
// another thread, making thread local unsafe
// we need to explicitly forbid parallelization by passing a flag
if (inverted)
queryTri.Set(pos, leafIdx, queryIdx);
ind.Add(leafIdx, queryIdx, true);
else
queryTri.Set(pos, queryIdx, leafIdx);
ind.Add(queryIdx, leafIdx, true);
}
bool earlyexit(int queryIdx) const { return empty[queryIdx] == 1; }
void end(int queryIdx) const {}
SparseIndices& local() { return store.local(); }
};
#endif

struct BuildInternalBoxes {
VecView<Box> nodeBBox_;
Expand Down Expand Up @@ -331,44 +323,38 @@ class Collider {

template <const bool selfCollision = false, const bool inverted = false,
typename T>
SparseIndices Collisions(const VecView<const T>& queriesIn) const {
void Collisions(const VecView<const T>& queriesIn,
SparseIndices& queryTri) const {
ZoneScoped;
using collider_internal::FindCollisions;
// note that the length is 1 larger than the number of queries so the last
// element can store the sum when using exclusive scan
if (queriesIn.size() < collider_internal::kSequentialThreshold) {
SparseIndices queryTri;
for_each_n(
ExecutionPolicy::Seq, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision,
collider_internal::SeqCollisionRecorder<inverted>>{
queriesIn, nodeBBox_, internalChildren_, {queryTri}});
return queryTri;
} else {
// compute the number of collisions to determine the size for allocation
// and offset, this avoids the need for atomic
Vec<int> counts(queriesIn.size() + 1, 0);
Vec<char> empty(queriesIn.size(), 0);
using collider_internal::FindCollision;
#ifdef MANIFOLD_PAR
if (queriesIn.size() > collider_internal::kSequentialThreshold) {
tbb::combinable<SparseIndices> store;
for_each_n(
ExecutionPolicy::Par, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision, collider_internal::CountCollisions>{
queriesIn, nodeBBox_, internalChildren_, {counts, empty}});
// compute start index for each query and total count
manifold::exclusive_scan(counts.begin(), counts.end(), counts.begin(), 0,
std::plus<int>());
if (counts.back() == 0) return SparseIndices(0);
SparseIndices queryTri(counts.back());
// actually recording collisions
for_each_n(
ExecutionPolicy::Par, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision,
collider_internal::ParCollisionRecorder<inverted>>{
queriesIn,
nodeBBox_,
internalChildren_,
{queryTri, counts, empty}});
return queryTri;
FindCollision<T, selfCollision,
collider_internal::ParCollisionRecorder<inverted>>{
queriesIn, nodeBBox_, internalChildren_, {store}});

std::vector<SparseIndices> tmp;
store.combine_each(
[&](SparseIndices& ind) { tmp.emplace_back(std::move(ind)); });
queryTri.FromIndices(tmp);
return;
}
#endif
for_each_n(ExecutionPolicy::Seq, countAt(0), queriesIn.size(),
FindCollision<T, selfCollision,
collider_internal::SeqCollisionRecorder<inverted>>{
queriesIn, nodeBBox_, internalChildren_, {queryTri}});
}

template <const bool selfCollision = false, const bool inverted = false,
typename T>
SparseIndices Collisions(const VecView<const T>& queriesIn) const {
SparseIndices result;
Collisions<selfCollision, inverted, T>(queriesIn, result);
return result;
}

static uint32_t MortonCode(vec3 position, Box bBox) {
Expand Down
47 changes: 27 additions & 20 deletions src/polygon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "./collider.h"
#include "./utils.h"
#include "manifold/optional_assert.h"
#include "manifold/parallel.h"

namespace {
using namespace manifold;
Expand Down Expand Up @@ -308,6 +309,7 @@ class EarClip {
struct IdxCollider {
Collider collider;
std::vector<VertItr> itr;
SparseIndices ind;
};

// A circularly-linked list representing the polygon(s) that still need to be
Expand Down Expand Up @@ -489,7 +491,7 @@ class EarClip {
// values < -precision so they will never affect validity. The first
// totalCost is designed to give priority to sharper angles. Any cost < (-1
// - precision) has satisfied the Delaunay condition.
double EarCost(double precision, const IdxCollider &collider) const {
double EarCost(double precision, IdxCollider &collider) const {
vec2 openSide = left->pos - right->pos;
const vec2 center = 0.5 * (left->pos + right->pos);
const double scale = 4 / glm::dot(openSide, openSide);
Expand All @@ -502,27 +504,32 @@ class EarClip {
return totalCost;
}

Vec<Box> earBox;
earBox.push_back({vec3(center.x - radius, center.y - radius, 0),
vec3(center.x + radius, center.y + radius, 0)});
earBox.back().Union(vec3(pos, 0));
const SparseIndices toTest = collider.collider.Collisions(earBox.cview());
Box earBox = Box{vec3(center.x - radius, center.y - radius, 0),
vec3(center.x + radius, center.y + radius, 0)};
earBox.Union(vec3(pos, 0));
collider.collider.Collisions(VecView<const Box>(&earBox, 1),
collider.ind);

const int lid = left->mesh_idx;
const int rid = right->mesh_idx;
for (size_t i = 0; i < toTest.size(); ++i) {
const VertItr test = collider.itr[toTest.Get(i, true)];
if (!Clipped(test) && test->mesh_idx != mesh_idx &&
test->mesh_idx != lid &&
test->mesh_idx != rid) { // Skip duplicated verts
double cost = Cost(test, openSide, precision);
if (cost < -precision) {
cost = DelaunayCost(test->pos - center, scale, precision);
}
totalCost = std::max(totalCost, cost);
}
}

totalCost = transform_reduce(
countAt(0), countAt(collider.ind.size()), totalCost,
[](double a, double b) { return std::max(a, b); },
[&](size_t i) {
const VertItr test = collider.itr[collider.ind.Get(i, true)];
if (!Clipped(test) && test->mesh_idx != mesh_idx &&
test->mesh_idx != lid &&
test->mesh_idx != rid) { // Skip duplicated verts
double cost = Cost(test, openSide, precision);
if (cost < -precision) {
cost = DelaunayCost(test->pos - center, scale, precision);
}
return cost;
}
return std::numeric_limits<double>::lowest();
});
collider.ind.Clear();
return totalCost;
}

Expand Down Expand Up @@ -799,7 +806,7 @@ class EarClip {

// Recalculate the cost of the Vert v ear, updating it in the queue by
// removing and reinserting it.
void ProcessEar(VertItr v, const IdxCollider &collider) {
void ProcessEar(VertItr v, IdxCollider &collider) {
if (v->ear != earsQueue_.end()) {
earsQueue_.erase(v->ear);
v->ear = earsQueue_.end();
Expand Down Expand Up @@ -855,7 +862,7 @@ class EarClip {
void TriangulatePoly(VertItr start) {
ZoneScoped;

const IdxCollider vertCollider = VertCollider(start);
IdxCollider vertCollider = VertCollider(start);

if (vertCollider.itr.empty()) {
PRINT("Empty poly");
Expand Down
Loading
Loading