Skip to content

Commit

Permalink
Collider optimization (#978)
Browse files Browse the repository at this point in the history
* vec changes

* collider optimization

* fix tracy

* slight optimization for polygon

* cleanup

* cleanup

* CMAKE_CXX_FLAGS should also be a string

* slightly optimize polygon

* should be lowest instead of min

* address comments

* update MANIFOLD_PAR guard

* try to fix black CI
  • Loading branch information
pca006132 authored Oct 9, 2024
1 parent 3febd48 commit 451829f
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 138 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/check_format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ jobs:
exclude: '*/third_party'
extensions: 'h,cpp,js,ts,html'
clangFormatVersion: 18
- uses: psf/black@stable
with:
options: "--check --verbose"
src: "./bindings/python/examples"
- uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip'
- uses: psf/black@stable
with:
options: "--check --verbose"
src: "./bindings/python/examples"
- name: "gersemi cmake check"
run: |
pip3 install gersemi
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ if(MANIFOLD_FUZZ)
# enable fuzztest fuzzing mode
set(FUZZTEST_FUZZING_MODE ON)
# address sanitizer required
list(APPEND CMAKE_CXX_FLAGS -fsanitize=address)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
endif()

if(TRACY_ENABLE)
Expand All @@ -118,7 +118,7 @@ if(TRACY_ENABLE)
list(APPEND MANIFOLD_FLAGS -DTRACY_MEMORY_USAGE)
endif()
if(NOT MSVC)
list(APPEND CMAKE_CXX_FLAGS -fno-omit-frame-pointer)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
endif()
else()
option(CMAKE_BUILD_TYPE "Build type" Release)
Expand Down
41 changes: 39 additions & 2 deletions include/manifold/vec_view.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#pragma once

#include <cstddef>
#include <limits>
#include <type_traits>
#include <vector>

#include "manifold/optional_assert.h"

Expand All @@ -31,8 +34,13 @@ class VecView {
using Iter = T *;
using IterC = const T *;

VecView() : ptr_(nullptr), size_(0) {}

VecView(T *ptr, size_t size) : ptr_(ptr), size_(size) {}

VecView(const std::vector<std::remove_cv_t<T>> &v)
: ptr_(v.data()), size_(v.size()) {}

VecView(const VecView &other) {
ptr_ = other.ptr_;
size_ = other.size_;
Expand Down Expand Up @@ -94,6 +102,37 @@ class VecView {

bool empty() const { return size_ == 0; }

VecView<T> view(size_t offset = 0,
size_t length = std::numeric_limits<size_t>::max()) {
if (length == std::numeric_limits<size_t>::max())
length = this->size_ - offset;
ASSERT(length >= 0, std::out_of_range("Vec::view out of range"));
ASSERT(offset + length <= this->size_ && offset >= 0,
std::out_of_range("Vec::view out of range"));
return VecView<T>(this->ptr_ + offset, length);
}

VecView<const T> cview(
size_t offset = 0,
size_t length = std::numeric_limits<size_t>::max()) const {
if (length == std::numeric_limits<size_t>::max())
length = this->size_ - offset;
ASSERT(length >= 0, std::out_of_range("Vec::cview out of range"));
ASSERT(offset + length <= this->size_ && offset >= 0,
std::out_of_range("Vec::cview out of range"));
return VecView<const T>(this->ptr_ + offset, length);
}

VecView<const T> view(
size_t offset = 0,
size_t length = std::numeric_limits<size_t>::max()) const {
return cview(offset, length);
}

T *data() { return this->ptr_; }

const T *data() const { return this->ptr_; }

#ifdef MANIFOLD_DEBUG
void Dump() const {
std::cout << "Vec = " << std::endl;
Expand All @@ -107,8 +146,6 @@ class VecView {
protected:
T *ptr_ = nullptr;
size_t size_ = 0;

VecView() = default;
};

} // namespace manifold
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,5 +146,5 @@ if(TRACY_ENABLE)
GIT_PROGRESS TRUE
)
FetchContent_MakeAvailable(tracy)
target_link_libraries(manifold INTERFACE TracyClient)
target_link_libraries(manifold PUBLIC TracyClient)
endif()
118 changes: 52 additions & 66 deletions src/collider.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
#include <intrin.h>
#endif

#if (MANIFOLD_PAR == 1)
#include <tbb/combinable.h>
#endif

namespace manifold {

namespace collider_internal {
Expand Down Expand Up @@ -155,18 +159,18 @@ struct CreateRadixTree {
};

template <typename T, const bool selfCollision, typename Recorder>
struct FindCollisions {
struct FindCollision {
VecView<const T> queries;
VecView<const Box> nodeBBox_;
VecView<const std::pair<int, int>> internalChildren_;
Recorder recorder;

int RecordCollision(int node, const int queryIdx) {
inline int RecordCollision(int node, const int queryIdx, SparseIndices& ind) {
bool overlaps = nodeBBox_[node].DoesOverlap(queries[queryIdx]);
if (overlaps && IsLeaf(node)) {
int leafIdx = Node2Leaf(node);
if (!selfCollision || leafIdx != queryIdx) {
recorder.record(queryIdx, leafIdx);
recorder.record(queryIdx, leafIdx, ind);
}
}
return overlaps && IsInternal(node); // Should traverse into node
Expand All @@ -179,15 +183,14 @@ struct FindCollisions {
int top = -1;
// Depth-first search
int node = kRoot;
// same implies that this query do not have any collision
if (recorder.earlyexit(queryIdx)) return;
SparseIndices& ind = recorder.local();
while (1) {
int internal = Node2Internal(node);
int child1 = internalChildren_[internal].first;
int child2 = internalChildren_[internal].second;

int traverse1 = RecordCollision(child1, queryIdx);
int traverse2 = RecordCollision(child2, queryIdx);
int traverse1 = RecordCollision(child1, queryIdx, ind);
int traverse2 = RecordCollision(child2, queryIdx, ind);

if (!traverse1 && !traverse2) {
if (top < 0) break; // done
Expand All @@ -199,48 +202,37 @@ struct FindCollisions {
}
}
}
recorder.end(queryIdx);
}
};

struct CountCollisions {
VecView<int> counts;
VecView<char> empty;
void record(int queryIdx, int _leafIdx) { counts[queryIdx]++; }
bool earlyexit(int _queryIdx) { return false; }
void end(int queryIdx) {
if (counts[queryIdx] == 0) empty[queryIdx] = 1;
}
};

template <const bool inverted>
struct SeqCollisionRecorder {
SparseIndices& queryTri_;
void record(int queryIdx, int leafIdx) const {
inline void record(int queryIdx, int leafIdx, SparseIndices& ind) const {
if (inverted)
queryTri_.Add(leafIdx, queryIdx);
ind.Add(leafIdx, queryIdx);
else
queryTri_.Add(queryIdx, leafIdx);
ind.Add(queryIdx, leafIdx);
}
bool earlyexit(int queryIdx) const { return false; }
void end(int queryIdx) const {}
SparseIndices& local() { return queryTri_; }
};

#if (MANIFOLD_PAR == 1)
template <const bool inverted>
struct ParCollisionRecorder {
SparseIndices& queryTri;
VecView<int> counts;
VecView<char> empty;
void record(int queryIdx, int leafIdx) {
int pos = counts[queryIdx]++;
tbb::combinable<SparseIndices>& store;
inline void record(int queryIdx, int leafIdx, SparseIndices& ind) const {
// Add may invoke something in parallel, and it may return in
// another thread, making thread local unsafe
// we need to explicitly forbid parallelization by passing a flag
if (inverted)
queryTri.Set(pos, leafIdx, queryIdx);
ind.Add(leafIdx, queryIdx, true);
else
queryTri.Set(pos, queryIdx, leafIdx);
ind.Add(queryIdx, leafIdx, true);
}
bool earlyexit(int queryIdx) const { return empty[queryIdx] == 1; }
void end(int queryIdx) const {}
SparseIndices& local() { return store.local(); }
};
#endif

struct BuildInternalBoxes {
VecView<Box> nodeBBox_;
Expand Down Expand Up @@ -331,44 +323,38 @@ class Collider {

template <const bool selfCollision = false, const bool inverted = false,
typename T>
SparseIndices Collisions(const VecView<const T>& queriesIn) const {
void Collisions(const VecView<const T>& queriesIn,
SparseIndices& queryTri) const {
ZoneScoped;
using collider_internal::FindCollisions;
// note that the length is 1 larger than the number of queries so the last
// element can store the sum when using exclusive scan
if (queriesIn.size() < collider_internal::kSequentialThreshold) {
SparseIndices queryTri;
for_each_n(
ExecutionPolicy::Seq, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision,
collider_internal::SeqCollisionRecorder<inverted>>{
queriesIn, nodeBBox_, internalChildren_, {queryTri}});
return queryTri;
} else {
// compute the number of collisions to determine the size for allocation
// and offset, this avoids the need for atomic
Vec<int> counts(queriesIn.size() + 1, 0);
Vec<char> empty(queriesIn.size(), 0);
using collider_internal::FindCollision;
#if (MANIFOLD_PAR == 1)
if (queriesIn.size() > collider_internal::kSequentialThreshold) {
tbb::combinable<SparseIndices> store;
for_each_n(
ExecutionPolicy::Par, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision, collider_internal::CountCollisions>{
queriesIn, nodeBBox_, internalChildren_, {counts, empty}});
// compute start index for each query and total count
manifold::exclusive_scan(counts.begin(), counts.end(), counts.begin(), 0,
std::plus<int>());
if (counts.back() == 0) return SparseIndices(0);
SparseIndices queryTri(counts.back());
// actually recording collisions
for_each_n(
ExecutionPolicy::Par, countAt(0), queriesIn.size(),
FindCollisions<T, selfCollision,
collider_internal::ParCollisionRecorder<inverted>>{
queriesIn,
nodeBBox_,
internalChildren_,
{queryTri, counts, empty}});
return queryTri;
FindCollision<T, selfCollision,
collider_internal::ParCollisionRecorder<inverted>>{
queriesIn, nodeBBox_, internalChildren_, {store}});

std::vector<SparseIndices> tmp;
store.combine_each(
[&](SparseIndices& ind) { tmp.emplace_back(std::move(ind)); });
queryTri.FromIndices(tmp);
return;
}
#endif
for_each_n(ExecutionPolicy::Seq, countAt(0), queriesIn.size(),
FindCollision<T, selfCollision,
collider_internal::SeqCollisionRecorder<inverted>>{
queriesIn, nodeBBox_, internalChildren_, {queryTri}});
}

template <const bool selfCollision = false, const bool inverted = false,
typename T>
SparseIndices Collisions(const VecView<const T>& queriesIn) const {
SparseIndices result;
Collisions<selfCollision, inverted, T>(queriesIn, result);
return result;
}

static uint32_t MortonCode(vec3 position, Box bBox) {
Expand Down
47 changes: 27 additions & 20 deletions src/polygon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "./collider.h"
#include "./utils.h"
#include "manifold/optional_assert.h"
#include "manifold/parallel.h"

namespace {
using namespace manifold;
Expand Down Expand Up @@ -308,6 +309,7 @@ class EarClip {
struct IdxCollider {
Collider collider;
std::vector<VertItr> itr;
SparseIndices ind;
};

// A circularly-linked list representing the polygon(s) that still need to be
Expand Down Expand Up @@ -489,7 +491,7 @@ class EarClip {
// values < -precision so they will never affect validity. The first
// totalCost is designed to give priority to sharper angles. Any cost < (-1
// - precision) has satisfied the Delaunay condition.
double EarCost(double precision, const IdxCollider &collider) const {
double EarCost(double precision, IdxCollider &collider) const {
vec2 openSide = left->pos - right->pos;
const vec2 center = 0.5 * (left->pos + right->pos);
const double scale = 4 / glm::dot(openSide, openSide);
Expand All @@ -502,27 +504,32 @@ class EarClip {
return totalCost;
}

Vec<Box> earBox;
earBox.push_back({vec3(center.x - radius, center.y - radius, 0),
vec3(center.x + radius, center.y + radius, 0)});
earBox.back().Union(vec3(pos, 0));
const SparseIndices toTest = collider.collider.Collisions(earBox.cview());
Box earBox = Box{vec3(center.x - radius, center.y - radius, 0),
vec3(center.x + radius, center.y + radius, 0)};
earBox.Union(vec3(pos, 0));
collider.collider.Collisions(VecView<const Box>(&earBox, 1),
collider.ind);

const int lid = left->mesh_idx;
const int rid = right->mesh_idx;
for (size_t i = 0; i < toTest.size(); ++i) {
const VertItr test = collider.itr[toTest.Get(i, true)];
if (!Clipped(test) && test->mesh_idx != mesh_idx &&
test->mesh_idx != lid &&
test->mesh_idx != rid) { // Skip duplicated verts
double cost = Cost(test, openSide, precision);
if (cost < -precision) {
cost = DelaunayCost(test->pos - center, scale, precision);
}
totalCost = std::max(totalCost, cost);
}
}

totalCost = transform_reduce(
countAt(0), countAt(collider.ind.size()), totalCost,
[](double a, double b) { return std::max(a, b); },
[&](size_t i) {
const VertItr test = collider.itr[collider.ind.Get(i, true)];
if (!Clipped(test) && test->mesh_idx != mesh_idx &&
test->mesh_idx != lid &&
test->mesh_idx != rid) { // Skip duplicated verts
double cost = Cost(test, openSide, precision);
if (cost < -precision) {
cost = DelaunayCost(test->pos - center, scale, precision);
}
return cost;
}
return std::numeric_limits<double>::lowest();
});
collider.ind.Clear();
return totalCost;
}

Expand Down Expand Up @@ -799,7 +806,7 @@ class EarClip {

// Recalculate the cost of the Vert v ear, updating it in the queue by
// removing and reinserting it.
void ProcessEar(VertItr v, const IdxCollider &collider) {
void ProcessEar(VertItr v, IdxCollider &collider) {
if (v->ear != earsQueue_.end()) {
earsQueue_.erase(v->ear);
v->ear = earsQueue_.end();
Expand Down Expand Up @@ -855,7 +862,7 @@ class EarClip {
void TriangulatePoly(VertItr start) {
ZoneScoped;

const IdxCollider vertCollider = VertCollider(start);
IdxCollider vertCollider = VertCollider(start);

if (vertCollider.itr.empty()) {
PRINT("Empty poly");
Expand Down
Loading

0 comments on commit 451829f

Please sign in to comment.