diff --git a/.github/workflows/check_format.yml b/.github/workflows/check_format.yml index bb82889e3..bf56a2fce 100644 --- a/.github/workflows/check_format.yml +++ b/.github/workflows/check_format.yml @@ -18,14 +18,14 @@ jobs: exclude: '*/third_party' extensions: 'h,cpp,js,ts,html' clangFormatVersion: 18 - - uses: psf/black@stable - with: - options: "--check --verbose" - src: "./bindings/python/examples" - uses: actions/setup-python@v5 with: python-version: '3.12' cache: 'pip' + - uses: psf/black@stable + with: + options: "--check --verbose" + src: "./bindings/python/examples" - name: "gersemi cmake check" run: | pip3 install gersemi diff --git a/CMakeLists.txt b/CMakeLists.txt index feccf217c..e62f28dae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,7 +109,7 @@ if(MANIFOLD_FUZZ) # enable fuzztest fuzzing mode set(FUZZTEST_FUZZING_MODE ON) # address sanitizer required - list(APPEND CMAKE_CXX_FLAGS -fsanitize=address) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") endif() if(TRACY_ENABLE) @@ -118,7 +118,7 @@ if(TRACY_ENABLE) list(APPEND MANIFOLD_FLAGS -DTRACY_MEMORY_USAGE) endif() if(NOT MSVC) - list(APPEND CMAKE_CXX_FLAGS -fno-omit-frame-pointer) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") endif() else() option(CMAKE_BUILD_TYPE "Build type" Release) diff --git a/include/manifold/vec_view.h b/include/manifold/vec_view.h index 12440eb52..4dc5f7344 100644 --- a/include/manifold/vec_view.h +++ b/include/manifold/vec_view.h @@ -15,6 +15,9 @@ #pragma once #include +#include +#include +#include #include "manifold/optional_assert.h" @@ -31,8 +34,13 @@ class VecView { using Iter = T *; using IterC = const T *; + VecView() : ptr_(nullptr), size_(0) {} + VecView(T *ptr, size_t size) : ptr_(ptr), size_(size) {} + VecView(const std::vector> &v) + : ptr_(v.data()), size_(v.size()) {} + VecView(const VecView &other) { ptr_ = other.ptr_; size_ = other.size_; @@ -94,6 +102,37 @@ class VecView { bool empty() const { return size_ == 0; } + VecView view(size_t offset = 0, + size_t length = std::numeric_limits::max()) { + if (length == std::numeric_limits::max()) + length = this->size_ - offset; + ASSERT(length >= 0, std::out_of_range("Vec::view out of range")); + ASSERT(offset + length <= this->size_ && offset >= 0, + std::out_of_range("Vec::view out of range")); + return VecView(this->ptr_ + offset, length); + } + + VecView cview( + size_t offset = 0, + size_t length = std::numeric_limits::max()) const { + if (length == std::numeric_limits::max()) + length = this->size_ - offset; + ASSERT(length >= 0, std::out_of_range("Vec::cview out of range")); + ASSERT(offset + length <= this->size_ && offset >= 0, + std::out_of_range("Vec::cview out of range")); + return VecView(this->ptr_ + offset, length); + } + + VecView view( + size_t offset = 0, + size_t length = std::numeric_limits::max()) const { + return cview(offset, length); + } + + T *data() { return this->ptr_; } + + const T *data() const { return this->ptr_; } + #ifdef MANIFOLD_DEBUG void Dump() const { std::cout << "Vec = " << std::endl; @@ -107,8 +146,6 @@ class VecView { protected: T *ptr_ = nullptr; size_t size_ = 0; - - VecView() = default; }; } // namespace manifold diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 40350ac32..70b2293a2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -146,5 +146,5 @@ if(TRACY_ENABLE) GIT_PROGRESS TRUE ) FetchContent_MakeAvailable(tracy) - target_link_libraries(manifold INTERFACE TracyClient) + target_link_libraries(manifold PUBLIC TracyClient) endif() diff --git a/src/collider.h b/src/collider.h index b88a322cd..5cc570408 100644 --- a/src/collider.h +++ b/src/collider.h @@ -23,6 +23,10 @@ #include #endif +#if (MANIFOLD_PAR == 1) +#include +#endif + namespace manifold { namespace collider_internal { @@ -155,18 +159,18 @@ struct CreateRadixTree { }; template -struct FindCollisions { +struct FindCollision { VecView queries; VecView nodeBBox_; VecView> internalChildren_; Recorder recorder; - int RecordCollision(int node, const int queryIdx) { + inline int RecordCollision(int node, const int queryIdx, SparseIndices& ind) { bool overlaps = nodeBBox_[node].DoesOverlap(queries[queryIdx]); if (overlaps && IsLeaf(node)) { int leafIdx = Node2Leaf(node); if (!selfCollision || leafIdx != queryIdx) { - recorder.record(queryIdx, leafIdx); + recorder.record(queryIdx, leafIdx, ind); } } return overlaps && IsInternal(node); // Should traverse into node @@ -179,15 +183,14 @@ struct FindCollisions { int top = -1; // Depth-first search int node = kRoot; - // same implies that this query do not have any collision - if (recorder.earlyexit(queryIdx)) return; + SparseIndices& ind = recorder.local(); while (1) { int internal = Node2Internal(node); int child1 = internalChildren_[internal].first; int child2 = internalChildren_[internal].second; - int traverse1 = RecordCollision(child1, queryIdx); - int traverse2 = RecordCollision(child2, queryIdx); + int traverse1 = RecordCollision(child1, queryIdx, ind); + int traverse2 = RecordCollision(child2, queryIdx, ind); if (!traverse1 && !traverse2) { if (top < 0) break; // done @@ -199,48 +202,37 @@ struct FindCollisions { } } } - recorder.end(queryIdx); - } -}; - -struct CountCollisions { - VecView counts; - VecView empty; - void record(int queryIdx, int _leafIdx) { counts[queryIdx]++; } - bool earlyexit(int _queryIdx) { return false; } - void end(int queryIdx) { - if (counts[queryIdx] == 0) empty[queryIdx] = 1; } }; template struct SeqCollisionRecorder { SparseIndices& queryTri_; - void record(int queryIdx, int leafIdx) const { + inline void record(int queryIdx, int leafIdx, SparseIndices& ind) const { if (inverted) - queryTri_.Add(leafIdx, queryIdx); + ind.Add(leafIdx, queryIdx); else - queryTri_.Add(queryIdx, leafIdx); + ind.Add(queryIdx, leafIdx); } - bool earlyexit(int queryIdx) const { return false; } - void end(int queryIdx) const {} + SparseIndices& local() { return queryTri_; } }; +#if (MANIFOLD_PAR == 1) template struct ParCollisionRecorder { - SparseIndices& queryTri; - VecView counts; - VecView empty; - void record(int queryIdx, int leafIdx) { - int pos = counts[queryIdx]++; + tbb::combinable& store; + inline void record(int queryIdx, int leafIdx, SparseIndices& ind) const { + // Add may invoke something in parallel, and it may return in + // another thread, making thread local unsafe + // we need to explicitly forbid parallelization by passing a flag if (inverted) - queryTri.Set(pos, leafIdx, queryIdx); + ind.Add(leafIdx, queryIdx, true); else - queryTri.Set(pos, queryIdx, leafIdx); + ind.Add(queryIdx, leafIdx, true); } - bool earlyexit(int queryIdx) const { return empty[queryIdx] == 1; } - void end(int queryIdx) const {} + SparseIndices& local() { return store.local(); } }; +#endif struct BuildInternalBoxes { VecView nodeBBox_; @@ -331,44 +323,38 @@ class Collider { template - SparseIndices Collisions(const VecView& queriesIn) const { + void Collisions(const VecView& queriesIn, + SparseIndices& queryTri) const { ZoneScoped; - using collider_internal::FindCollisions; - // note that the length is 1 larger than the number of queries so the last - // element can store the sum when using exclusive scan - if (queriesIn.size() < collider_internal::kSequentialThreshold) { - SparseIndices queryTri; - for_each_n( - ExecutionPolicy::Seq, countAt(0), queriesIn.size(), - FindCollisions>{ - queriesIn, nodeBBox_, internalChildren_, {queryTri}}); - return queryTri; - } else { - // compute the number of collisions to determine the size for allocation - // and offset, this avoids the need for atomic - Vec counts(queriesIn.size() + 1, 0); - Vec empty(queriesIn.size(), 0); + using collider_internal::FindCollision; +#if (MANIFOLD_PAR == 1) + if (queriesIn.size() > collider_internal::kSequentialThreshold) { + tbb::combinable store; for_each_n( ExecutionPolicy::Par, countAt(0), queriesIn.size(), - FindCollisions{ - queriesIn, nodeBBox_, internalChildren_, {counts, empty}}); - // compute start index for each query and total count - manifold::exclusive_scan(counts.begin(), counts.end(), counts.begin(), 0, - std::plus()); - if (counts.back() == 0) return SparseIndices(0); - SparseIndices queryTri(counts.back()); - // actually recording collisions - for_each_n( - ExecutionPolicy::Par, countAt(0), queriesIn.size(), - FindCollisions>{ - queriesIn, - nodeBBox_, - internalChildren_, - {queryTri, counts, empty}}); - return queryTri; + FindCollision>{ + queriesIn, nodeBBox_, internalChildren_, {store}}); + + std::vector tmp; + store.combine_each( + [&](SparseIndices& ind) { tmp.emplace_back(std::move(ind)); }); + queryTri.FromIndices(tmp); + return; } +#endif + for_each_n(ExecutionPolicy::Seq, countAt(0), queriesIn.size(), + FindCollision>{ + queriesIn, nodeBBox_, internalChildren_, {queryTri}}); + } + + template + SparseIndices Collisions(const VecView& queriesIn) const { + SparseIndices result; + Collisions(queriesIn, result); + return result; } static uint32_t MortonCode(vec3 position, Box bBox) { diff --git a/src/polygon.cpp b/src/polygon.cpp index b2904f70d..66dc8e8ff 100644 --- a/src/polygon.cpp +++ b/src/polygon.cpp @@ -21,6 +21,7 @@ #include "./collider.h" #include "./utils.h" #include "manifold/optional_assert.h" +#include "manifold/parallel.h" namespace { using namespace manifold; @@ -308,6 +309,7 @@ class EarClip { struct IdxCollider { Collider collider; std::vector itr; + SparseIndices ind; }; // A circularly-linked list representing the polygon(s) that still need to be @@ -489,7 +491,7 @@ class EarClip { // values < -precision so they will never affect validity. The first // totalCost is designed to give priority to sharper angles. Any cost < (-1 // - precision) has satisfied the Delaunay condition. - double EarCost(double precision, const IdxCollider &collider) const { + double EarCost(double precision, IdxCollider &collider) const { vec2 openSide = left->pos - right->pos; const vec2 center = 0.5 * (left->pos + right->pos); const double scale = 4 / glm::dot(openSide, openSide); @@ -502,27 +504,32 @@ class EarClip { return totalCost; } - Vec earBox; - earBox.push_back({vec3(center.x - radius, center.y - radius, 0), - vec3(center.x + radius, center.y + radius, 0)}); - earBox.back().Union(vec3(pos, 0)); - const SparseIndices toTest = collider.collider.Collisions(earBox.cview()); + Box earBox = Box{vec3(center.x - radius, center.y - radius, 0), + vec3(center.x + radius, center.y + radius, 0)}; + earBox.Union(vec3(pos, 0)); + collider.collider.Collisions(VecView(&earBox, 1), + collider.ind); const int lid = left->mesh_idx; const int rid = right->mesh_idx; - for (size_t i = 0; i < toTest.size(); ++i) { - const VertItr test = collider.itr[toTest.Get(i, true)]; - if (!Clipped(test) && test->mesh_idx != mesh_idx && - test->mesh_idx != lid && - test->mesh_idx != rid) { // Skip duplicated verts - double cost = Cost(test, openSide, precision); - if (cost < -precision) { - cost = DelaunayCost(test->pos - center, scale, precision); - } - totalCost = std::max(totalCost, cost); - } - } + totalCost = transform_reduce( + countAt(0), countAt(collider.ind.size()), totalCost, + [](double a, double b) { return std::max(a, b); }, + [&](size_t i) { + const VertItr test = collider.itr[collider.ind.Get(i, true)]; + if (!Clipped(test) && test->mesh_idx != mesh_idx && + test->mesh_idx != lid && + test->mesh_idx != rid) { // Skip duplicated verts + double cost = Cost(test, openSide, precision); + if (cost < -precision) { + cost = DelaunayCost(test->pos - center, scale, precision); + } + return cost; + } + return std::numeric_limits::lowest(); + }); + collider.ind.Clear(); return totalCost; } @@ -799,7 +806,7 @@ class EarClip { // Recalculate the cost of the Vert v ear, updating it in the queue by // removing and reinserting it. - void ProcessEar(VertItr v, const IdxCollider &collider) { + void ProcessEar(VertItr v, IdxCollider &collider) { if (v->ear != earsQueue_.end()) { earsQueue_.erase(v->ear); v->ear = earsQueue_.end(); @@ -855,7 +862,7 @@ class EarClip { void TriangulatePoly(VertItr start) { ZoneScoped; - const IdxCollider vertCollider = VertCollider(start); + IdxCollider vertCollider = VertCollider(start); if (vertCollider.itr.empty()) { PRINT("Empty poly"); diff --git a/src/sparse.h b/src/sparse.h index 222b284c1..9eedb16a3 100644 --- a/src/sparse.h +++ b/src/sparse.h @@ -60,6 +60,22 @@ class SparseIndices { SparseIndices() = default; SparseIndices(size_t size) { data_ = Vec(size * sizeof(int64_t)); } + void Clear() { data_.clear(false); } + + void FromIndices(const std::vector& indices) { + std::vector sizes; + size_t total_size = 0; + for (const auto& ind : indices) { + sizes.push_back(total_size); + total_size += ind.data_.size(); + } + data_ = Vec(total_size); + for_each_n(ExecutionPolicy::Par, countAt(0), indices.size(), [&](size_t i) { + std::copy(indices[i].data_.begin(), indices[i].data_.end(), + data_.begin() + sizes[i]); + }); + } + size_t size() const { return data_.size() / sizeof(int64_t); } Vec Copy(bool use_q) const { @@ -130,8 +146,8 @@ class SparseIndices { data_.size() / sizeof(int32_t)); } - inline void Add(int p, int q) { - for (unsigned int i = 0; i < sizeof(int64_t); ++i) data_.push_back(-1); + inline void Add(int p, int q, bool seq = false) { + data_.extend(sizeof(int64_t), seq); Set(size() - 1, p, q); } diff --git a/src/vec.h b/src/vec.h index f90b97cb3..9d8273503 100644 --- a/src/vec.h +++ b/src/vec.h @@ -19,7 +19,6 @@ #define TracyAllocS(ptr, size, n) (void)0 #define TracyFreeS(ptr, n) (void)0 #endif -#include #include #include "manifold/parallel.h" @@ -112,12 +111,11 @@ class Vec : public VecView { } this->size_ = other.size_; capacity_ = other.size_; - auto policy = autoPolicy(this->size_); if (this->size_ != 0) { this->ptr_ = reinterpret_cast(malloc(this->size_ * sizeof(T))); ASSERT(this->ptr_ != nullptr, std::bad_alloc()); TracyAllocS(this->ptr_, this->size_ * sizeof(T), 3); - copy(policy, other.begin(), other.end(), this->ptr_); + manifold::copy(other.begin(), other.end(), this->ptr_); } return *this; } @@ -145,25 +143,32 @@ class Vec : public VecView { std::swap(capacity_, other.capacity_); } - inline void push_back(const T &val) { + inline void push_back(const T &val, bool seq = false) { if (this->size_ >= capacity_) { // avoid dangling pointer in case val is a reference of our array T val_copy = val; - reserve(capacity_ == 0 ? 128 : capacity_ * 2); + reserve(capacity_ == 0 ? 128 : capacity_ * 2, seq); this->ptr_[this->size_++] = val_copy; return; } this->ptr_[this->size_++] = val; } - void reserve(size_t n) { + inline void extend(size_t n, bool seq = false) { + if (this->size_ + n >= capacity_) + reserve(capacity_ == 0 ? 128 : std::max(capacity_ * 2, this->size_ + n), + seq); + this->size_ += n; + } + + void reserve(size_t n, bool seq = false) { if (n > capacity_) { T *newBuffer = reinterpret_cast(malloc(n * sizeof(T))); ASSERT(newBuffer != nullptr, std::bad_alloc()); TracyAllocS(newBuffer, n * sizeof(T), 3); if (this->size_ > 0) - copy(autoPolicy(this->size_), this->ptr_, this->ptr_ + this->size_, - newBuffer); + manifold::copy(seq ? ExecutionPolicy::Seq : autoPolicy(this->size_), + this->ptr_, this->ptr_ + this->size_, newBuffer); if (this->ptr_ != nullptr) { TracyFreeS(this->ptr_, 3); free(this->ptr_); @@ -186,7 +191,10 @@ class Vec : public VecView { void pop_back() { resize(this->size_ - 1); } - void clear() { resize(0); } + void clear(bool shrink = true) { + this->size_ = 0; + if (shrink) shrink_to_fit(); + } void shrink_to_fit() { T *newBuffer = nullptr; @@ -194,8 +202,7 @@ class Vec : public VecView { newBuffer = reinterpret_cast(malloc(this->size_ * sizeof(T))); ASSERT(newBuffer != nullptr, std::bad_alloc()); TracyAllocS(newBuffer, this->size_ * sizeof(T), 3); - copy(autoPolicy(this->size_), this->ptr_, this->ptr_ + this->size_, - newBuffer); + manifold::copy(this->ptr_, this->ptr_ + this->size_, newBuffer); } if (this->ptr_ != nullptr) { TracyFreeS(this->ptr_, 3); @@ -205,36 +212,6 @@ class Vec : public VecView { capacity_ = this->size_; } - VecView view(size_t offset = 0, - size_t length = std::numeric_limits::max()) { - if (length == std::numeric_limits::max()) - length = this->size_ - offset; - ASSERT(length >= 0, std::out_of_range("Vec::view out of range")); - ASSERT(offset + length <= this->size_ && offset >= 0, - std::out_of_range("Vec::view out of range")); - return VecView(this->ptr_ + offset, length); - } - - VecView cview( - size_t offset = 0, - size_t length = std::numeric_limits::max()) const { - if (length == std::numeric_limits::max()) - length = this->size_ - offset; - ASSERT(length >= 0, std::out_of_range("Vec::cview out of range")); - ASSERT(offset + length <= this->size_ && offset >= 0, - std::out_of_range("Vec::cview out of range")); - return VecView(this->ptr_ + offset, length); - } - - VecView view( - size_t offset = 0, - size_t length = std::numeric_limits::max()) const { - return cview(offset, length); - } - - T *data() { return this->ptr_; } - const T *data() const { return this->ptr_; } - size_t capacity() const { return capacity_; } private: