From 19cdb4de1c195d4813bd2bb199582d69bb0724be Mon Sep 17 00:00:00 2001 From: Andrew Copland Date: Thu, 14 Nov 2024 19:05:59 +0000 Subject: [PATCH 1/3] Remove profiling from Linux Debug builds, don't profile in debug --- CMakePresets.json | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CMakePresets.json b/CMakePresets.json index 228711f3fdd..9fa7a056ba4 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -77,8 +77,7 @@ "cacheVariables": { "CMAKE_EXPORT_COMPILE_COMMANDS": true, "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}", - "CMAKE_BUILD_TYPE": "Debug", - "PROFILER_ENABLED": "1" + "CMAKE_BUILD_TYPE": "Debug" }, "vendor": { "microsoft.com/VisualStudioSettings/CMake/1.0": { @@ -138,8 +137,7 @@ "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}", "CMAKE_BUILD_TYPE": "Debug", "CMAKE_C_COMPILER": "/usr/bin/clang", - "CMAKE_CXX_COMPILER": "/usr/bin/clang++", - "PROFILER_ENABLED": "1" + "CMAKE_CXX_COMPILER": "/usr/bin/clang++" }, "vendor": { "microsoft.com/VisualStudioSettings/CMake/1.0": { From 5c02f64558c7429ed722f8a00c064339e2bfccb2 Mon Sep 17 00:00:00 2001 From: Andrew Copland Date: Thu, 14 Nov 2024 19:06:33 +0000 Subject: [PATCH 2/3] Fix compilation error in MSVC, can't pass runtime string to Macro --- src/Frame.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Frame.cpp b/src/Frame.cpp index 7abeb275e29..1c2169d87f0 100644 --- a/src/Frame.cpp +++ b/src/Frame.cpp @@ -305,7 +305,8 @@ void Frame::CollideFrames(void (*callback)(CollisionContact *)) if (!frame.m_collisionSpace) continue; - PROFILE_SCOPED_DESC(frame.m_label.c_str()) + // Used to be frame.m_label.c_str(); however, the preprocessor is evaluated at compile time, so a runtime string fails on MSVC + PROFILE_SCOPED_DESC("Loop frame") frame.m_collisionSpace->Collide(callback); } } From 04732038c3da5b330720ee12ef0abc624c878c92 Mon Sep 17 00:00:00 2001 From: Webster Sheets Date: Thu, 14 Nov 2024 14:55:06 -0500 Subject: [PATCH 3/3] Enable profiler 
multithreading when using std::atomic CAS-lock - By default, uses sequentially-consistent std::atomic_compare_exchange_weak instead of hand-rolled asm - USE_CHRONO is approx. 4.3x faster on AMD Ryzen 7 5800X than the hand-rolled rdtsc implementation (50ns per-call overhead vs. 216ns per-call overhead) - Performance on aarch64 is unknown at this time --- contrib/profiler/Profiler.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/contrib/profiler/Profiler.cpp b/contrib/profiler/Profiler.cpp index 6ec3f9c499a..0be8f50dccd 100644 --- a/contrib/profiler/Profiler.cpp +++ b/contrib/profiler/Profiler.cpp @@ -25,7 +25,6 @@ #include "Profiler.h" #if defined(USE_CHRONO) -#undef __PROFILER_SMP__ #include #endif @@ -115,7 +114,7 @@ namespace Profiler { #if !defined(USE_CHRONO) volatile u32 mLock; #else - std::atomic_uint32_t mLock; + std::atomic_uint32_t mLock = { 0 }; #endif }; #else @@ -915,7 +914,7 @@ namespace Profiler { #if defined(__PROFILER_ENABLED__) - threadlocal Caller::ThreadState Caller::thisThread = { {0}, 0, 0, 0, 0 }; + threadlocal Caller::ThreadState Caller::thisThread = { {}, 0, 0, 0, 0 }; f64 Caller::mTimerOverhead = 0, Caller::mRdtscOverhead = 0; u64 Caller::mGlobalDuration = 0; Caller::Max Caller::maxStats; @@ -999,7 +998,7 @@ namespace Profiler { u64 globalStart = Timer::getticks(); u64 globalClockStart = Clock::getticks(); - GlobalThreadList threads = { NULL, {0} }; + GlobalThreadList threads = { NULL, {} }; threadlocal Caller *root = NULL;