From edc19fe54d065c6c6236fdbec0e0153da84ae050 Mon Sep 17 00:00:00 2001 From: Gregory LEOCADIE Date: Thu, 6 Oct 2022 16:32:53 +0200 Subject: [PATCH] [Profiler] Fix deadlock on alpine on thread creation (#3288) * Address __pthread_create case * Apply only for Alpine --- profiler/CMakeLists.txt | 1 + .../Datadog.Linux.ApiWrapper/CMakeLists.txt | 4 + .../functions_to_wrap.c | 153 +++++++++++++++++- .../CMakeLists.txt | 4 + .../LinuxStackFramesCollector.cpp | 12 ++ .../LinuxStackFramesCollectorTest.cpp | 33 ++++ 6 files changed, 201 insertions(+), 6 deletions(-) diff --git a/profiler/CMakeLists.txt b/profiler/CMakeLists.txt index 28b48fcc7063..0575811558ee 100644 --- a/profiler/CMakeLists.txt +++ b/profiler/CMakeLists.txt @@ -102,6 +102,7 @@ endif() SET(ARCH_BASENAME "linux") if (DEFINED ENV{IsAlpine} AND "$ENV{IsAlpine}" MATCHES "true") + SET(IS_ALPINE) SET(ARCH_BASENAME "${ARCH_BASENAME}-musl") endif() diff --git a/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/CMakeLists.txt b/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/CMakeLists.txt index c3b01b51f1c8..cbcd3a1badb9 100644 --- a/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/CMakeLists.txt +++ b/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/CMakeLists.txt @@ -12,6 +12,10 @@ project("Datadog.Linux.ApiWrapper" VERSION 1.0.0) add_compile_options(-std=c11 -fPIC) +if (DEFINED ENV{IsAlpine} AND "$ENV{IsAlpine}" MATCHES "true") + add_compile_options(-DDD_ALPINE) +endif() + # ****************************************************** # Environment detection # ****************************************************** diff --git a/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/functions_to_wrap.c b/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/functions_to_wrap.c index 6ace48b827f6..9df4cb6822b8 100644 --- a/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/functions_to_wrap.c +++ b/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/functions_to_wrap.c @@ -1,10 +1,9 @@ #define _GNU_SOURCE #include -#include #include +#include #include #include -#include /* dl_iterate_phdr wrapper The .NET profiler on Linux uses a classic signal-based approach to collect thread callstack. @@ -34,11 +33,10 @@ This is done by libunwind, just taking advantage of it. */ - /* Function pointers to hold the value of the glibc functions */ -static int (*__real_dl_iterate_phdr)(int (*callback) (struct dl_phdr_info* info, size_t size, void* data), void* data) = NULL; +static int (*__real_dl_iterate_phdr)(int (*callback)(struct dl_phdr_info* info, size_t size, void* data), void* data) = NULL; -int dl_iterate_phdr(int (*callback) (struct dl_phdr_info* info, size_t size, void* data), void* data) +int dl_iterate_phdr(int (*callback)(struct dl_phdr_info* info, size_t size, void* data), void* data) { if (__real_dl_iterate_phdr == NULL) { @@ -70,7 +68,7 @@ int dl_iterate_phdr(int (*callback) (struct dl_phdr_info* info, size_t size, voi /* Function pointers to hold the value of the glibc functions */ static void* (*__real_dlopen)(const char* file, int mode) = NULL; -void *dlopen(const char *file, int mode) +void* dlopen(const char* file, int mode) { if (__real_dlopen == NULL) { @@ -122,3 +120,146 @@ int dladdr(const void* addr_arg, Dl_info* info) return result; } + +__thread int _dd_in_pthread_create = 0; + +// this function is called in by the profiler +int dd_IsInPthreadCreate() +{ + return _dd_in_pthread_create; +} + +#ifdef DD_ALPINE + +/* Function pointers to hold the value of the glibc functions */ +static int (*__real___pthread_create)(pthread_t* restrict res, const pthread_attr_t* restrict attrp, void* (*entry)(void*), void* restrict arg) = NULL; + +int __pthread_create(pthread_t* restrict res, const pthread_attr_t* restrict attrp, void* (*entry)(void*), void* restrict arg) +{ + if (__real___pthread_create == NULL) + { + __real___pthread_create = dlsym(RTLD_NEXT, "__pthread_create"); + } + + _dd_in_pthread_create = 1; + // call the real __pthread_create (libc/musl-libc) + int result = __real___pthread_create(res, attrp, entry, arg); + + _dd_in_pthread_create = 0; + + return result; +} + +/* Function pointers to hold the value of the glibc functions */ +static int (*__real_pthread_attr_init)(pthread_attr_t* a) = NULL; + +int pthread_attr_init(pthread_attr_t* a) +{ + if (__real_pthread_attr_init == NULL) + { + __real_pthread_attr_init = dlsym(RTLD_NEXT, "pthread_attr_init"); + } + + sigset_t oldOne; + sigset_t newOne; + + // initialize the set to all signals + sigfillset(&newOne); + + // prevent any signals from interrupting the execution of the real dladdr + pthread_sigmask(SIG_SETMASK, &newOne, &oldOne); + + // call the real pthread_attr_init (libc/musl-libc) + int result = __real_pthread_attr_init(a); + + // restore the previous state for signals + pthread_sigmask(SIG_SETMASK, &oldOne, NULL); + + return result; +} + +/* Function pointers to hold the value of the glibc functions */ +static int (*__real_pthread_getattr_default_np)(pthread_attr_t* attrp) = NULL; + +int pthread_getattr_default_np(pthread_attr_t* a) +{ + if (__real_pthread_getattr_default_np == NULL) + { + __real_pthread_getattr_default_np = dlsym(RTLD_NEXT, "pthread_getattr_default_np"); + } + + sigset_t oldOne; + sigset_t newOne; + + // initialize the set to all signals + sigfillset(&newOne); + + // prevent any signals from interrupting the execution of the real dladdr + pthread_sigmask(SIG_SETMASK, &newOne, &oldOne); + + // call the real pthread_getattr_default_np (libc/musl-libc) + int result = __real_pthread_getattr_default_np(a); + + // restore the previous state for signals + pthread_sigmask(SIG_SETMASK, &oldOne, NULL); + + return result; +} + +/* Function pointers to hold the value of the glibc functions */ +static int (*__real_pthread_setattr_default_np)(const pthread_attr_t* attrp) = NULL; + +int pthread_setattr_default_np(const pthread_attr_t* a) +{ + if (__real_pthread_setattr_default_np == NULL) + { + __real_pthread_setattr_default_np = dlsym(RTLD_NEXT, "pthread_setattr_default_np"); + } + + sigset_t oldOne; + sigset_t newOne; + + // initialize the set to all signals + sigfillset(&newOne); + + // prevent any signals from interrupting the execution of the real dladdr + pthread_sigmask(SIG_SETMASK, &newOne, &oldOne); + + // call the real pthread_setattr_default_np (libc/musl-libc) + int result = __real_pthread_setattr_default_np(a); + + // restore the previous state for signals + pthread_sigmask(SIG_SETMASK, &oldOne, NULL); + + return result; +} + +/* Function pointers to hold the value of the glibc functions */ +static int (*__real_fork)() = NULL; + +pid_t fork() +{ + if (__real_fork == NULL) + { + __real_fork = dlsym(RTLD_NEXT, "fork"); + } + + sigset_t oldOne; + sigset_t newOne; + + // initialize the set to all signals + sigfillset(&newOne); + + // prevent any signals from interrupting the execution of the real dladdr + pthread_sigmask(SIG_SETMASK, &newOne, &oldOne); + + // call the real fork (libc/musl-libc) + pid_t result = __real_fork(); + + // restore the previous state for signals + pthread_sigmask(SIG_SETMASK, &oldOne, NULL); + + return result; +} + +#endif diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CMakeLists.txt b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CMakeLists.txt index 539e6a90c6fc..210aa1e5ceb5 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CMakeLists.txt +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CMakeLists.txt @@ -19,6 +19,10 @@ add_compile_options(-std=c++17 -fPIC -fms-extensions) add_compile_options(-DPAL_STDCPP_COMPAT -DPLATFORM_UNIX -DUNICODE) add_compile_options(-Wno-invalid-noreturn -Wno-macro-redefined) +if (IS_ALPINE) + add_compile_options(-DDD_ALPINE) +endif() + if (RUN_ASAN) add_compile_options(-g -fsanitize=address -fno-omit-frame-pointer) endif() diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/LinuxStackFramesCollector.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/LinuxStackFramesCollector.cpp index bef616198330..ff994b03ae19 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/LinuxStackFramesCollector.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/LinuxStackFramesCollector.cpp @@ -155,8 +155,20 @@ void LinuxStackFramesCollector::NotifyStackWalkCompleted(std::int32_t resultErro _stackWalkInProgressWaiter.notify_one(); } +// This symbol is defined in the Datadog.Linux.ApiWrapper to handle the special case of __pthread_create +// For __pthread_create we cannot block the signals while calling the *real* pthread_create and restore it after, +// because the newly created thread will inherit the signal mask from its parent (which will be "block all signals"). +// So to prevent it, dd_IsInPthreadCreate will indicate if the current callstack is executing __pthread_create. +// If it's the case, we just bail, otherwise we stackwalk the current thread. +extern "C" int dd_IsInPthreadCreate() __attribute__((weak)); + std::int32_t LinuxStackFramesCollector::CollectCallStackCurrentThread(void* ctx) { + if (dd_IsInPthreadCreate != nullptr && dd_IsInPthreadCreate() == 1) + { + return E_ABORT; + } + try { std::int32_t resultErrorCode; diff --git a/profiler/test/Datadog.Profiler.Native.Tests/LinuxStackFramesCollectorTest.cpp b/profiler/test/Datadog.Profiler.Native.Tests/LinuxStackFramesCollectorTest.cpp index eaf8aaa956b2..60ef15a828d0 100644 --- a/profiler/test/Datadog.Profiler.Native.Tests/LinuxStackFramesCollectorTest.cpp +++ b/profiler/test/Datadog.Profiler.Native.Tests/LinuxStackFramesCollectorTest.cpp @@ -25,6 +25,14 @@ using namespace std::literals; +// This global variable and function are use defined/declared for the test only +// In production, those symbols will be defined in the Wrapper library +int dd_IsPthreadCreateCall = 0; +extern "C" int dd_IsInPthreadCreate() +{ + return dd_IsPthreadCreateCall; +} + #define ASSERT_DURATION_LE(secs, stmt) \ { \ std::promise completed; \ @@ -142,6 +150,7 @@ class LinuxStackFramesCollectorFixture : public ::testing::Test _processId = OpSysTools::GetProcId(); SignalHandlerForTest::_instance = std::make_unique(); + dd_IsPthreadCreateCall = 0; } void TearDown() override @@ -153,6 +162,7 @@ class LinuxStackFramesCollectorFixture : public ::testing::Test SignalHandlerForTest::_instance.reset(); sigaction(SIGUSR1, &_oldAction, nullptr); + dd_IsPthreadCreateCall = 0; } void StopTest() @@ -164,6 +174,11 @@ class LinuxStackFramesCollectorFixture : public ::testing::Test _stopWorker = true; } + void SimulateInPthreadCreate() + { + dd_IsPthreadCreateCall = 1; + } + pid_t GetWorkerThreadId() { return _workerThread->GetThreadId(); @@ -292,6 +307,24 @@ TEST_F(LinuxStackFramesCollectorFixture, CheckSamplingThreadCollectCallStack) ValidateCallstack(ips); } +TEST_F(LinuxStackFramesCollectorFixture, CheckCollectionAbortIfInPthreadCreateCall) +{ + SimulateInPthreadCreate(); + + auto* signalManager = ProfilerSignalManager::Get(); + auto collector = LinuxStackFramesCollector(signalManager); + + auto threadInfo = ManagedThreadInfo((ThreadID)0); + threadInfo.SetOsInfo((DWORD)GetWorkerThreadId(), (HANDLE)0); + + std::uint32_t hr; + StackSnapshotResultBuffer* buffer; + + ASSERT_DURATION_LE(100ms, buffer = collector.CollectStackSample(&threadInfo, &hr)); + EXPECT_EQ(hr, E_FAIL); + EXPECT_EQ(buffer->GetFramesCount(), 0); +} + TEST_F(LinuxStackFramesCollectorFixture, MustNotCollectIfUnknownThreadId) { auto* signalManager = ProfilerSignalManager::Get();