diff --git a/common/cacheline.h b/common/cacheline.h
index dd54c64f99..2212516b1f 100644
--- a/common/cacheline.h
+++ b/common/cacheline.h
@@ -7,7 +7,11 @@
 
 #include <stdlib.h>
 
+#if defined(__ARM_ARCH_7A__) || defined(__aarch64__)
+#define KDB_CACHELINE_SIZE 128
+#else
 #define KDB_CACHELINE_SIZE 64
+#endif // __ARM_ARCH_7A__ || __aarch64__
 #define KDB_CACHELINE_ALIGNED __attribute__((aligned(KDB_CACHELINE_SIZE)))
 
 #endif // KDB_COMMON_CACHELINE_H
diff --git a/compiler/scheduler/scheduler-base.cpp b/compiler/scheduler/scheduler-base.cpp
index 1ab866cba5..ecec42fb21 100644
--- a/compiler/scheduler/scheduler-base.cpp
+++ b/compiler/scheduler/scheduler-base.cpp
@@ -6,7 +6,7 @@
 
 #include <cassert>
 
-volatile int tasks_before_sync_node;
+std::atomic<int> tasks_before_sync_node;
 
 static SchedulerBase *scheduler;
 
diff --git a/compiler/scheduler/scheduler-base.h b/compiler/scheduler/scheduler-base.h
index f5d7c3fbc9..875b4a15b2 100644
--- a/compiler/scheduler/scheduler-base.h
+++ b/compiler/scheduler/scheduler-base.h
@@ -4,6 +4,8 @@
 
 #pragma once
 
+#include <atomic>
+
 class Node;
 class Task;
 
@@ -22,7 +24,7 @@ SchedulerBase *get_scheduler();
 void set_scheduler(SchedulerBase *new_scheduler);
 void unset_scheduler(SchedulerBase *old_scheduler);
 
-extern volatile int tasks_before_sync_node;
+extern std::atomic<int> tasks_before_sync_node;
 
 inline void register_async_task(Task *task) {
   get_scheduler()->add_task(task);
diff --git a/compiler/scheduler/scheduler.cpp b/compiler/scheduler/scheduler.cpp
index 633db5dea0..59cc0eb7ab 100644
--- a/compiler/scheduler/scheduler.cpp
+++ b/compiler/scheduler/scheduler.cpp
@@ -67,7 +67,7 @@ void Scheduler::execute() {
   }
 
   while (true) {
-    if (tasks_before_sync_node > 0) {
+    if (tasks_before_sync_node.load(std::memory_order_acquire) > 0) {
       usleep(250);
       continue;
     }
@@ -101,7 +101,7 @@ bool Scheduler::thread_process_node(Node *node) {
   }
   task->execute();
   delete task;
-  __sync_fetch_and_sub(&tasks_before_sync_node, 1);
+  tasks_before_sync_node.fetch_sub(1, std::memory_order_release);
   return true;
 }
 
diff --git a/compiler/stage.cpp b/compiler/stage.cpp
index 3d47150271..d3094aca35 100644
--- a/compiler/stage.cpp
+++ b/compiler/stage.cpp
@@ -2,6 +2,8 @@
 // Copyright (c) 2020 LLC «V Kontakte»
 // Distributed under the GPL v3 License, see LICENSE.notice.txt
 
+#include <atomic>
+
 #include "compiler/stage.h"
 
 #include "common/termformat/termformat.h"
@@ -31,7 +33,7 @@ const char *get_assert_level_desc(AssertLevelT assert_level) {
   }
 }
 
-volatile int ce_locker;
+std::atomic<int> ce_locker;
 
 namespace {
 FILE *warning_file{nullptr};
@@ -44,7 +46,7 @@ void stage::set_warning_file(FILE *file) noexcept {
 }
 
 void on_compilation_error(const char *description __attribute__((unused)), const char *file_name, int line_number, const char *full_description, AssertLevelT assert_level) {
-  AutoLocker<volatile int *> locker(&ce_locker);
+  AutoLocker<std::atomic<int> *> locker(&ce_locker);
   FILE *file = stdout;
   if (assert_level == WRN_ASSERT_LEVEL && warning_file) {
     file = warning_file;
diff --git a/compiler/threading/data-stream.h b/compiler/threading/data-stream.h
index 03e285c184..8023ea9a2d 100644
--- a/compiler/threading/data-stream.h
+++ b/compiler/threading/data-stream.h
@@ -38,7 +38,7 @@ class DataStream {
 
   void operator<<(DataType input) {
     if (!is_sink_mode_) {
-      __sync_fetch_and_add(&tasks_before_sync_node, 1);
+      tasks_before_sync_node.fetch_add(1, std::memory_order_release);
     }
     std::lock_guard<std::mutex> lock{mutex_};
     queue_.push_front(std::move(input));
@@ -60,7 +60,6 @@
   const bool is_sink_mode_;
 };
 
-
 struct EmptyStream {
   template<size_t stream_id>
   using NthDataType = EmptyStream;
diff --git a/compiler/threading/hash-table.h b/compiler/threading/hash-table.h
index 4fa9189d4b..0c6d054505 100644
--- a/compiler/threading/hash-table.h
+++ b/compiler/threading/hash-table.h
@@ -13,7 +13,7 @@ template<class T, int N = 1000000>
 class TSHashTable {
 public:
   struct HTNode : Lockable {
-    unsigned long long hash;
+    std::atomic<unsigned long long> hash;
     T data;
 
     HTNode() :
@@ -24,7 +24,8 @@
 
 private:
   HTNode *nodes;
-  int used_size;
+  std::atomic<int> used_size;
+
 public:
   TSHashTable() :
     nodes(new HTNode[N]),
@@ -34,14 +35,15 @@ HTNode *at(unsigned long long hash) {
     int i = (unsigned)hash % (unsigned)N;
     while (true) {
-      while (nodes[i].hash != 0 && nodes[i].hash != hash) {
+      while (nodes[i].hash.load(std::memory_order_acquire) != 0 && nodes[i].hash.load(std::memory_order_relaxed) != hash) {
         i++;
         if (i == N) {
           i = 0;
         }
       }
 
-      if (nodes[i].hash == 0 && !__sync_bool_compare_and_swap(&nodes[i].hash, 0, hash)) {
-        int id = __sync_fetch_and_add(&used_size, 1);
+      unsigned long long expected = 0;
+      if (nodes[i].hash.load(std::memory_order_acquire) == 0 && !nodes[i].hash.compare_exchange_strong(expected, hash, std::memory_order_acq_rel)) {
+        int id = used_size.fetch_add(1, std::memory_order_relaxed);
         assert(id * 2 < N);
         continue;
       }
@@ -52,20 +54,20 @@
 
   const T *find(unsigned long long hash) {
     int i = (unsigned)hash % (unsigned)N;
-    while (nodes[i].hash != 0 && nodes[i].hash != hash) {
+    while (nodes[i].hash.load(std::memory_order_acquire) != 0 && nodes[i].hash.load(std::memory_order_relaxed) != hash) {
       i++;
       if (i == N) {
         i = 0;
       }
     }
 
-    return nodes[i].hash == hash ? &nodes[i].data : nullptr;
+    return nodes[i].hash.load(std::memory_order_acquire) == hash ? &nodes[i].data : nullptr;
   }
 
   std::vector<T> get_all() {
     std::vector<T> res;
     for (int i = 0; i < N; i++) {
-      if (nodes[i].hash != 0) {
+      if (nodes[i].hash.load(std::memory_order_acquire) != 0) {
         res.push_back(nodes[i].data);
       }
     }
@@ -76,7 +78,7 @@
   std::vector<T> get_all_if(const CondF &callbackF) {
     std::vector<T> res;
     for (int i = 0; i < N; i++) {
-      if (nodes[i].hash != 0 && callbackF(nodes[i].data)) {
+      if (nodes[i].hash.load(std::memory_order_acquire) != 0 && callbackF(nodes[i].data)) {
         res.push_back(nodes[i].data);
       }
     }
diff --git a/compiler/threading/locks.h b/compiler/threading/locks.h
index e90fb041fe..c831920e87 100644
--- a/compiler/threading/locks.h
+++ b/compiler/threading/locks.h
@@ -4,9 +4,14 @@
 
 #pragma once
 
+#include <atomic>
 #include <cassert>
 #include <unistd.h>
 
+#include "common/cacheline.h"
+
+enum { LOCKED = 1, UNLOCKED = 0 };
+
 template<class T>
 bool try_lock(T);
 
@@ -20,28 +25,43 @@ void unlock(T locker) {
   locker->unlock();
 }
 
-inline bool try_lock(volatile int *locker) {
-  return __sync_lock_test_and_set(locker, 1) == 0;
+inline bool try_lock(std::atomic<int> *locker) {
+  int expected = UNLOCKED;
+  return locker->compare_exchange_weak(expected, LOCKED, std::memory_order_acq_rel);
 }
 
-inline void lock(volatile int *locker) {
+inline void lock(std::atomic<int> *locker) {
   while (!try_lock(locker)) {
     usleep(250);
   }
 }
 
-inline void unlock(volatile int *locker) {
-  assert(*locker == 1);
-  __sync_lock_release(locker);
+inline void unlock(std::atomic<int> *locker) {
+  assert(locker->load(std::memory_order_relaxed) == LOCKED);
+  locker->store(UNLOCKED, std::memory_order_release);
 }
 
-class Lockable {
+class KDB_CACHELINE_ALIGNED Lockable {
 private:
-  volatile int x;
+  std::atomic<int> x;
+
 public:
   Lockable() :
     x(0) {}
 
+  Lockable(const Lockable &other) noexcept :
+    x{other.x.load(std::memory_order_relaxed)} {}
+  Lockable(Lockable &&other) noexcept :
+    x{other.x.load(std::memory_order_relaxed)} {}
+  Lockable &operator=(const Lockable &other) noexcept {
+    x = other.x.load(std::memory_order_relaxed);
+    return *this;
+  }
+  Lockable &operator=(Lockable &&other) noexcept {
+    x = other.x.load(std::memory_order_relaxed);
+    return *this;
+  }
+  virtual ~Lockable() = default;
 
   void lock() {
     lock(&x);
diff --git a/compiler/threading/thread-id.cpp b/compiler/threading/thread-id.cpp
index a7d30b07db..f03235e294 100644
--- a/compiler/threading/thread-id.cpp
+++ b/compiler/threading/thread-id.cpp
@@ -4,7 +4,7 @@
 
 #include "compiler/threading/thread-id.h"
 
-static __thread int bicycle_thread_id;
+static thread_local int bicycle_thread_id;
 
 int get_thread_id() {
   return bicycle_thread_id;
diff --git a/compiler/threading/tls.h b/compiler/threading/tls.h
index 0b0f2a83f8..2be42a6e91 100644
--- a/compiler/threading/tls.h
+++ b/compiler/threading/tls.h
@@ -8,6 +8,8 @@
 #include <cassert>
 #include <vector>
 
+#include "common/cacheline.h"
+
 #include "compiler/threading/locks.h"
 #include "compiler/threading/thread-id.h"
 
@@ -23,10 +25,8 @@ inline uint32_t get_default_threads_count() noexcept {
 template<class T>
 struct TLS {
 private:
-  struct TLSRaw {
+  struct KDB_CACHELINE_ALIGNED TLSRaw {
     T data{};
-    volatile int locker = 0;
-    char dummy[4096];
   };
 
   // The thread with thread_id = 0 is the main thread in which the scheduler's master code is executed.
@@ -49,7 +49,6 @@
   arr() {
   }
 
-
   T &get() {
     return get_raw()->data;
   }
@@ -69,19 +68,6 @@
   int size() {
     return MAX_THREADS_COUNT + 1;
   }
-
-  T *lock_get() {
-    TLSRaw *raw = get_raw();
-    bool ok = try_lock(&raw->locker);
-    assert(ok);
-    return &raw->data;
-  }
-
-  void unlock_get(T *ptr) {
-    TLSRaw *raw = get_raw();
-    assert(&raw->data == ptr);
-    unlock(&raw->locker);
-  }
 };
 
 #pragma GCC diagnostic pop
diff --git a/runtime/critical_section.cpp b/runtime/critical_section.cpp
index a7097a1503..7c93a12e0c 100644
--- a/runtime/critical_section.cpp
+++ b/runtime/critical_section.cpp
@@ -16,8 +16,8 @@ void check_stack_overflow() {
 }
 
 namespace dl {
-volatile int in_critical_section = 0;
-volatile long long pending_signals = 0;
+volatile int in_critical_section;
+volatile long long pending_signals;
 
 void enter_critical_section() noexcept {
   check_stack_overflow();