From d486320ddd5f7f4c877d2bec736d76e5a329092d Mon Sep 17 00:00:00 2001
From: Valery Matskevich
Date: Wed, 22 May 2024 00:00:22 +0300
Subject: [PATCH 1/5] i fixed everything yet didn't even try to compile it

Signed-off-by: Valery Matskevich
---
 common/cacheline.h                    |  4 ++++
 compiler/scheduler/scheduler-base.cpp |  2 +-
 compiler/scheduler/scheduler-base.h   |  4 +++-
 compiler/scheduler/scheduler.cpp      |  4 ++--
 compiler/stage.cpp                    |  6 ++++--
 compiler/threading/data-stream.h      |  2 +-
 compiler/threading/hash-table.h       | 22 +++++++++++++---------
 compiler/threading/locks.h            | 22 ++++++++++++++--------
 compiler/threading/thread-id.cpp      |  2 +-
 compiler/threading/tls.h              |  8 +++++---
 runtime/critical_section.cpp          | 17 +++++++++--------
 runtime/critical_section.h            |  3 ++-
 12 files changed, 59 insertions(+), 37 deletions(-)

diff --git a/common/cacheline.h b/common/cacheline.h
index dd54c64f99..2212516b1f 100644
--- a/common/cacheline.h
+++ b/common/cacheline.h
@@ -7,7 +7,11 @@
 
 #include
 
+#if defined(__ARM_ARCH_7A__) || defined(__aarch64__)
+#define KDB_CACHELINE_SIZE 128
+#else
 #define KDB_CACHELINE_SIZE 64
+#endif // __ARM_ARCH_7A__ || __aarch64__
 #define KDB_CACHELINE_ALIGNED __attribute__((aligned(KDB_CACHELINE_SIZE)))
 
 #endif // KDB_COMMON_CACHELINE_H
diff --git a/compiler/scheduler/scheduler-base.cpp b/compiler/scheduler/scheduler-base.cpp
index 1ab866cba5..ecec42fb21 100644
--- a/compiler/scheduler/scheduler-base.cpp
+++ b/compiler/scheduler/scheduler-base.cpp
@@ -6,7 +6,7 @@
 
 #include
 
-volatile int tasks_before_sync_node;
+std::atomic<int> tasks_before_sync_node;
 
 static SchedulerBase *scheduler;
 
diff --git a/compiler/scheduler/scheduler-base.h b/compiler/scheduler/scheduler-base.h
index f5d7c3fbc9..875b4a15b2 100644
--- a/compiler/scheduler/scheduler-base.h
+++ b/compiler/scheduler/scheduler-base.h
@@ -4,6 +4,8 @@
 
 #pragma once
 
+#include <atomic>
+
 class Node;
 class Task;
 
@@ -22,7 +24,7 @@ SchedulerBase *get_scheduler();
 void set_scheduler(SchedulerBase *new_scheduler);
 void unset_scheduler(SchedulerBase *old_scheduler);
 
-extern volatile int tasks_before_sync_node;
+extern std::atomic<int> tasks_before_sync_node;
 
 inline void register_async_task(Task *task) {
   get_scheduler()->add_task(task);
 }
diff --git a/compiler/scheduler/scheduler.cpp b/compiler/scheduler/scheduler.cpp
index 633db5dea0..59cc0eb7ab 100644
--- a/compiler/scheduler/scheduler.cpp
+++ b/compiler/scheduler/scheduler.cpp
@@ -67,7 +67,7 @@ void Scheduler::execute() {
   }
 
   while (true) {
-    if (tasks_before_sync_node > 0) {
+    if (tasks_before_sync_node.load(std::memory_order_acquire) > 0) {
       usleep(250);
       continue;
     }
@@ -101,7 +101,7 @@ bool Scheduler::thread_process_node(Node *node) {
   }
   task->execute();
   delete task;
-  __sync_fetch_and_sub(&tasks_before_sync_node, 1);
+  tasks_before_sync_node.fetch_sub(1, std::memory_order_release);
   return true;
 }
diff --git a/compiler/stage.cpp b/compiler/stage.cpp
index 3d47150271..d3094aca35 100644
--- a/compiler/stage.cpp
+++ b/compiler/stage.cpp
@@ -2,6 +2,8 @@
 // Copyright (c) 2020 LLC «V Kontakte»
 // Distributed under the GPL v3 License, see LICENSE.notice.txt
 
+#include <atomic>
+
 #include "compiler/stage.h"
 
 #include "common/termformat/termformat.h"
@@ -31,7 +33,7 @@ const char *get_assert_level_desc(AssertLevelT assert_level) {
   }
 }
 
-volatile int ce_locker;
+std::atomic<int> ce_locker;
 
 namespace {
 FILE *warning_file{nullptr};
@@ -44,7 +46,7 @@ void stage::set_warning_file(FILE *file) noexcept {
 
 void on_compilation_error(const char *description __attribute__((unused)), const char *file_name, int line_number,
                           const char *full_description, AssertLevelT assert_level) {
-  AutoLocker<volatile int *> locker(&ce_locker);
+  AutoLocker<std::atomic<int> *> locker(&ce_locker);
   FILE *file = stdout;
   if (assert_level == WRN_ASSERT_LEVEL && warning_file) {
     file = warning_file;
diff --git a/compiler/threading/data-stream.h b/compiler/threading/data-stream.h
index 03e285c184..2d0e8fc237 100644
--- a/compiler/threading/data-stream.h
+++ b/compiler/threading/data-stream.h
@@ -38,7 +38,7 @@ class DataStream {
 
   void operator<<(DataType input) {
     if (!is_sink_mode_) {
-      __sync_fetch_and_add(&tasks_before_sync_node, 1);
+      tasks_before_sync_node.fetch_add(1, std::memory_order_release);
     }
     std::lock_guard lock{mutex_};
     queue_.push_front(std::move(input));
diff --git a/compiler/threading/hash-table.h b/compiler/threading/hash-table.h
index 4fa9189d4b..599a157eeb 100644
--- a/compiler/threading/hash-table.h
+++ b/compiler/threading/hash-table.h
@@ -13,7 +13,7 @@ template
 class TSHashTable {
 public:
   struct HTNode : Lockable {
-    unsigned long long hash;
+    std::atomic<unsigned long long> hash;
     T data;
 
     HTNode() :
@@ -24,7 +24,7 @@ class TSHashTable {
 private:
   HTNode *nodes;
-  int used_size;
+  std::atomic<int> used_size;
 public:
   TSHashTable() :
     nodes(new HTNode[N]),
@@ -34,14 +34,17 @@ class TSHashTable {
   HTNode *at(unsigned long long hash) {
     int i = (unsigned)hash % (unsigned)N;
     while (true) {
-      while (nodes[i].hash != 0 && nodes[i].hash != hash) {
+      while (nodes[i].hash.load(std::memory_order_acquire) != 0
+             && nodes[i].hash.load(std::memory_order_relaxed) != hash) {
         i++;
         if (i == N) {
           i = 0;
         }
       }
-      if (nodes[i].hash == 0 && !__sync_bool_compare_and_swap(&nodes[i].hash, 0, hash)) {
-        int id = __sync_fetch_and_add(&used_size, 1);
+      unsigned long long expected = 0;
+      if (nodes[i].hash.load(std::memory_order_acquire) == 0
+          && nodes[i].hash.compare_exchange_weak(expected, hash, std::memory_order_acq_rel)) {
+        int id = used_size.fetch_add(1, std::memory_order_release);
         assert(id * 2 < N);
         continue;
       }
@@ -52,20 +55,21 @@ class TSHashTable {
   const T *find(unsigned long long hash) {
     int i = (unsigned)hash % (unsigned)N;
-    while (nodes[i].hash != 0 && nodes[i].hash != hash) {
+    while (nodes[i].hash.load(std::memory_order_acquire) != 0
+           && nodes[i].hash.load(std::memory_order_relaxed) != hash) {
       i++;
       if (i == N) {
        i = 0;
      }
    }
-    return nodes[i].hash == hash ? &nodes[i].data : nullptr;
+    return nodes[i].hash.load(std::memory_order_acquire) == hash ? &nodes[i].data : nullptr;
   }
 
   std::vector<T> get_all() {
     std::vector<T> res;
     for (int i = 0; i < N; i++) {
-      if (nodes[i].hash != 0) {
+      if (nodes[i].hash.load(std::memory_order_acquire) != 0) {
         res.push_back(nodes[i].data);
       }
     }
@@ -76,7 +80,7 @@ class TSHashTable {
   std::vector<T> get_all_if(const CondF &callbackF) {
     std::vector<T> res;
     for (int i = 0; i < N; i++) {
-      if (nodes[i].hash != 0 && callbackF(nodes[i].data)) {
+      if (nodes[i].hash.load(std::memory_order_acquire) != 0 && callbackF(nodes[i].data)) {
         res.push_back(nodes[i].data);
       }
     }
diff --git a/compiler/threading/locks.h b/compiler/threading/locks.h
index e90fb041fe..4cc20e27d3 100644
--- a/compiler/threading/locks.h
+++ b/compiler/threading/locks.h
@@ -4,9 +4,14 @@
 
 #pragma once
 
+#include <atomic>
 #include <cassert>
 #include <unistd.h>
 
+#include "common/cacheline.h"
+
+enum { LOCKED = 1, UNLOCKED = 0 };
+
 template<class T>
 bool try_lock(T);
 
@@ -20,24 +25,25 @@ void unlock(T locker) {
   locker->unlock();
 }
 
-inline bool try_lock(volatile int *locker) {
-  return __sync_lock_test_and_set(locker, 1) == 0;
+inline bool try_lock(std::atomic<int> *locker) {
+  int expected = 0;
+  return locker->compare_exchange_weak(expected, LOCKED, std::memory_order_acq_rel);
 }
 
-inline void lock(volatile int *locker) {
+inline void lock(std::atomic<int> *locker) {
   while (!try_lock(locker)) {
     usleep(250);
   }
 }
 
-inline void unlock(volatile int *locker) {
-  assert(*locker == 1);
-  __sync_lock_release(locker);
+inline void unlock(std::atomic<int> *locker) {
+  assert(locker->load(std::memory_order_relaxed) == LOCKED);
+  locker->store(UNLOCKED, std::memory_order_release);
 }
 
-class Lockable {
+class KDB_CACHELINE_ALIGNED Lockable {
 private:
-  volatile int x;
+  std::atomic<int> x;
 public:
   Lockable() :
     x(0) {}
diff --git a/compiler/threading/thread-id.cpp b/compiler/threading/thread-id.cpp
index a7d30b07db..f03235e294 100644
--- a/compiler/threading/thread-id.cpp
+++ b/compiler/threading/thread-id.cpp
@@ -4,7 +4,7 @@
 
 #include "compiler/threading/thread-id.h"
 
-static __thread int bicycle_thread_id;
+static thread_local int bicycle_thread_id;
 
 int get_thread_id() {
   return bicycle_thread_id;
diff --git a/compiler/threading/tls.h b/compiler/threading/tls.h
index 0b0f2a83f8..7420804825 100644
--- a/compiler/threading/tls.h
+++ b/compiler/threading/tls.h
@@ -8,6 +8,8 @@
 
 #include
 #include
 
+#include "common/cacheline.h"
+
 #include "compiler/threading/locks.h"
 #include "compiler/threading/thread-id.h"
 
@@ -23,10 +25,10 @@ inline uint32_t get_default_threads_count() noexcept {
 template<class T>
 struct TLS {
 private:
-  struct TLSRaw {
+  struct KDB_CACHELINE_ALIGNED TLSRaw {
     T data{};
-    volatile int locker = 0;
-    char dummy[4096];
+    std::atomic<int> locker = UNLOCKED;
+    char dummy[KDB_CACHELINE_SIZE];
   };
 
   // The thread with thread_id = 0 is the main thread in which the scheduler's master code is executed.
diff --git a/runtime/critical_section.cpp b/runtime/critical_section.cpp
index a7097a1503..b857ba1d92 100644
--- a/runtime/critical_section.cpp
+++ b/runtime/critical_section.cpp
@@ -4,6 +4,7 @@
 
 #include "runtime/critical_section.h"
 
+#include <atomic>
 #include
 
 #include "runtime/php_assert.h"
@@ -16,19 +17,19 @@ void check_stack_overflow() {
 
 namespace dl {
 
-volatile int in_critical_section = 0;
-volatile long long pending_signals = 0;
+std::atomic<int> in_critical_section;
+volatile long long pending_signals;
 
 void enter_critical_section() noexcept {
   check_stack_overflow();
-  php_assert (in_critical_section >= 0);
-  in_critical_section = in_critical_section + 1;
+  php_assert (in_critical_section.load(std::memory_order_relaxed) >= 0);
+  in_critical_section.fetch_add(1, std::memory_order_release);
 }
 
 void leave_critical_section() noexcept {
-  in_critical_section = in_critical_section - 1;
-  php_assert (in_critical_section >= 0);
-  if (pending_signals && in_critical_section <= 0) {
+  in_critical_section.fetch_sub(1, std::memory_order_release);
+  php_assert (in_critical_section.load(std::memory_order_relaxed) >= 0);
+  if (pending_signals && in_critical_section.load(std::memory_order_acquire) <= 0) {
     for (int i = 0; i < sizeof(pending_signals) * 8; i++) {
       if ((pending_signals >> i) & 1) {
         raise(i);
@@ -38,8 +39,8 @@ void leave_critical_section() noexcept {
 }
 
 void init_critical_section() noexcept {
-  in_critical_section = 0;
   pending_signals = 0;
+  in_critical_section.store(0, std::memory_order_release);
 }
 
 } // namespace dl
diff --git a/runtime/critical_section.h b/runtime/critical_section.h
index 70a4eec178..101b375d51 100644
--- a/runtime/critical_section.h
+++ b/runtime/critical_section.h
@@ -4,13 +4,14 @@
 
 #pragma once
 
+#include <atomic>
 #include
 
 #include "common/mixin/not_copyable.h"
 
 namespace dl {
 
-extern volatile int in_critical_section;
+extern std::atomic<int> in_critical_section;
 extern volatile long long pending_signals;
 
 void enter_critical_section() noexcept;

From 7ad0ce733d85cc6bba9220699df02ce0beaf88d6 Mon Sep 17 00:00:00 2001
From: Valery Matskevich
Date: Wed, 22 May 2024 00:46:09 +0300
Subject: [PATCH 2/5] make lockable copyable

Signed-off-by: Valery Matskevich
---
 compiler/threading/hash-table.h |  2 +-
 compiler/threading/locks.h      | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/compiler/threading/hash-table.h b/compiler/threading/hash-table.h
index 599a157eeb..f0042b54bd 100644
--- a/compiler/threading/hash-table.h
+++ b/compiler/threading/hash-table.h
@@ -43,7 +43,7 @@ class TSHashTable {
       }
       unsigned long long expected = 0;
       if (nodes[i].hash.load(std::memory_order_acquire) == 0
-          && nodes[i].hash.compare_exchange_weak(expected, hash, std::memory_order_acq_rel)) {
+          && !nodes[i].hash.compare_exchange_strong(expected, hash, std::memory_order_acq_rel)) {
         int id = used_size.fetch_add(1, std::memory_order_release);
         assert(id * 2 < N);
         continue;
       }
diff --git a/compiler/threading/locks.h b/compiler/threading/locks.h
index 4cc20e27d3..5bad55ce2d 100644
--- a/compiler/threading/locks.h
+++ b/compiler/threading/locks.h
@@ -48,6 +48,19 @@ class KDB_CACHELINE_ALIGNED Lockable {
   Lockable() :
     x(0) {}
 
+  Lockable(const Lockable& other) noexcept :
+    x{other.x.load(std::memory_order_relaxed)} {}
+  Lockable(Lockable&& other) noexcept :
+    x{other.x.load(std::memory_order_relaxed)} {}
+  Lockable& operator=(const Lockable& other) noexcept {
+    x = other.x.load(std::memory_order_relaxed);
+    return *this;
+  }
+  Lockable& operator=(Lockable&& other) noexcept {
+    x = other.x.load(std::memory_order_relaxed);
+    return *this;
+  }
+
   virtual ~Lockable() = default;
 
   void lock() {

From 5076ceab240577abcb1ecb95ffee98e499687dc5 Mon Sep 17 00:00:00 2001
From: Valery Matskevich
Date: Wed, 22 May 2024 00:50:27 +0300
Subject: [PATCH 3/5] clang-formatted code

Signed-off-by: Valery Matskevich
---
 compiler/scheduler/scheduler-base.cpp |  6 +++---
 compiler/scheduler/scheduler.cpp      | 11 +++++------
 compiler/stage.cpp                    | 22 +++++++++++-----------
 compiler/threading/data-stream.h      |  9 +++------
 compiler/threading/hash-table.h       | 24 ++++++++++--------------
 compiler/threading/locks.h            | 24 +++++++++++++-----------
 compiler/threading/tls.h              |  6 ++----
 runtime/critical_section.cpp          |  8 ++++----
 runtime/critical_section.h            | 20 ++++++++++++++------
 9 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/compiler/scheduler/scheduler-base.cpp b/compiler/scheduler/scheduler-base.cpp
index ecec42fb21..c5b1f7e468 100644
--- a/compiler/scheduler/scheduler-base.cpp
+++ b/compiler/scheduler/scheduler-base.cpp
@@ -11,17 +11,17 @@ std::atomic<int> tasks_before_sync_node;
 static SchedulerBase *scheduler;
 
 void set_scheduler(SchedulerBase *new_scheduler) {
-  assert (scheduler == nullptr);
+  assert(scheduler == nullptr);
   scheduler = new_scheduler;
 }
 
 void unset_scheduler(SchedulerBase *old_scheduler) {
-  assert (scheduler == old_scheduler);
+  assert(scheduler == old_scheduler);
   scheduler = nullptr;
 }
 
 SchedulerBase *get_scheduler() {
-  assert (scheduler != nullptr);
+  assert(scheduler != nullptr);
   return scheduler;
 }
diff --git a/compiler/scheduler/scheduler.cpp b/compiler/scheduler/scheduler.cpp
index 59cc0eb7ab..c02398c228 100644
--- a/compiler/scheduler/scheduler.cpp
+++ b/compiler/scheduler/scheduler.cpp
@@ -21,15 +21,14 @@ class ThreadContext {
   bool run_flag;
 };
 
-
 void *scheduler_thread_execute(void *arg) {
   auto *tls = (ThreadContext *)arg;
   tls->scheduler->thread_execute(tls);
   return nullptr;
 }
 
-Scheduler::Scheduler() :
-  threads_count(-1) {
+Scheduler::Scheduler()
+  : threads_count(-1) {
   task_pull = new TaskPull();
 }
 
@@ -46,7 +45,7 @@ void Scheduler::add_sync_node(Node *node) {
 }
 
 void Scheduler::add_task(Task *task) {
-  assert (task_pull != nullptr);
+  assert(task_pull != nullptr);
   task_pull->add_task(task);
 }
 
@@ -55,7 +54,7 @@ void Scheduler::execute() {
   set_thread_id(0);
   std::vector<ThreadContext> threads(threads_count + 1);
 
-  assert ((int)one_thread_nodes.size() < threads_count);
+  assert((int)one_thread_nodes.size() < threads_count);
   for (int i = 1; i <= threads_count; i++) {
     threads[i].thread_id = i;
     threads[i].scheduler = this;
@@ -90,7 +89,7 @@ void Scheduler::execute() {
 }
 
 void Scheduler::set_threads_count(int new_threads_count) {
-  assert (1 <= new_threads_count && new_threads_count <= MAX_THREADS_COUNT);
+  assert(1 <= new_threads_count && new_threads_count <= MAX_THREADS_COUNT);
   threads_count = new_threads_count;
 }
diff --git a/compiler/stage.cpp b/compiler/stage.cpp
index d3094aca35..567d070c5e 100644
--- a/compiler/stage.cpp
+++ b/compiler/stage.cpp
@@ -29,7 +29,7 @@ const char *get_assert_level_desc(AssertLevelT assert_level) {
     case FATAL_ASSERT_LEVEL:
       return "Fatal error";
     default:
-      assert (0);
+      assert(0);
   }
 }
 
@@ -43,8 +43,8 @@ void stage::set_warning_file(FILE *file) noexcept {
   warning_file = file;
 }
 
-void on_compilation_error(const char *description __attribute__((unused)), const char *file_name, int line_number,
-                          const char *full_description, AssertLevelT assert_level) {
+void on_compilation_error(const char *description __attribute__((unused)), const char *file_name, int line_number, const char *full_description,
+                          AssertLevelT assert_level) {
   AutoLocker<std::atomic<int> *> locker(&ce_locker);
   FILE *file = stdout;
@@ -62,7 +62,7 @@ void on_compilation_error(const char *description __attribute__((unused)), const
             "It is probably happened due to incorrect or unsupported PHP input.\n"
             "But it is still bug in compiler.\n");
 #ifdef __arm64__
-    __builtin_debugtrap();  // for easier debugging kphp_assert / kphp_fail
+    __builtin_debugtrap(); // for easier debugging kphp_assert / kphp_fail
 #endif
     abort();
   }
@@ -75,7 +75,6 @@ void on_compilation_error(const char *description __attribute__((unused)), const
   fflush(file);
 }
 
-
 void Location::set_file(SrcFilePtr new_file) {
   file = new_file;
   function = FunctionPtr();
@@ -94,10 +93,10 @@ void Location::set_line(int new_line) {
   line = new_line;
 }
 
-Location::Location(const SrcFilePtr &file, const FunctionPtr &function, int line) :
-  file(file),
-  function(function),
-  line(line) {}
+Location::Location(const SrcFilePtr &file, const FunctionPtr &function, int line)
+  : file(file)
+  , function(function)
+  , line(line) {}
 
 // return a location in the format: "{file}:{line} in {function}"
 std::string Location::as_human_readable() const {
@@ -251,8 +250,9 @@ const std::string &stage::get_function_name() {
   return function->name;
 }
 
-bool stage::should_be_colored(FILE *f) { 
-  if (!G) return TermStringFormat::is_terminal(f);
+bool stage::should_be_colored(FILE *f) {
+  if (!G)
+    return TermStringFormat::is_terminal(f);
   switch (G->settings().get_color_settings()) {
     case CompilerSettings::colored:
       return true;
diff --git a/compiler/threading/data-stream.h b/compiler/threading/data-stream.h
index 2d0e8fc237..f4233fcf4d 100644
--- a/compiler/threading/data-stream.h
+++ b/compiler/threading/data-stream.h
@@ -21,10 +21,8 @@ class DataStream {
   template
   using NthDataType = DataType;
 
-  explicit DataStream(bool is_sink = false) :
-    is_sink_mode_(is_sink)
-  {
-  }
+  explicit DataStream(bool is_sink = false)
+    : is_sink_mode_(is_sink) {}
 
   bool get(DataType &result) {
     std::lock_guard lock{mutex_};
@@ -60,13 +58,12 @@ class DataStream {
   const bool is_sink_mode_;
 };
 
-
 struct EmptyStream {
   template
   using NthDataType = EmptyStream;
 };
 
-template
+template
 class MultipleDataStreams {
 private:
   std::tuple *...> streams_;
diff --git a/compiler/threading/hash-table.h b/compiler/threading/hash-table.h
index f0042b54bd..6d96e2218f 100644
--- a/compiler/threading/hash-table.h
+++ b/compiler/threading/hash-table.h
@@ -16,34 +16,31 @@ class TSHashTable {
     std::atomic<unsigned long long> hash;
     T data;
 
-    HTNode() :
-      hash(0),
-      data() {
-    }
+    HTNode()
+      : hash(0)
+      , data() {}
   };
 
 private:
   HTNode *nodes;
   std::atomic<int> used_size;
+
 public:
-  TSHashTable() :
-    nodes(new HTNode[N]),
-    used_size(0) {
-  }
+  TSHashTable()
+    : nodes(new HTNode[N])
+    , used_size(0) {}
 
   HTNode *at(unsigned long long hash) {
     int i = (unsigned)hash % (unsigned)N;
     while (true) {
-      while (nodes[i].hash.load(std::memory_order_acquire) != 0
-             && nodes[i].hash.load(std::memory_order_relaxed) != hash) {
+      while (nodes[i].hash.load(std::memory_order_acquire) != 0 && nodes[i].hash.load(std::memory_order_relaxed) != hash) {
         i++;
         if (i == N) {
           i = 0;
         }
       }
       unsigned long long expected = 0;
-      if (nodes[i].hash.load(std::memory_order_acquire) == 0
-          && !nodes[i].hash.compare_exchange_strong(expected, hash, std::memory_order_acq_rel)) {
+      if (nodes[i].hash.load(std::memory_order_acquire) == 0 && !nodes[i].hash.compare_exchange_strong(expected, hash, std::memory_order_acq_rel)) {
         int id = used_size.fetch_add(1, std::memory_order_release);
         assert(id * 2 < N);
         continue;
       }
@@ -55,8 +52,7 @@ class TSHashTable {
   const T *find(unsigned long long hash) {
     int i = (unsigned)hash % (unsigned)N;
-    while (nodes[i].hash.load(std::memory_order_acquire) != 0
-           && nodes[i].hash.load(std::memory_order_relaxed) != hash) {
+    while (nodes[i].hash.load(std::memory_order_acquire) != 0 && nodes[i].hash.load(std::memory_order_relaxed) != hash) {
       i++;
       if (i == N) {
         i = 0;
diff --git a/compiler/threading/locks.h b/compiler/threading/locks.h
index 5bad55ce2d..5b7718d0e4 100644
--- a/compiler/threading/locks.h
+++ b/compiler/threading/locks.h
@@ -44,19 +44,20 @@ class KDB_CACHELINE_ALIGNED Lockable {
 private:
   std::atomic<int> x;
+
 public:
-  Lockable() :
-    x(0) {}
-
-  Lockable(const Lockable& other) noexcept :
-    x{other.x.load(std::memory_order_relaxed)} {}
-  Lockable(Lockable&& other) noexcept :
-    x{other.x.load(std::memory_order_relaxed)} {}
-  Lockable& operator=(const Lockable& other) noexcept {
+  Lockable()
+    : x(0) {}
+
+  Lockable(const Lockable &other) noexcept
+    : x{other.x.load(std::memory_order_relaxed)} {}
+  Lockable(Lockable &&other) noexcept
+    : x{other.x.load(std::memory_order_relaxed)} {}
+  Lockable &operator=(const Lockable &other) noexcept {
     x = other.x.load(std::memory_order_relaxed);
     return *this;
   }
-  Lockable& operator=(Lockable&& other) noexcept {
+  Lockable &operator=(Lockable &&other) noexcept {
     x = other.x.load(std::memory_order_relaxed);
     return *this;
   }
@@ -76,9 +77,10 @@ template
 class AutoLocker {
 private:
   DataT ptr;
+
 public:
-  explicit AutoLocker(DataT ptr) :
-    ptr(ptr) {
+  explicit AutoLocker(DataT ptr)
+    : ptr(ptr) {
     lock(ptr);
   }
diff --git a/compiler/threading/tls.h b/compiler/threading/tls.h
index 7420804825..21557d39b2 100644
--- a/compiler/threading/tls.h
+++ b/compiler/threading/tls.h
@@ -47,10 +47,8 @@ struct TLS {
   }
 
 public:
-  TLS() :
-    arr() {
-  }
-
+  TLS()
+    : arr() {}
   T &get() {
     return get_raw()->data;
   }
diff --git a/runtime/critical_section.cpp b/runtime/critical_section.cpp
index b857ba1d92..f10a5f44fa 100644
--- a/runtime/critical_section.cpp
+++ b/runtime/critical_section.cpp
@@ -9,10 +9,10 @@
 
 #include "runtime/php_assert.h"
 
-void check_stack_overflow() __attribute__ ((weak));
+void check_stack_overflow() __attribute__((weak));
 
 void check_stack_overflow() {
-  //pass;
+  // pass;
 }
 
 namespace dl {
@@ -22,13 +22,13 @@ volatile long long pending_signals;
 
 void enter_critical_section() noexcept {
   check_stack_overflow();
-  php_assert (in_critical_section.load(std::memory_order_relaxed) >= 0);
+  php_assert(in_critical_section.load(std::memory_order_relaxed) >= 0);
   in_critical_section.fetch_add(1, std::memory_order_release);
 }
 
 void leave_critical_section() noexcept {
   in_critical_section.fetch_sub(1, std::memory_order_release);
-  php_assert (in_critical_section.load(std::memory_order_relaxed) >= 0);
+  php_assert(in_critical_section.load(std::memory_order_relaxed) >= 0);
   if (pending_signals && in_critical_section.load(std::memory_order_acquire) <= 0) {
     for (int i = 0; i < sizeof(pending_signals) * 8; i++) {
       if ((pending_signals >> i) & 1) {
diff --git a/runtime/critical_section.h b/runtime/critical_section.h
index 101b375d51..816502ec9d 100644
--- a/runtime/critical_section.h
+++ b/runtime/critical_section.h
@@ -20,13 +20,21 @@ void leave_critical_section() noexcept;
 // leaves the critical section until the destructor is executed;
 // it is implied that this object is constructed somewhere inside the critical section
 struct NonCriticalSectionGuard : private vk::not_copyable {
-  NonCriticalSectionGuard() noexcept { leave_critical_section(); }
-  ~NonCriticalSectionGuard() noexcept { enter_critical_section(); }
+  NonCriticalSectionGuard() noexcept {
+    leave_critical_section();
+  }
+  ~NonCriticalSectionGuard() noexcept {
+    enter_critical_section();
+  }
 };
 
 struct CriticalSectionGuard : private vk::not_copyable {
-  CriticalSectionGuard() noexcept { enter_critical_section(); }
-  ~CriticalSectionGuard() noexcept { leave_critical_section(); }
+  CriticalSectionGuard() noexcept {
+    enter_critical_section();
+  }
+  ~CriticalSectionGuard() noexcept {
+    leave_critical_section();
+  }
 };
 
 class CriticalSectionSmartGuard : private vk::not_copyable {
@@ -57,8 +65,8 @@ class CriticalSectionSmartGuard : private vk::not_copyable {
   bool in_critical_section_{false};
 };
 
-template
-auto critical_section_call(F &&f, Args &&...args) noexcept {
+template
+auto critical_section_call(F &&f, Args &&... args) noexcept {
   CriticalSectionGuard critical_section;
   return f(std::forward<Args>(args)...);
 }

From 39c2ccb3aeae5059f491e1624b2f4970e472ba0e Mon Sep 17 00:00:00 2001
From: Valery Matskevich
Date: Sat, 25 May 2024 14:39:38 +0300
Subject: [PATCH 4/5] fixes after review

Signed-off-by: Valery Matskevich
---
 compiler/scheduler/scheduler-base.cpp |  6 +++---
 compiler/scheduler/scheduler.cpp      | 11 ++++++-----
 compiler/stage.cpp                    | 22 +++++++++++-----------
 compiler/threading/data-stream.h      |  8 +++++---
 compiler/threading/hash-table.h       | 16 +++++++++-------
 compiler/threading/locks.h            | 17 ++++++++---------
 compiler/threading/tls.h              | 20 +++-----------------
 runtime/critical_section.cpp          | 18 +++++++++---------
 runtime/critical_section.h            | 22 +++++++--------------
 9 files changed, 61 insertions(+), 79 deletions(-)

diff --git a/compiler/scheduler/scheduler-base.cpp b/compiler/scheduler/scheduler-base.cpp
index c5b1f7e468..ecec42fb21 100644
--- a/compiler/scheduler/scheduler-base.cpp
+++ b/compiler/scheduler/scheduler-base.cpp
@@ -11,17 +11,17 @@ std::atomic<int> tasks_before_sync_node;
 static SchedulerBase *scheduler;
 
 void set_scheduler(SchedulerBase *new_scheduler) {
-  assert(scheduler == nullptr);
+  assert (scheduler == nullptr);
   scheduler = new_scheduler;
 }
 
 void unset_scheduler(SchedulerBase *old_scheduler) {
-  assert(scheduler == old_scheduler);
+  assert (scheduler == old_scheduler);
   scheduler = nullptr;
 }
 
 SchedulerBase *get_scheduler() {
-  assert(scheduler != nullptr);
+  assert (scheduler != nullptr);
   return scheduler;
 }
diff --git a/compiler/scheduler/scheduler.cpp b/compiler/scheduler/scheduler.cpp
index c02398c228..59cc0eb7ab 100644
--- a/compiler/scheduler/scheduler.cpp
+++ b/compiler/scheduler/scheduler.cpp
@@ -21,14 +21,15 @@ class ThreadContext {
   bool run_flag;
 };
 
+
 void *scheduler_thread_execute(void *arg) {
   auto *tls = (ThreadContext *)arg;
   tls->scheduler->thread_execute(tls);
   return nullptr;
 }
 
-Scheduler::Scheduler()
-  : threads_count(-1) {
+Scheduler::Scheduler() :
+  threads_count(-1) {
   task_pull = new TaskPull();
 }
 
@@ -46,7 +45,7 @@ void Scheduler::add_sync_node(Node *node) {
 }
 
 void Scheduler::add_task(Task *task) {
-  assert(task_pull != nullptr);
+  assert (task_pull != nullptr);
   task_pull->add_task(task);
 }
 
@@ -55,7 +54,7 @@ void Scheduler::execute() {
   set_thread_id(0);
   std::vector<ThreadContext> threads(threads_count + 1);
 
-  assert((int)one_thread_nodes.size() < threads_count);
+  assert ((int)one_thread_nodes.size() < threads_count);
   for (int i = 1; i <= threads_count; i++) {
     threads[i].thread_id = i;
     threads[i].scheduler = this;
@@ -90,7 +89,7 @@ void Scheduler::execute() {
 }
 
 void Scheduler::set_threads_count(int new_threads_count) {
-  assert(1 <= new_threads_count && new_threads_count <= MAX_THREADS_COUNT);
+  assert (1 <= new_threads_count && new_threads_count <= MAX_THREADS_COUNT);
   threads_count = new_threads_count;
 }
diff --git a/compiler/stage.cpp b/compiler/stage.cpp
index 567d070c5e..d3094aca35 100644
--- a/compiler/stage.cpp
+++ b/compiler/stage.cpp
@@ -29,7 +29,7 @@ const char *get_assert_level_desc(AssertLevelT assert_level) {
     case FATAL_ASSERT_LEVEL:
       return "Fatal error";
     default:
-      assert(0);
+      assert (0);
   }
 }
 
@@ -43,8 +43,8 @@ void stage::set_warning_file(FILE *file) noexcept {
   warning_file = file;
 }
 
-void on_compilation_error(const char *description __attribute__((unused)), const char *file_name, int line_number, const char *full_description,
-                          AssertLevelT assert_level) {
+void on_compilation_error(const char *description __attribute__((unused)), const char *file_name, int line_number,
+                          const char *full_description, AssertLevelT assert_level) {
   AutoLocker<std::atomic<int> *> locker(&ce_locker);
   FILE *file = stdout;
@@ -62,7 +62,7 @@ void on_compilation_error(const char *description __attribute__((unused)), const
             "It is probably happened due to incorrect or unsupported PHP input.\n"
             "But it is still bug in compiler.\n");
 #ifdef __arm64__
-    __builtin_debugtrap(); // for easier debugging kphp_assert / kphp_fail
+    __builtin_debugtrap();  // for easier debugging kphp_assert / kphp_fail
 #endif
     abort();
   }
@@ -75,6 +75,7 @@ void on_compilation_error(const char *description __attribute__((unused)), const
   fflush(file);
 }
 
+
 void Location::set_file(SrcFilePtr new_file) {
   file = new_file;
   function = FunctionPtr();
@@ -94,10 +93,10 @@ void Location::set_line(int new_line) {
   line = new_line;
 }
 
-Location::Location(const SrcFilePtr &file, const FunctionPtr &function, int line)
-  : file(file)
-  , function(function)
-  , line(line) {}
+Location::Location(const SrcFilePtr &file, const FunctionPtr &function, int line) :
+  file(file),
+  function(function),
+  line(line) {}
 
 // return a location in the format: "{file}:{line} in {function}"
 std::string Location::as_human_readable() const {
@@ -251,9 +250,8 @@ const std::string &stage::get_function_name() {
   return function->name;
 }
 
-bool stage::should_be_colored(FILE *f) {
-  if (!G)
-    return TermStringFormat::is_terminal(f);
+bool stage::should_be_colored(FILE *f) { 
+  if (!G) return TermStringFormat::is_terminal(f);
   switch (G->settings().get_color_settings()) {
     case CompilerSettings::colored:
       return true;
diff --git a/compiler/threading/data-stream.h b/compiler/threading/data-stream.h
index f4233fcf4d..8023ea9a2d 100644
--- a/compiler/threading/data-stream.h
+++ b/compiler/threading/data-stream.h
@@ -21,8 +21,10 @@ class DataStream {
   template
   using NthDataType = DataType;
 
-  explicit DataStream(bool is_sink = false)
-    : is_sink_mode_(is_sink) {}
+  explicit DataStream(bool is_sink = false) :
+    is_sink_mode_(is_sink)
+  {
+  }
 
   bool get(DataType &result) {
     std::lock_guard lock{mutex_};
@@ -63,7 +65,7 @@ struct EmptyStream {
   using NthDataType = EmptyStream;
 };
 
-template
+template
 class MultipleDataStreams {
 private:
   std::tuple *...> streams_;
diff --git a/compiler/threading/hash-table.h b/compiler/threading/hash-table.h
index 6d96e2218f..0c6d054505 100644
--- a/compiler/threading/hash-table.h
+++ b/compiler/threading/hash-table.h
@@ -16,9 +16,10 @@ class TSHashTable {
     std::atomic<unsigned long long> hash;
     T data;
 
-    HTNode()
-      : hash(0)
-      , data() {}
+    HTNode() :
+      hash(0),
+      data() {
+    }
   };
 
 private:
@@ -26,9 +27,10 @@ class TSHashTable {
   HTNode *nodes;
   std::atomic<int> used_size;
 
 public:
-  TSHashTable()
-    : nodes(new HTNode[N])
-    , used_size(0) {}
+  TSHashTable() :
+    nodes(new HTNode[N]),
+    used_size(0) {
+  }
 
   HTNode *at(unsigned long long hash) {
     int i = (unsigned)hash % (unsigned)N;
@@ -41,7 +43,7 @@ class TSHashTable {
       }
       unsigned long long expected = 0;
       if (nodes[i].hash.load(std::memory_order_acquire) == 0 && !nodes[i].hash.compare_exchange_strong(expected, hash, std::memory_order_acq_rel)) {
-        int id = used_size.fetch_add(1, std::memory_order_release);
+        int id = used_size.fetch_add(1, std::memory_order_relaxed);
         assert(id * 2 < N);
         continue;
       }
diff --git a/compiler/threading/locks.h b/compiler/threading/locks.h
index 5b7718d0e4..a4da7c33ae 100644
--- a/compiler/threading/locks.h
+++ b/compiler/threading/locks.h
@@ -46,13 +46,13 @@ class KDB_CACHELINE_ALIGNED Lockable {
   std::atomic<int> x;
 
 public:
-  Lockable()
-    : x(0) {}
+  Lockable() :
+    x(0) {}
 
-  Lockable(const Lockable &other) noexcept
-    : x{other.x.load(std::memory_order_relaxed)} {}
-  Lockable(Lockable &&other) noexcept
-    : x{other.x.load(std::memory_order_relaxed)} {}
+  Lockable(const Lockable &other) noexcept :
+    x{other.x.load(std::memory_order_relaxed)} {}
+  Lockable(Lockable &&other) noexcept :
+    x{other.x.load(std::memory_order_relaxed)} {}
   Lockable &operator=(const Lockable &other) noexcept {
     x = other.x.load(std::memory_order_relaxed);
     return *this;
   }
@@ -77,10 +77,9 @@ template
 class AutoLocker {
 private:
   DataT ptr;
-
 public:
-  explicit AutoLocker(DataT ptr)
-    : ptr(ptr) {
+  explicit AutoLocker(DataT ptr) :
+    ptr(ptr) {
     lock(ptr);
   }
diff --git a/compiler/threading/tls.h b/compiler/threading/tls.h
index 21557d39b2..2be42a6e91 100644
--- a/compiler/threading/tls.h
+++ b/compiler/threading/tls.h
@@ -27,8 +27,6 @@ struct TLS {
 private:
   struct KDB_CACHELINE_ALIGNED TLSRaw {
     T data{};
-    std::atomic<int> locker = UNLOCKED;
-    char dummy[KDB_CACHELINE_SIZE];
   };
 
   // The thread with thread_id = 0 is the main thread in which the scheduler's master code is executed.
@@ -47,8 +45,9 @@ struct TLS {
   }
 
 public:
-  TLS()
-    : arr() {}
+  TLS() :
+    arr() {
+  }
 
   T &get() {
     return get_raw()->data;
   }
@@ -69,19 +68,6 @@ struct TLS {
   int size() {
     return MAX_THREADS_COUNT + 1;
   }
-
-  T *lock_get() {
-    TLSRaw *raw = get_raw();
-    bool ok = try_lock(&raw->locker);
-    assert(ok);
-    return &raw->data;
-  }
-
-  void unlock_get(T *ptr) {
-    TLSRaw *raw = get_raw();
-    assert(&raw->data == ptr);
-    unlock(&raw->locker);
-  }
 };
 
 #pragma GCC diagnostic pop
diff --git a/runtime/critical_section.cpp b/runtime/critical_section.cpp
index f10a5f44fa..97045ec0ce 100644
--- a/runtime/critical_section.cpp
+++ b/runtime/critical_section.cpp
@@ -9,27 +9,27 @@
 
 #include "runtime/php_assert.h"
 
-void check_stack_overflow() __attribute__((weak));
+void check_stack_overflow() __attribute__ ((weak));
 
 void check_stack_overflow() {
-  // pass;
+  //pass;
 }
 
 namespace dl {
 
-std::atomic<int> in_critical_section;
+volatile int in_critical_section;
 volatile long long pending_signals;
 
 void enter_critical_section() noexcept {
   check_stack_overflow();
-  php_assert(in_critical_section.load(std::memory_order_relaxed) >= 0);
-  in_critical_section.fetch_add(1, std::memory_order_release);
+  php_assert (in_critical_section >= 0);
+  in_critical_section++;
 }
 
 void leave_critical_section() noexcept {
-  in_critical_section.fetch_sub(1, std::memory_order_release);
-  php_assert(in_critical_section.load(std::memory_order_relaxed) >= 0);
-  if (pending_signals && in_critical_section.load(std::memory_order_acquire) <= 0) {
+  in_critical_section--;
+  php_assert (in_critical_section >= 0);
+  if (pending_signals && in_critical_section <= 0) {
     for (int i = 0; i < sizeof(pending_signals) * 8; i++) {
       if ((pending_signals >> i) & 1) {
         raise(i);
@@ -39,8 +39,8 @@ void leave_critical_section() noexcept {
 }
 
 void init_critical_section() noexcept {
+  in_critical_section = 0;
   pending_signals = 0;
-  in_critical_section.store(0, std::memory_order_release);
 }
 
 } // namespace dl
diff --git a/runtime/critical_section.h b/runtime/critical_section.h
index 816502ec9d..3636f2b95a 100644
--- a/runtime/critical_section.h
+++ b/runtime/critical_section.h
@@ -11,7 +11,7 @@
 
 namespace dl {
 
-extern std::atomic<int> in_critical_section;
+extern volatile int in_critical_section;
 extern volatile long long pending_signals;
 
 void enter_critical_section() noexcept;
@@ -20,13 +20,13 @@ void leave_critical_section() noexcept;
 // leaves the critical section until the destructor is executed;
 // it is implied that this object is constructed somewhere inside the critical section
 struct NonCriticalSectionGuard : private vk::not_copyable {
-  NonCriticalSectionGuard() noexcept {
-    leave_critical_section();
-  }
-  ~NonCriticalSectionGuard() noexcept {
-    enter_critical_section();
-  }
+  NonCriticalSectionGuard() noexcept { leave_critical_section(); }
+  ~NonCriticalSectionGuard() noexcept { enter_critical_section(); }
 };
 
 struct CriticalSectionGuard : private vk::not_copyable {
-  CriticalSectionGuard() noexcept {
-    enter_critical_section();
-  }
-  ~CriticalSectionGuard() noexcept {
-    leave_critical_section();
-  }
+  CriticalSectionGuard() noexcept { enter_critical_section(); }
+  ~CriticalSectionGuard() noexcept { leave_critical_section(); }
 };
 
 class CriticalSectionSmartGuard : private vk::not_copyable {
@@ -57,8 +57,8 @@ class CriticalSectionSmartGuard : private vk::not_copyable {
   bool in_critical_section_{false};
 };
 
-template
-auto critical_section_call(F &&f, Args &&... args) noexcept {
+template
+auto critical_section_call(F &&f, Args &&...args) noexcept {
   CriticalSectionGuard critical_section;
   return f(std::forward<Args>(args)...);
 }

From 12049facae0b9fb473d929bfbab1a242ff485c99 Mon Sep 17 00:00:00 2001
From: Valery Matskevich
Date: Sat, 25 May 2024 14:50:59 +0300
Subject: [PATCH 5/5] cleaner blame

Signed-off-by: Valery Matskevich
---
 compiler/threading/locks.h   | 2 +-
 runtime/critical_section.cpp | 5 ++---
 runtime/critical_section.h   | 1 -
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/compiler/threading/locks.h b/compiler/threading/locks.h
index a4da7c33ae..c831920e87 100644
--- a/compiler/threading/locks.h
+++ b/compiler/threading/locks.h
@@ -26,7 +26,7 @@ void unlock(T locker) {
 }
 
 inline bool try_lock(std::atomic<int> *locker) {
-  int expected = 0;
+  int expected = UNLOCKED;
   return locker->compare_exchange_weak(expected, LOCKED, std::memory_order_acq_rel);
 }
 
diff --git a/runtime/critical_section.cpp b/runtime/critical_section.cpp
index 97045ec0ce..7c93a12e0c 100644
--- a/runtime/critical_section.cpp
+++ b/runtime/critical_section.cpp
@@ -4,7 +4,6 @@
 
 #include "runtime/critical_section.h"
 
-#include <atomic>
 #include
 
 #include "runtime/php_assert.h"
@@ -23,11 +22,11 @@ volatile long long pending_signals;
 void enter_critical_section() noexcept {
   check_stack_overflow();
   php_assert (in_critical_section >= 0);
-  in_critical_section++;
+  in_critical_section = in_critical_section + 1;
 }
 
 void leave_critical_section() noexcept {
-  in_critical_section--;
+  in_critical_section = in_critical_section - 1;
   php_assert (in_critical_section >= 0);
   if (pending_signals && in_critical_section <= 0) {
     for (int i = 0; i < sizeof(pending_signals) * 8; i++) {
diff --git a/runtime/critical_section.h b/runtime/critical_section.h
index 3636f2b95a..70a4eec178 100644
--- a/runtime/critical_section.h
+++ b/runtime/critical_section.h
@@ -4,7 +4,6 @@
 
 #pragma once
 
-#include <atomic>
 #include
 
 #include "common/mixin/not_copyable.h"
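
Note on PATCH 2's change from a negated compare_exchange_weak to a negated compare_exchange_strong in TSHashTable::at(): the weak form is allowed to fail spuriously even while the slot still holds the expected value, so a negated weak result cannot safely be read as "another thread already claimed this slot" unless the caller retries in a loop. Below is a minimal standalone sketch of that slot-claim pattern, assuming only standard C++11 <atomic>; the names (Slot, try_claim) are illustrative and are not taken from the KPHP sources.

    #include <atomic>

    // Illustrative slot type: 0 means "empty", any other value is a published hash.
    using Slot = std::atomic<unsigned long long>;

    // Try to publish `hash` into an empty slot.
    // compare_exchange_strong reports failure only when the slot really was non-zero;
    // on failure `expected` holds the competing value, so the caller can distinguish
    // "already claimed with the same hash" from "claimed by a different hash".
    // With compare_exchange_weak, a spurious failure on an empty slot would be
    // indistinguishable from a lost race unless the call sits inside a retry loop.
    inline bool try_claim(Slot &slot, unsigned long long hash, unsigned long long &winner) {
      unsigned long long expected = 0;
      if (slot.compare_exchange_strong(expected, hash, std::memory_order_acq_rel)) {
        winner = hash;     // we claimed the empty slot
        return true;
      }
      winner = expected;   // someone else got there first
      return false;
    }

The single-argument memory-order overload used here applies acq_rel to the success path and derives an acquire ordering for the failure path, which matches the ordering the series settles on.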