From 6a767225f882cef790d3c959c1f41e982942c62b Mon Sep 17 00:00:00 2001 From: Wei Song Date: Thu, 27 Jun 2024 11:30:49 +0800 Subject: [PATCH] simplify cache set lock In a multithreaded environment, a cache set needs to be locked before operating on it. This mechanism is now implemented as part of the cache base class, so all special extensions in the multithread support classes can be removed. --- cache/cache.hpp | 3 + cache/cache_multi.hpp | 130 ++---------------------------------- cache/coherence_multi.hpp | 134 ++++++++++---------------------------- 3 files changed, 41 insertions(+), 226 deletions(-) diff --git a/cache/cache.hpp b/cache/cache.hpp index 7f6fc80..8cbc113 100644 --- a/cache/cache.hpp +++ b/cache/cache.hpp @@ -171,6 +171,9 @@ class CacheBase : public CacheMonitorSupport virtual void meta_return_buffer(CMMetadataBase *buf) = 0; // return a copy buffer, used to detect conflicts in copy buffer __always_inline void lock_line(uint32_t ai, uint32_t s, uint32_t w) { access(ai, s, w)->lock(); } __always_inline void unlock_line(uint32_t ai, uint32_t s, uint32_t w) { access(ai, s, w)->unlock(); } + __always_inline void set_mt_state(uint32_t ai, uint32_t s, uint16_t prio) { arrays[ai]->set_mt_state(s, prio); } + __always_inline void check_mt_state(uint32_t ai, uint32_t s, uint16_t prio) { arrays[ai]->check_mt_state(s, prio); } + __always_inline void reset_mt_state(uint32_t ai, uint32_t s, uint16_t prio) { arrays[ai]->reset_mt_state(s, prio); } virtual std::tuple size() const = 0; // return the size parameters of the cache uint32_t get_id() const { return id; } diff --git a/cache/cache_multi.hpp b/cache/cache_multi.hpp index 41ffcdd..6d3b339 100644 --- a/cache/cache_multi.hpp +++ b/cache/cache_multi.hpp @@ -2,85 +2,6 @@ #define CM_CACHE_CACHE_MULTI_HPP #include "cache/cache.hpp" -#include -#include -#include - -// Multi-thread support for Cache Array -class CacheArrayMultiThreadSupport -{ -public: - virtual std::vector *get_status() = 0; - virtual std::mutex* 
get_mutex(uint32_t s) = 0; // get set mutex - virtual std::mutex* get_cacheline_mutex(uint32_t s, uint32_t w) = 0; // get cacheline mutex - virtual std::condition_variable* get_cv(uint32_t s) = 0; // get set cv -}; - -// Multi-thread Cache Array -// IW: index width, NW: number of ways, MT: metadata type, DT: data type (void if not in use) -template - requires C_DERIVE - && C_DERIVE_OR_VOID -class CacheArrayMultiThread : public CacheArrayNorm, - public CacheArrayMultiThreadSupport -{ - - typedef CacheArrayNorm CacheAT; -protected: - std::vector status; // record every set status - std::vector status_mtxs; // mutex for status - std::vector mutexs; // mutex array for meta - std::vector cvs; // cv array, used in conjunction with mutexes - -public: - using CacheAT::nset; - using CacheAT::way_num; - CacheArrayMultiThread(unsigned int extra_way = 0, std::string name = "") : CacheAT(extra_way, name){ - size_t meta_num = nset * way_num; - status.resize(nset); - for(uint32_t i = 0; i < nset; i++) status[i] = 0; - - mutexs.resize(meta_num); - for(auto &t:mutexs) t = new std::mutex(); - - status_mtxs.resize(nset); - for(auto &s : status_mtxs) s = new std::mutex(); - - cvs.resize(nset); - for(auto &c : cvs) c = new std::condition_variable(); - } - - virtual ~CacheArrayMultiThread(){ - for(auto t: mutexs) delete t; - for(auto s : status_mtxs) delete s; - for(auto c : cvs) delete c; - } - - virtual std::mutex* get_cacheline_mutex(uint32_t s, uint32_t w) { return mutexs[s*(way_num) + w]; } - - virtual std::vector *get_status(){ return &status; } - virtual std::mutex* get_mutex(uint32_t s) { return status_mtxs[s]; } - virtual std::condition_variable* get_cv(uint32_t s) { return cvs[s]; } -}; - - -// Multi-thread support for CacheBase -class CacheBaseMultiThreadSupport -{ -public: - virtual std::vector *get_status(uint32_t ai) = 0; - virtual std::mutex* get_mutex(uint32_t ai, uint32_t s) = 0; - virtual std::condition_variable* get_cv(uint32_t ai, uint32_t s) = 0; - virtual 
std::mutex* get_cacheline_mutex(uint32_t ai, uint32_t s, uint32_t w) = 0; - - // get set's status, mutex and cv in one function call - virtual std::tuple *, std::mutex*, std::condition_variable*> - get_set_control(uint32_t ai, uint32_t s) = 0; - - virtual bool hit(uint64_t addr, uint32_t *ai, uint32_t *s, uint32_t *w, - uint16_t priority, bool need_replace = false) = 0; -}; - // Multi-thread Skewed Cache // IW: index width, NW: number of ways, P: number of partitions @@ -91,11 +12,10 @@ class CacheBaseMultiThreadSupport template requires C_DERIVE && C_DERIVE_OR_VOID && C_DERIVE && C_DERIVE_OR_VOID -class CacheSkewedMultiThread : public CacheSkewed, - public CacheBaseMultiThreadSupport +class CacheSkewedMultiThread : public CacheSkewed { typedef CacheSkewed CacheT; - typedef CacheArrayMultiThread CacheAT; + typedef CacheArrayNorm CacheAT; protected: using CacheT::arrays; @@ -104,34 +24,7 @@ class CacheSkewedMultiThread : public CacheSkewed *get_status(uint32_t ai){ - return (static_cast(arrays[ai]))->get_status(); - } - virtual std::mutex* get_mutex(uint32_t ai, uint32_t s){ - return (static_cast(arrays[ai]))->get_mutex(s); - } - virtual std::condition_variable* get_cv(uint32_t ai, uint32_t s) { - return (static_cast(arrays[ai]))->get_cv(s); - } - - virtual std::tuple *, std::mutex*, std::condition_variable*> - get_set_control(uint32_t ai, uint32_t s) - { - return std::make_tuple(get_status(ai), get_mutex(ai, s), get_cv(ai, s)); - } - - virtual std::mutex* get_cacheline_mutex(uint32_t ai, uint32_t s, uint32_t w){ - return (static_cast(arrays[ai]))->get_cacheline_mutex(s, w); - } - + : CacheT(name, extra_par, extra_way) {} virtual bool hit(uint64_t addr, uint32_t *ai, uint32_t *s, uint32_t *w, uint16_t priority, bool need_replace = false) @@ -144,14 +37,7 @@ class CacheSkewedMultiThread : public CacheSkewedwait(lk, [idx, status, priority] { return ((*status)[idx] < priority);} ); - (*status)[*s] |= priority; - lk.unlock(); + this->set_mt_state(*ai, *s, priority); 
for(*w = 0; *w < NW; (*w)++){ if(access(*ai, *s, *w)->match(addr)) { hit = true; break;} @@ -164,13 +50,7 @@ class CacheSkewedMultiThread : public CacheSkewednotify_all(); + this->reset_mt_state(i, indexer.index(addr, i), priority); } } return hit; diff --git a/cache/coherence_multi.hpp b/cache/coherence_multi.hpp index 77c77f3..28973d5 100644 --- a/cache/coherence_multi.hpp +++ b/cache/coherence_multi.hpp @@ -11,23 +11,20 @@ // the higher the value, the higher the priority. class Priority{ public: - static const uint16_t acquire = 0x001; - static const uint16_t flush = 0x001; - static const uint16_t read = 0x001; - static const uint16_t write = 0x001; - static const uint16_t probe = 0x010; // acquire miss, requiring lower cahce which back-probe this cache - static const uint16_t evict = 0x010; - static const uint16_t evict_cv_wait = 0x100; - static const uint16_t release = 0x100; // acquire hit but need back probe and writeback from inner + static const uint16_t acquire = 0x0001; + static const uint16_t flush = 0x0001; + static const uint16_t read = 0x0001; + static const uint16_t write = 0x0001; + static const uint16_t probe = 0x0010; // acquire miss, requiring lower cache which back-probes this cache + static const uint16_t evict = 0x0100; + //static const uint16_t evict_cv_wait = 0x100; + static const uint16_t release = 0x1000; // acquire hit but need back probe and writeback from inner }; struct addr_info{ uint32_t ai; uint32_t s; uint32_t w; - std::mutex* mtx; - std::condition_variable* cv; - std::vector* status; }; struct info{ @@ -37,28 +34,14 @@ struct info{ ///////////////////////////////// // database for store inner acquire address -class InnerAddressDataBase -{ -public: - /** add acquire address infomation to database */ - virtual void add(int64_t id, uint64_t addr, addr_info loc) = 0; - /** erase address infomation */ - virtual void erase(int64_t id, uint64_t addr) = 0; - /** query address infomation */ - virtual std::pair query(int64_t id, uint64_t 
addr) = 0; - virtual void resize(uint32_t size) = 0; - - virtual ~InnerAddressDataBase() {} -}; - -class InnerAddressDataMap : public InnerAddressDataBase +class InnerAddressDataMap { protected: std::vector mtx; // mutex for protecting record std::vector > map; public: - InnerAddressDataMap() : InnerAddressDataBase() {} + InnerAddressDataMap() {} void add(int64_t id, uint64_t addr, addr_info loc){ std::unique_lock lk(*mtx[id]); @@ -118,14 +101,13 @@ class InnerCohPortMultiThreadSupport virtual void acquire_ack_resp(uint64_t addr, coh_cmd_t cmd, uint64_t *delay) = 0; protected: - virtual std::tuple*, bool> + virtual std::tuple access_line_multithread(uint64_t addr, coh_cmd_t cmd, uint64_t *delay) = 0; }; // common behvior for multi-thread uncached outer ports template - requires C_DERIVE + requires C_DERIVE class OuterCohPortMultiThreadUncached : public OuterCohPortUncached, public OuterCohPortMultiThreadSupport { public: @@ -151,7 +133,6 @@ class OuterCohPortMultiThreadUncached : public OuterCohPortUncached, publi // common behavior for cached outer ports template requires C_DERIVE - && C_DERIVE class OuterCohPortMultiThreadT : public OPUC { protected: @@ -178,9 +159,6 @@ class OuterCohPortMultiThreadT : public OPUC CMDataBase *data = nullptr; bool hit = cache->hit(addr, &ai, &s, &w, Priority::probe); if(hit){ - auto [status, mtx, cv] = cache->get_set_control(ai, s); - cache->lock_line(ai, s, w); - std::unique_lock lk(*mtx, std::defer_lock); std::tie(meta, data) = cache->access_line(ai, s, w); /** It is possible that higher priority behaviors have caused the meta to change, so need check again */ if(!meta->is_valid() || meta->addr(s) != addr){ @@ -201,10 +179,7 @@ class OuterCohPortMultiThreadT : public OPUC cache->unlock_line(ai, s, w); } - lk.lock(); - (*status)[s] &= (~Priority::probe); - lk.unlock(); - cv->notify_all(); + cache->reset_mt_state(ai, s, Priority::probe); } cache->hook_manage(addr, ai, s, w, hit, OPUC::policy->is_outer_evict(outer_cmd), writeback, 
meta, data, delay); return std::make_pair(hit, writeback); @@ -214,24 +189,22 @@ class OuterCohPortMultiThreadT : public OPUC template requires C_DERIVE - && C_DERIVE using OuterCohMultiThreadPort = OuterCohPortMultiThreadT, IT, CT>; template requires C_DERIVE - && C_DERIVE + && C_DERIVE && C_DERIVE class InnerCohPortMultiThreadUncached : public InnerCohPortUncached, public InnerCohPortMultiThreadSupport { protected: - InnerAddressDataBase* database; - virtual std::tuple*, bool> + InnerAddressDataMap* database; + virtual std::tuple access_line_multithread(uint64_t addr, coh_cmd_t cmd, uint64_t *delay) { uint32_t ai, s, w; auto cache = static_cast(InnerCohPortUncached::cache); /** true indicates that replace is desired */ bool hit = cache->hit(addr, &ai, &s, &w, Priority::acquire, true); - auto [status, mtx, cv] = cache->get_set_control(ai, s); auto [meta, data] = cache->access_line(ai, s, w); cache->lock_line(ai, s, w); if(hit){ @@ -249,7 +222,7 @@ class InnerCohPortMultiThreadUncached : public InnerCohPortUncached, publi if(meta->is_valid()) evict(meta, data, ai, s, w, delay); (static_cast(outer))->acquire_req(addr, meta, data, policy->cmd_for_outer_acquire(cmd), delay, ai, s, w); // fetch the missing block } - return std::make_tuple(meta, data, ai, s, w, mtx, cv, status, hit); + return std::make_tuple(meta, data, ai, s, w, hit); } virtual void evict(CMMetadataBase *meta, CMDataBase *data, uint32_t ai, uint32_t s, uint32_t w, uint64_t *delay) { auto cache = static_cast(InnerCohPortUncached::cache); @@ -261,22 +234,12 @@ class InnerCohPortMultiThreadUncached : public InnerCohPortUncached, publi } auto writeback = policy->writeback_need_writeback(meta, outer->is_uncached()); if(writeback.first){ - auto [status, mtx, cv] = cache->get_set_control(ai, s); - /** evict can ignore probe, so the evict_cv_wait value is 0x100, if changed to 0x10 will cause deadlock! 
*/ - uint16_t evict_cv_wait = Priority::evict_cv_wait; - std::unique_lock lk(*mtx, std::defer_lock); - lk.lock(); - cv->wait(lk, [s, status, evict_cv_wait] { return ((*status)[s] < evict_cv_wait);} ); - (*status)[s] |= Priority::evict; - lk.unlock(); + cache->set_mt_state(ai, s, Priority::evict); auto writeback_r = policy->writeback_need_writeback(meta, outer->is_uncached()); if(writeback_r.first) outer->writeback_req(addr, meta, data, writeback.second, delay); // writeback if dirty - lk.lock(); - (*status)[s] &= ~(Priority::evict); - lk.unlock(); - cv->notify_all(); + cache->reset_mt_state(ai, s, Priority::evict); } policy->meta_after_evict(meta); cache->hook_manage(addr, ai, s, w, true, true, writeback.first, meta, data, delay); @@ -287,18 +250,13 @@ class InnerCohPortMultiThreadUncached : public InnerCohPortUncached, publi auto cache = static_cast(InnerCohPortUncached::cache); bool hit = cache->hit(addr, &ai, &s, &w, Priority::release); if(hit){ - auto [status, mtx, cv] = cache->get_set_control(ai, s); auto [meta, data] = cache->access_line(ai, s, w); if(data_inner) data->copy(data_inner); policy->meta_after_release(cmd, meta, meta_inner); assert(meta_inner); // assume meta_inner is valid for all writebacks cache->hook_write(addr, ai, s, w, hit, true, meta, data, delay); - std::unique_lock lk(*mtx, std::defer_lock); - lk.lock(); - (*status)[s] = (*status)[s] & (~Priority::release); - lk.unlock(); - cv->notify_all(); + cache->reset_mt_state(ai, s, Priority::release); } } @@ -317,12 +275,7 @@ class InnerCohPortMultiThreadUncached : public InnerCohPortUncached, publi auto [flush, probe, probe_cmd] = policy->flush_need_sync(cmd, meta, outer->is_uncached()); if(!flush) { if(hit){ - std::tie(status, mtx, cv) = cache->get_set_control(ai, s); - std::unique_lock lk(*mtx, std::defer_lock); - lk.lock(); - (*status)[s] = (*status)[s] & (~Priority::flush); - lk.unlock(); - cv->notify_all(); + cache->reset_mt_state(ai, s, Priority::flush); } // do not handle flush at this 
level, and send it to the outer cache outer->writeback_req(addr, nullptr, nullptr, policy->cmd_for_flush(), delay); @@ -331,7 +284,6 @@ class InnerCohPortMultiThreadUncached : public InnerCohPortUncached, publi if(!hit) return; - std::tie(status, mtx, cv) = cache->get_set_control(ai, s); if(probe) { auto [phit, pwb] = probe_req(addr, meta, data, probe_cmd, delay); // sync if necessary if(pwb) cache->hook_write(addr, ai, s, w, true, true, meta, data, delay); // a write occurred during the probe @@ -343,11 +295,7 @@ class InnerCohPortMultiThreadUncached : public InnerCohPortUncached, publi policy->meta_after_flush(cmd, meta); cache->hook_manage(addr, ai, s, w, hit, policy->is_evict(cmd), writeback.first, meta, data, delay); - std::unique_lock lk(*mtx, std::defer_lock); - lk.lock(); - (*status)[s] = (*status)[s] & (~Priority::flush); - lk.unlock(); - cv->notify_all(); + cache->reset_mt_state(ai, s, Priority::flush); } public: @@ -359,7 +307,7 @@ class InnerCohPortMultiThreadUncached : public InnerCohPortUncached, publi virtual void acquire_resp(uint64_t addr, CMDataBase *data_inner, CMMetadataBase *meta_inner, coh_cmd_t cmd, uint64_t *delay){ auto p_policy = std::static_pointer_cast(policy); - auto [meta, data, ai, s, w, mtx, cv, status, hit] = access_line_multithread(addr, cmd, delay); + auto [meta, data, ai, s, w, hit] = access_line_multithread(addr, cmd, delay); if(meta->is_valid() && meta->addr(s) == addr){ policy->meta_after_grant(cmd, meta, meta_inner); if(data_inner) data_inner->copy(this->cache->get_data(ai, s, w)); @@ -377,14 +325,10 @@ class InnerCohPortMultiThreadUncached : public InnerCohPortUncached, publi bool unlock = p_policy->acquire_need_unlock(cmd); if(unlock){ cache->unlock_line(ai, s, w); - std::unique_lock lk(*mtx, std::defer_lock); - lk.lock(); - (*status)[s] = (*status)[s] & (~Priority::acquire); - lk.unlock(); - cv->notify_all(); + cache->reset_mt_state(ai, s, Priority::acquire); }else{ /** store relevant locks in the database and wait for the 
upper-level cache to issue an ack request */ - database->add(cmd.id, addr, addr_info{ai, s, w, mtx, cv, status}); + database->add(cmd.id, addr, addr_info{ai, s, w}); } } @@ -392,13 +336,9 @@ class InnerCohPortMultiThreadUncached : public InnerCohPortUncached, publi /** query whether the information of this address exists in the database */ auto info = database->query(cmd.id, addr); if(info.first){ - auto [ai, s, w, mtx, cv, status] = info.second; + auto [ai, s, w] = info.second; cache->unlock_line(ai, s, w); - std::unique_lock lk(*mtx, std::defer_lock); - lk.lock(); - (*status)[s] = (*status)[s] & (~Priority::acquire); - lk.unlock(); - cv->notify_all(); + cache->reset_mt_state(ai, s, Priority::acquire); database->erase(cmd.id, addr); } } @@ -436,13 +376,13 @@ class InnerCohPortMultiThreadT : public IPUC template requires C_DERIVE - && C_DERIVE + && C_DERIVE && C_DERIVE using InnerCohMultiThreadPort = InnerCohPortMultiThreadT >; template requires C_DERIVE - && C_DERIVE + && C_DERIVE && C_DERIVE class CoreMultiThreadInterface : public InnerCohPortMultiThreadUncached, public CoreInterfaceBase { @@ -462,7 +402,7 @@ class CoreMultiThreadInterface : public InnerCohPortMultiThreadUncached(InnerT::policy); addr = normalize(addr); auto cmd = policy->cmd_for_read(); - auto [meta, data, ai, s, w, mtx, cv, status, hit] = access_line_multithread(addr, cmd, delay); + auto [meta, data, ai, s, w, hit] = access_line_multithread(addr, cmd, delay); cache->hook_read(addr, ai, s, w, hit, meta, data, delay); @@ -471,11 +411,7 @@ class CoreMultiThreadInterface : public InnerCohPortMultiThreadUncached(outer))->acquire_ack_req(addr, ack.second, delay); cache->unlock_line(ai, s, w); - std::unique_lock lk(*mtx, std::defer_lock); - lk.lock(); - (*status)[s] = (*status)[s] & (~Priority::read); - lk.unlock(); - cv->notify_all(); + cache->reset_mt_state(ai, s, Priority::read); return data; } @@ -484,7 +420,7 @@ class CoreMultiThreadInterface : public 
InnerCohPortMultiThreadUncached(InnerT::policy); addr = normalize(addr); auto cmd = policy->cmd_for_write(); - auto [meta, data, ai, s, w, mtx, cv, status, hit] = access_line_multithread(addr, cmd, delay); + auto [meta, data, ai, s, w, hit] = access_line_multithread(addr, cmd, delay); meta->to_dirty(); if(data) data->copy(m_data); @@ -494,11 +430,7 @@ class CoreMultiThreadInterface : public InnerCohPortMultiThreadUncached(outer))->acquire_ack_req(addr, ack.second, delay); cache->unlock_line(ai, s, w); - std::unique_lock lk(*mtx, std::defer_lock); - lk.lock(); - (*status)[s] = (*status)[s] & (~Priority::read); - lk.unlock(); - cv->notify_all(); + cache->reset_mt_state(ai, s, Priority::read); } // flush a cache block from the whole cache hierarchy, (clflush in x86-64)