diff --git a/cache/exclusive.hpp b/cache/exclusive.hpp index eb132b7..960253e 100644 --- a/cache/exclusive.hpp +++ b/cache/exclusive.hpp @@ -167,7 +167,7 @@ class CacheSkewedExclusive : public CacheSkewed -using CacheNormExclusiveBroadcast = CacheSkewedExclusive, DLY, EnMon, false>; +using CacheNormExclusiveBroadcast = CacheSkewedExclusive, DLY, EnMon, false>; template class ExclusiveInnerCohPortUncachedBroadcast : public InnerCohPortUncached diff --git a/cache/replace.hpp b/cache/replace.hpp index 585c918..d52db85 100644 --- a/cache/replace.hpp +++ b/cache/replace.hpp @@ -7,18 +7,23 @@ #include "util/random.hpp" #include "util/multithread.hpp" +#include +#ifdef __cpp_lib_bitops +// for the popcount() supported in C++20 +#include +#endif + /////////////////////////////////// // Base class -// EF: empty first -template +// EF: empty first, EnMT: multithread +template requires NW <= 64 class ReplaceFuncBase { - const uint32_t NW; protected: std::vector > used_map; // at the size of 16, vector is actually faster than list and do not require alloc - std::vector > free_map; + std::vector free_map_st; // free map when single thread + std::vector *> free_map_mt; // multi-thread version std::vector alloc_map; // record the way allocated for the next access (only one allocated ay at any time) - std::vector free_num; #ifdef CHECK_MULTI #ifdef BOOST_STACKTRACE_LINK @@ -28,26 +33,47 @@ class ReplaceFuncBase #endif #endif - __always_inline uint32_t alloc_from_free(uint32_t s) { - free_num[s]--; - for(uint32_t i=0; iload() : free_map_st[s]; + if(fmap) { + auto way_oh = fmap & (~fmap + 1ull); + if constexpr (EnMT) { + if(!free_map_mt[s]->compare_exchange_strong(fmap, fmap & ~way_oh)) continue; + } else { + free_map_st[s] &= ~way_oh; + } + for(int i=0; i<64; i++) if(way_oh == (1ull << i)) return i; + assert(0 == "replacer free_map corrupted!"); + return -1; + } else + return -1; + } } virtual uint32_t select(uint32_t s) = 0; - __always_inline void delist_from_free(uint32_t s, uint32_t w, bool demand_acc) { - // in multithread simulation, a simultaneous probe may invalidate a cache block waiting for permission promotion - if(!free_map[s][w]) return; - assert(demand_acc); // assume such situation can occur only in permission promotion - free_map[s][w] = false; - free_num[s]--; + __always_inline void delist_from_free(uint32_t s, uint32_t w) { + uint64_t way_oh = 1ull << w; + if constexpr (EnMT) { + while(true) { + auto fmap = free_map_mt[s]->load(); + if(0 == (fmap & way_oh)) return; + if(free_map_mt[s]->compare_exchange_strong(fmap, fmap & ~way_oh)) return; + } + } else + free_map_st[s] &= ~way_oh; + } + + __always_inline void list_to_free(uint32_t s, uint32_t w) { + uint64_t way_oh = 1ull << w; + if constexpr (EnMT) { + while(true) { + auto fmap = free_map_mt[s]->load(); + if(free_map_mt[s]->compare_exchange_strong(fmap, fmap | way_oh)) return; + } + } else + free_map_st[s] |= way_oh; } __always_inline void set_alloc_map(uint32_t s, int32_t v) { @@ -90,8 +116,8 @@ class ReplaceFuncBase } public: - ReplaceFuncBase(uint32_t nset, uint32_t nway) - :NW(nway), used_map(nset), free_map(nset), alloc_map(nset, -1), free_num(nset, nway) { + ReplaceFuncBase(uint32_t nset) + :used_map(nset), free_map_st(nset), free_map_mt(nset, nullptr), alloc_map(nset, -1) { #ifdef CHECK_MULTI #ifdef BOOST_STACKTRACE_LINK alloc_record.resize(nset, {0, ""}); @@ -99,23 +125,43 @@ class ReplaceFuncBase alloc_record.resize(nset, 0); #endif #endif - for (auto &s: free_map) s.resize(NW, true); + constexpr uint64_t fmap = NW < 64 ? (1ull << NW) - 1 : ~(0ull); + if constexpr (EnMT) { + for (auto &s: free_map_mt) s = new std::atomic(fmap); + } else + for (auto &s: free_map_st) s = fmap; } - virtual ~ReplaceFuncBase() = default; + virtual ~ReplaceFuncBase() { + if constexpr (EnMT) for (auto s: free_map_mt) delete s; + } - __always_inline uint32_t get_free_num(uint32_t s) const { return free_num[s]; } + __always_inline uint32_t get_free_num(uint32_t s) { // return the number of free places by popcount the free map + auto fmap = EnMT ? free_map_mt[s]->load() : free_map_st[s]; +#ifdef __cpp_lib_bitops + return std::popcount(fmap); +#elif defined __GNUG__ + return __builtin_popcountll(fmap); +#else + uint32_t rv = 0; + while(fmap) { + rv += (fmap & 0x1ull); + fmap >> 1; + } + return rv; +#endif + } virtual void replace(uint32_t s, uint32_t *w) { - uint32_t i = 0; + int32_t i = 0; if constexpr (EF) { - if(free_num[s] > 0) i = alloc_from_free(s); - else i = select(s); + i = alloc_from_free(s); + if (i<0) i = select(s); } else { i = select(s); - if(free_map[s][i]) { free_num[s]--; free_map[s][i] = false; } + delist_from_free(s, i); } - assert(i < NW || 0 == "replacer used_map corrupted!"); + assert((uint32_t)i < NW || 0 == "replacer used_map corrupted!"); this->set_alloc_map(s, i); *w = i; } @@ -123,10 +169,7 @@ class ReplaceFuncBase virtual void access(uint32_t s, uint32_t w, bool demand_acc, bool prefetch) = 0; virtual void invalid(uint32_t s, uint32_t w) { - if((int32_t)w != alloc_map[s]) { - free_map[s][w] = true; - free_num[s]++; - } + if((int32_t)w != alloc_map[s]) list_to_free(s, w); } }; @@ -134,10 +177,10 @@ class ReplaceFuncBase // FIFO replacement // IW: index width, NW: number of ways // EF: empty first, DUO: demand update only (do not update state for release) -template -class ReplaceFIFO : public ReplaceFuncBase +template +class ReplaceFIFO : public ReplaceFuncBase { - typedef ReplaceFuncBase RPT; + typedef ReplaceFuncBase RPT; protected: using RPT::alloc_map; using RPT::used_map; @@ -150,7 +193,7 @@ class ReplaceFIFO : public ReplaceFuncBase } public: - ReplaceFIFO() : RPT(1ul << IW, NW) { + ReplaceFIFO() : RPT(1ul << IW) { for (auto &s: used_map) { s.resize(NW); for(uint32_t i=0; i used_map[s][w] = 0; // insert at LRU position } } - RPT::delist_from_free(s, w, demand_acc); + if constexpr (EnMT) RPT::delist_from_free(s, w); } }; @@ -177,10 +220,10 @@ class ReplaceFIFO : public ReplaceFuncBase // LRU replacement // IW: index width, NW: number of ways // EF: empty first, DUO: demand update only (do not update state for release) -template -class ReplaceLRU : public ReplaceFIFO +template +class ReplaceLRU : public ReplaceFIFO { - typedef ReplaceFuncBase RPT; + typedef ReplaceFuncBase RPT; protected: using RPT::alloc_map; using RPT::used_map; @@ -198,7 +241,7 @@ class ReplaceLRU : public ReplaceFIFO } } if((int32_t)w == alloc_map[s] && demand_acc) this->set_alloc_map(s, -1); - RPT::delist_from_free(s, w, demand_acc); + if constexpr (EnMT) RPT::delist_from_free(s, w); } }; @@ -206,10 +249,10 @@ class ReplaceLRU : public ReplaceFIFO // Static RRIP replacement // IW: index width, NW: number of ways // EF: empty first, DUO: demand update only (do not update state for release) -template -class ReplaceSRRIP : public ReplaceFuncBase +template +class ReplaceSRRIP : public ReplaceFuncBase { - typedef ReplaceFuncBase RPT; + typedef ReplaceFuncBase RPT; protected: using RPT::used_map; using RPT::alloc_map; @@ -224,7 +267,7 @@ class ReplaceSRRIP : public ReplaceFuncBase } public: - ReplaceSRRIP() : RPT(1ul << IW, NW) { + ReplaceSRRIP() : RPT(1ul << IW) { for (auto &s: used_map) s.resize(NW, 3); } @@ -236,7 +279,7 @@ class ReplaceSRRIP : public ReplaceFuncBase used_map[s][w] = 3; } if((int32_t)w == alloc_map[s] && demand_acc) this->set_alloc_map(s, -1); - RPT::delist_from_free(s, w, demand_acc); + if constexpr (EnMT) RPT::delist_from_free(s, w, demand_acc); } virtual void invalid(uint32_t s, uint32_t w) override { @@ -249,10 +292,10 @@ class ReplaceSRRIP : public ReplaceFuncBase // Random replacement // IW: index width, NW: number of ways // EF: empty first, DUO: demand update only (do not update state for release) -template -class ReplaceRandom : public ReplaceFuncBase +template +class ReplaceRandom : public ReplaceFuncBase { - typedef ReplaceFuncBase RPT; + typedef ReplaceFuncBase RPT; protected: using RPT::alloc_map; @@ -263,12 +306,12 @@ class ReplaceRandom : public ReplaceFuncBase } public: - ReplaceRandom() : RPT(1ul << IW, NW), loc_random(cm_alloc_rand32()) {} + ReplaceRandom() : RPT(1ul << IW), loc_random(cm_alloc_rand32()) {} virtual ~ReplaceRandom() override { delete loc_random; } virtual void access(uint32_t s, uint32_t w, bool demand_acc, bool prefetch) override { if((int32_t)w == alloc_map[s] && demand_acc) this->set_alloc_map(s, -1); - RPT::delist_from_free(s, w, demand_acc); + if constexpr (EnMT) RPT::delist_from_free(s, w, demand_acc); } }; diff --git a/util/cache_type.hpp b/util/cache_type.hpp index 9609a30..78a8eef 100644 --- a/util/cache_type.hpp +++ b/util/cache_type.hpp @@ -85,14 +85,14 @@ inline auto get_l1_core_interface(std::vector& array) { } template class RPT, - template class DRPT, + template class RPT, + template class DRPT, template class CPT, typename Policy, bool isL1, bool uncached, typename DLY, bool EnMon, bool EnMT> inline auto cache_gen(int size, const std::string& name_prefix) { using index_type = IndexNorm; - using replace_type = RPT; - using ext_replace_type = DRPT; + using replace_type = RPT; + using ext_replace_type = DRPT; constexpr bool isDir = ct::is_dir(); constexpr bool isExc = ct::is_exc_msi() || ct::is_exc_mesi(); static_assert(!(isExc && EnMT), "multithread support ia not available for exclusive caches!"); @@ -111,7 +111,7 @@ inline auto cache_gen(int size, const std::string& name_prefix) { } template class RPT, + template class RPT, template class CPT, typename Policy, bool uncached, typename DLY, bool EnMon, bool EnMT = false> inline auto cache_gen_l1(int size, const std::string& name_prefix) { @@ -119,7 +119,7 @@ inline auto cache_gen_l1(int size, const std::string& name_prefix) { } template class RPT, + template class RPT, template class CPT, typename Policy, bool uncached, typename DLY, bool EnMon, bool EnMT = false> inline auto cache_gen_inc(int size, const std::string& name_prefix) { @@ -127,7 +127,7 @@ inline auto cache_gen_inc(int size, const std::string& name_prefix) { } template class RPT, + template class RPT, template typename CPT, typename Policy, bool uncached, typename DLY, bool EnMon> inline auto cache_gen_exc(int size, const std::string& name_prefix) { @@ -136,8 +136,8 @@ inline auto cache_gen_exc(int size, const std::string& name_prefix) { } template class RPT, - template class DRPT, + template class RPT, + template class DRPT, template class CPT, typename Policy, bool uncached, typename DLY, bool EnMon> inline auto cache_gen_exc(int size, const std::string& name_prefix) { @@ -148,15 +148,15 @@ inline auto cache_gen_exc(int size, const std::string& name_prefix) { namespace ct { namespace mirage { template class MRPT, - template class DRPT, + template class MRPT, + template class DRPT, typename Outer, typename DLY, bool EnMon, bool EnableRelocation> struct types { using meta_index_type = IndexSkewed; using data_index_type = IndexRandom; - using meta_replace_type = MRPT; - using data_replace_type = DRPT; + using meta_replace_type = MRPT; + using data_replace_type = DRPT; using meta_metadata_type = MirageMetadataMSIBroadcast<48,0,6>; using data_metadata_type = MirageDataMeta; using cache_base_type = MirageCache