Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix replacer concurrency issue with atomic variable #155

Merged
merged 1 commit into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cache/exclusive.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ class CacheSkewedExclusive : public CacheSkewed<IW, NW, P, MT, DT, IDX, RPC, DLY
// EnMon: whether to enable monitoring
// EnDir: whether to enable the use of a directory
template<int IW, int NW, typename MT, typename DT, typename IDX, typename RPC, typename DLY, bool EnMon>
using CacheNormExclusiveBroadcast = CacheSkewedExclusive<IW, NW, 0, 1, MT, DT, IDX, RPC, ReplaceRandom<1,1>, DLY, EnMon, false>;
using CacheNormExclusiveBroadcast = CacheSkewedExclusive<IW, NW, 0, 1, MT, DT, IDX, RPC, ReplaceRandom<1,1,true,true,false>, DLY, EnMon, false>;

template<typename Policy, bool EnMT>
class ExclusiveInnerCohPortUncachedBroadcast : public InnerCohPortUncached<Policy, EnMT>
Expand Down
151 changes: 97 additions & 54 deletions cache/replace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,23 @@
#include "util/random.hpp"
#include "util/multithread.hpp"

#include <version>
#ifdef __cpp_lib_bitops
// for the popcount() supported in C++20
#include <bit>
#endif

///////////////////////////////////
// Base class
// EF: empty first
template<bool EF>
// EF: empty first, EnMT: multithread
template<bool EF, int NW, bool EnMT> requires NW <= 64
class ReplaceFuncBase
{
const uint32_t NW;
protected:
std::vector<std::vector<uint32_t> > used_map; // at the size of 16, vector is actually faster than list and does not require alloc
std::vector<std::vector<bool> > free_map;
std::vector<uint64_t> free_map_st; // free map when single thread
std::vector<std::atomic<uint64_t> *> free_map_mt; // multi-thread version
std::vector<int32_t> alloc_map; // record the way allocated for the next access (only one allocated way at any time)
std::vector<uint32_t> free_num;

#ifdef CHECK_MULTI
#ifdef BOOST_STACKTRACE_LINK
Expand All @@ -28,26 +33,47 @@ class ReplaceFuncBase
#endif
#endif

__always_inline uint32_t alloc_from_free(uint32_t s) {
free_num[s]--;
for(uint32_t i=0; i<NW; i++)
if(free_map[s][i]) {
free_map[s][i] = false;
return i;
}

assert(0 == "replacer free_map corrupted!");
return -1;
// Claim the lowest-numbered free way of set `s` and return its index,
// or return -1 when the set's free map is empty.
// In multithread mode (EnMT) the bit is cleared with a compare-exchange and
// the whole attempt is retried if the map changed since it was sampled.
__always_inline int32_t alloc_from_free(uint32_t s) {
  for(;;) {
    const uint64_t snapshot = EnMT ? free_map_mt[s]->load() : free_map_st[s];
    if(snapshot == 0) return -1; // nothing free in this set

    // isolate the least significant set bit (one-hot encoding of the chosen way)
    const uint64_t lowest_oh = snapshot & (~snapshot + 1ull);

    if constexpr (EnMT) {
      uint64_t expected = snapshot;
      const bool claimed = free_map_mt[s]->compare_exchange_strong(expected, snapshot & ~lowest_oh);
      if(!claimed) continue; // free map changed under us, sample it again
    } else {
      free_map_st[s] &= ~lowest_oh;
    }

    // convert the one-hot value back into a way index
    for(int way = 0; way < 64; ++way) {
      if(lowest_oh == (1ull << way)) return way;
    }
    assert(0 == "replacer free_map corrupted!");
    return -1;
  }
}

virtual uint32_t select(uint32_t s) = 0;

__always_inline void delist_from_free(uint32_t s, uint32_t w, bool demand_acc) {
// in multithread simulation, a simultaneous probe may invalidate a cache block waiting for permission promotion
if(!free_map[s][w]) return;
assert(demand_acc); // assume such situation can occur only in permission promotion
free_map[s][w] = false;
free_num[s]--;
// Clear way `w` of set `s` in the free map, i.e. mark the way as no longer free.
// In multithread mode the map is left untouched when the bit is already clear;
// otherwise the clear is retried until the compare-exchange succeeds.
__always_inline void delist_from_free(uint32_t s, uint32_t w) {
  const uint64_t mask = 1ull << w;
  if constexpr (EnMT) {
    uint64_t seen = free_map_mt[s]->load();
    // compare_exchange_strong refreshes `seen` with the current value on failure,
    // so each iteration re-tests the bit against up-to-date contents.
    while((seen & mask) != 0) {
      if(free_map_mt[s]->compare_exchange_strong(seen, seen & ~mask)) break;
    }
  } else {
    free_map_st[s] &= ~mask;
  }
}

// Set way `w` of set `s` in the free map, i.e. mark the way as free again.
// In multithread mode the bit is set with a single atomic read-modify-write
// (fetch_or, default sequentially-consistent ordering), which has the same
// effect as a load + compare-exchange retry loop that unconditionally ORs in
// the bit, without the retry.
__always_inline void list_to_free(uint32_t s, uint32_t w) {
  const uint64_t way_oh = 1ull << w;
  if constexpr (EnMT)
    free_map_mt[s]->fetch_or(way_oh);
  else
    free_map_st[s] |= way_oh;
}

__always_inline void set_alloc_map(uint32_t s, int32_t v) {
Expand Down Expand Up @@ -90,54 +116,71 @@ class ReplaceFuncBase
}

public:
ReplaceFuncBase(uint32_t nset, uint32_t nway)
:NW(nway), used_map(nset), free_map(nset), alloc_map(nset, -1), free_num(nset, nway) {
ReplaceFuncBase(uint32_t nset)
:used_map(nset), free_map_st(nset), free_map_mt(nset, nullptr), alloc_map(nset, -1) {
#ifdef CHECK_MULTI
#ifdef BOOST_STACKTRACE_LINK
alloc_record.resize(nset, {0, ""});
#else
alloc_record.resize(nset, 0);
#endif
#endif
for (auto &s: free_map) s.resize(NW, true);
constexpr uint64_t fmap = NW < 64 ? (1ull << NW) - 1 : ~(0ull);
if constexpr (EnMT) {
for (auto &s: free_map_mt) s = new std::atomic<uint64_t>(fmap);
} else
for (auto &s: free_map_st) s = fmap;
}

virtual ~ReplaceFuncBase() = default;
// Release the per-set atomic free maps; this is done only in multithread mode (EnMT).
virtual ~ReplaceFuncBase() {
  if constexpr (EnMT) {
    for (auto *fmap_ptr : free_map_mt) {
      delete fmap_ptr;
    }
  }
}

__always_inline uint32_t get_free_num(uint32_t s) const { return free_num[s]; }
// Return the number of free ways in set `s`, computed as the population count
// of the set's free-map bit vector (atomic map when EnMT, plain map otherwise).
// Uses std::popcount when <bit> is available, the GCC/Clang builtin otherwise,
// and a portable shift-and-add loop as the last resort.
__always_inline uint32_t get_free_num(uint32_t s) {
  auto fmap = EnMT ? free_map_mt[s]->load() : free_map_st[s];
#ifdef __cpp_lib_bitops
  return std::popcount(fmap);
#elif defined __GNUG__
  return __builtin_popcountll(fmap);
#else
  uint32_t rv = 0;
  while(fmap) {
    rv += (fmap & 0x1ull);
    fmap >>= 1; // must assign: a bare `fmap >> 1` discards the result and the loop never terminates
  }
  return rv;
#endif
}

virtual void replace(uint32_t s, uint32_t *w) {
uint32_t i = 0;
int32_t i = 0;
if constexpr (EF) {
if(free_num[s] > 0) i = alloc_from_free(s);
else i = select(s);
i = alloc_from_free(s);
if (i<0) i = select(s);
} else {
i = select(s);
if(free_map[s][i]) { free_num[s]--; free_map[s][i] = false; }
delist_from_free(s, i);
}
assert(i < NW || 0 == "replacer used_map corrupted!");
assert((uint32_t)i < NW || 0 == "replacer used_map corrupted!");
this->set_alloc_map(s, i);
*w = i;
}

virtual void access(uint32_t s, uint32_t w, bool demand_acc, bool prefetch) = 0;

virtual void invalid(uint32_t s, uint32_t w) {
if((int32_t)w != alloc_map[s]) {
free_map[s][w] = true;
free_num[s]++;
}
if((int32_t)w != alloc_map[s]) list_to_free(s, w);
}
};

/////////////////////////////////
// FIFO replacement
// IW: index width, NW: number of ways
// EF: empty first, DUO: demand update only (do not update state for release)
template<int IW, int NW, bool EF = true, bool DUO = true>
class ReplaceFIFO : public ReplaceFuncBase<EF>
template<int IW, int NW, bool EF, bool DUO, bool EnMT>
class ReplaceFIFO : public ReplaceFuncBase<EF, NW, EnMT>
{
typedef ReplaceFuncBase<EF> RPT;
typedef ReplaceFuncBase<EF, NW, EnMT> RPT;
protected:
using RPT::alloc_map;
using RPT::used_map;
Expand All @@ -150,7 +193,7 @@ class ReplaceFIFO : public ReplaceFuncBase<EF>
}

public:
ReplaceFIFO() : RPT(1ul << IW, NW) {
ReplaceFIFO() : RPT(1ul << IW) {
for (auto &s: used_map) {
s.resize(NW);
for(uint32_t i=0; i<NW; i++) s[i] = i;
Expand All @@ -169,18 +212,18 @@ class ReplaceFIFO : public ReplaceFuncBase<EF>
used_map[s][w] = 0; // insert at LRU position
}
}
RPT::delist_from_free(s, w, demand_acc);
if constexpr (EnMT) RPT::delist_from_free(s, w);
}
};

/////////////////////////////////
// LRU replacement
// IW: index width, NW: number of ways
// EF: empty first, DUO: demand update only (do not update state for release)
template<int IW, int NW, bool EF = true, bool DUO = true>
class ReplaceLRU : public ReplaceFIFO<IW, NW, EF, DUO>
template<int IW, int NW, bool EF, bool DUO, bool EnMT>
class ReplaceLRU : public ReplaceFIFO<IW, NW, EF, DUO, EnMT>
{
typedef ReplaceFuncBase<EF> RPT;
typedef ReplaceFuncBase<EF, NW, EnMT> RPT;
protected:
using RPT::alloc_map;
using RPT::used_map;
Expand All @@ -198,18 +241,18 @@ class ReplaceLRU : public ReplaceFIFO<IW, NW, EF, DUO>
}
}
if((int32_t)w == alloc_map[s] && demand_acc) this->set_alloc_map(s, -1);
RPT::delist_from_free(s, w, demand_acc);
if constexpr (EnMT) RPT::delist_from_free(s, w);
}
};

/////////////////////////////////
// Static RRIP replacement
// IW: index width, NW: number of ways
// EF: empty first, DUO: demand update only (do not update state for release)
template<int IW, int NW, bool EF = true, bool DUO = true>
class ReplaceSRRIP : public ReplaceFuncBase<EF>
template<int IW, int NW, bool EF, bool DUO, bool EnMT>
class ReplaceSRRIP : public ReplaceFuncBase<EF, NW, EnMT>
{
typedef ReplaceFuncBase<EF> RPT;
typedef ReplaceFuncBase<EF, NW, EnMT> RPT;
protected:
using RPT::used_map;
using RPT::alloc_map;
Expand All @@ -224,7 +267,7 @@ class ReplaceSRRIP : public ReplaceFuncBase<EF>
}

public:
ReplaceSRRIP() : RPT(1ul << IW, NW) {
ReplaceSRRIP() : RPT(1ul << IW) {
for (auto &s: used_map) s.resize(NW, 3);
}

Expand All @@ -236,7 +279,7 @@ class ReplaceSRRIP : public ReplaceFuncBase<EF>
used_map[s][w] = 3;
}
if((int32_t)w == alloc_map[s] && demand_acc) this->set_alloc_map(s, -1);
RPT::delist_from_free(s, w, demand_acc);
if constexpr (EnMT) RPT::delist_from_free(s, w, demand_acc);
}

virtual void invalid(uint32_t s, uint32_t w) override {
Expand All @@ -249,10 +292,10 @@ class ReplaceSRRIP : public ReplaceFuncBase<EF>
// Random replacement
// IW: index width, NW: number of ways
// EF: empty first, DUO: demand update only (do not update state for release)
template<int IW, int NW, bool EF = true, bool DUO = true>
class ReplaceRandom : public ReplaceFuncBase<EF>
template<int IW, int NW, bool EF, bool DUO, bool EnMT>
class ReplaceRandom : public ReplaceFuncBase<EF, NW, EnMT>
{
typedef ReplaceFuncBase<EF> RPT;
typedef ReplaceFuncBase<EF, NW, EnMT> RPT;
protected:
using RPT::alloc_map;

Expand All @@ -263,12 +306,12 @@ class ReplaceRandom : public ReplaceFuncBase<EF>
}

public:
ReplaceRandom() : RPT(1ul << IW, NW), loc_random(cm_alloc_rand32()) {}
ReplaceRandom() : RPT(1ul << IW), loc_random(cm_alloc_rand32()) {}
virtual ~ReplaceRandom() override { delete loc_random; }

virtual void access(uint32_t s, uint32_t w, bool demand_acc, bool prefetch) override {
if((int32_t)w == alloc_map[s] && demand_acc) this->set_alloc_map(s, -1);
RPT::delist_from_free(s, w, demand_acc);
if constexpr (EnMT) RPT::delist_from_free(s, w, demand_acc);
}
};

Expand Down
26 changes: 13 additions & 13 deletions util/cache_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,14 @@ inline auto get_l1_core_interface(std::vector<CoherentCacheBase *>& array) {
}

template<int IW, int WN, int DW, typename DT, typename MT,
template <int, int, bool> class RPT,
template <int, int, bool> class DRPT,
template <int, int, bool, bool, bool> class RPT,
template <int, int, bool, bool, bool> class DRPT,
template <bool, bool, typename> class CPT, typename Policy,
bool isL1, bool uncached, typename DLY, bool EnMon, bool EnMT>
inline auto cache_gen(int size, const std::string& name_prefix) {
using index_type = IndexNorm<IW,6>;
using replace_type = RPT<IW,WN,true>;
using ext_replace_type = DRPT<IW,DW,true>;
using replace_type = RPT<IW,WN,true,true,EnMT>;
using ext_replace_type = DRPT<IW,DW,true,true,EnMT>;
constexpr bool isDir = ct::is_dir<MT>();
constexpr bool isExc = ct::is_exc_msi<CPT>() || ct::is_exc_mesi<CPT>();
static_assert(!(isExc && EnMT), "multithread support ia not available for exclusive caches!");
Expand All @@ -111,23 +111,23 @@ inline auto cache_gen(int size, const std::string& name_prefix) {
}

template<int IW, int WN, typename DT, typename MT,
template <int, int, bool> class RPT,
template <int, int, bool, bool, bool> class RPT,
template <bool, bool, typename> class CPT, typename Policy,
bool uncached, typename DLY, bool EnMon, bool EnMT = false>
inline auto cache_gen_l1(int size, const std::string& name_prefix) {
return cache_gen<IW, WN, 1, DT, MT, RPT, ReplaceLRU, CPT, Policy, true, uncached, DLY, EnMon, EnMT>(size, name_prefix);
}

template<int IW, int WN, typename DT, typename MT,
template <int, int, bool> class RPT,
template <int, int, bool, bool, bool> class RPT,
template <bool, bool, typename> class CPT, typename Policy,
bool uncached, typename DLY, bool EnMon, bool EnMT = false>
inline auto cache_gen_inc(int size, const std::string& name_prefix) {
return cache_gen<IW, WN, 1, DT, MT, RPT, ReplaceLRU, CPT, Policy, false, uncached, DLY, EnMon, EnMT>(size, name_prefix);
}

template<int IW, int WN, typename DT, typename MT,
template <int, int, bool> class RPT,
template <int, int, bool, bool, bool> class RPT,
template <bool, bool, typename> typename CPT, typename Policy,
bool uncached, typename DLY, bool EnMon>
inline auto cache_gen_exc(int size, const std::string& name_prefix) {
Expand All @@ -136,8 +136,8 @@ inline auto cache_gen_exc(int size, const std::string& name_prefix) {
}

template<int IW, int WN, int DW, typename DT, typename MT,
template <int, int, bool> class RPT,
template <int, int, bool> class DRPT,
template <int, int, bool, bool, bool> class RPT,
template <int, int, bool, bool, bool> class DRPT,
template <bool, bool, typename> class CPT, typename Policy,
bool uncached, typename DLY, bool EnMon>
inline auto cache_gen_exc(int size, const std::string& name_prefix) {
Expand All @@ -148,15 +148,15 @@ inline auto cache_gen_exc(int size, const std::string& name_prefix) {
namespace ct {
namespace mirage {
template<int IW, int WN, int EW, int P, int MaxRelocN, typename DT,
template <int, int, bool> class MRPT,
template <int, int, bool> class DRPT,
template <int, int, bool, bool, bool> class MRPT,
template <int, int, bool, bool, bool> class DRPT,
typename Outer,
typename DLY, bool EnMon, bool EnableRelocation>
struct types {
using meta_index_type = IndexSkewed<IW, 6, P>;
using data_index_type = IndexRandom<IW, 6>;
using meta_replace_type = MRPT<IW, WN, true>;
using data_replace_type = DRPT<IW, WN*P, true>;
using meta_replace_type = MRPT<IW, WN, true, true, false>;
using data_replace_type = DRPT<IW, WN*P, true, true, false>;
using meta_metadata_type = MirageMetadataMSIBroadcast<48,0,6>;
using data_metadata_type = MirageDataMeta;
using cache_base_type = MirageCache<IW, WN, EW, P, MaxRelocN,
Expand Down
Loading