Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cap eviction effort (CPU under stress) in HyperClockCache #12141

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cache/cache_bench_tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ DEFINE_uint64(cache_size, 1 * GiB,
"Number of bytes to use as a cache of uncompressed data.");
DEFINE_int32(num_shard_bits, -1,
"ShardedCacheOptions::shard_bits. Default = auto");
DEFINE_int32(
eviction_effort_cap,
ROCKSDB_NAMESPACE::HyperClockCacheOptions(1, 1).eviction_effort_cap,
"HyperClockCacheOptions::eviction_effort_cap");

DEFINE_double(resident_ratio, 0.25,
"Ratio of keys fitting in cache to keyspace.");
Expand Down Expand Up @@ -391,6 +395,7 @@ class CacheBench {
FLAGS_cache_size, /*estimated_entry_charge=*/0, FLAGS_num_shard_bits);
opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX);
opts.memory_allocator = allocator;
opts.eviction_effort_cap = FLAGS_eviction_effort_cap;
if (FLAGS_cache_type == "fixed_hyper_clock_cache" ||
FLAGS_cache_type == "hyper_clock_cache") {
opts.estimated_entry_charge = FLAGS_value_bytes_estimate > 0
Expand Down
135 changes: 94 additions & 41 deletions cache/clock_cache.cc

Large diffs are not rendered by default.

68 changes: 49 additions & 19 deletions cache/clock_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include <array>
#include <atomic>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <memory>
Expand Down Expand Up @@ -374,6 +375,14 @@ struct ClockHandle : public ClockHandleBasicData {

class BaseClockTable {
public:
  // Options common to all BaseClockTable implementations, carrying the
  // eviction effort cap from HyperClockCacheOptions down to the table.
  struct BaseOpts {
    explicit BaseOpts(int _eviction_effort_cap)
        : eviction_effort_cap(_eviction_effort_cap) {}
    // Convenience conversion from full cache options
    explicit BaseOpts(const HyperClockCacheOptions& opts)
        : BaseOpts(opts.eviction_effort_cap) {}
    // See HyperClockCacheOptions::eviction_effort_cap
    int eviction_effort_cap;
  };

BaseClockTable(CacheMetadataChargePolicy metadata_charge_policy,
MemoryAllocator* allocator,
const Cache::EvictionCallback* eviction_callback,
Expand All @@ -386,13 +395,13 @@ class BaseClockTable {
template <class Table>
typename Table::HandleImpl* CreateStandalone(ClockHandleBasicData& proto,
size_t capacity,
bool strict_capacity_limit,
uint32_t eec_and_scl,
bool allow_uncharged);

template <class Table>
Status Insert(const ClockHandleBasicData& proto,
typename Table::HandleImpl** handle, Cache::Priority priority,
size_t capacity, bool strict_capacity_limit);
size_t capacity, uint32_t eec_and_scl);

void Ref(ClockHandle& handle);

Expand All @@ -406,12 +415,17 @@ class BaseClockTable {

uint64_t GetYieldCount() const { return yield_count_.LoadRelaxed(); }

uint64_t GetEvictionEffortExceededCount() const {
return eviction_effort_exceeded_count_.LoadRelaxed();
}

struct EvictionData {
size_t freed_charge = 0;
size_t freed_count = 0;
size_t seen_pinned_count = 0;
};

void TrackAndReleaseEvictedEntry(ClockHandle* h, EvictionData* data);
void TrackAndReleaseEvictedEntry(ClockHandle* h);

#ifndef NDEBUG
// Acquire N references
Expand All @@ -436,6 +450,7 @@ class BaseClockTable {
template <class Table>
Status ChargeUsageMaybeEvictStrict(size_t total_charge, size_t capacity,
bool need_evict_for_occupancy,
uint32_t eviction_effort_cap,
typename Table::InsertState& state);

// Helper for updating `usage_` for new entry with given `total_charge`
Expand All @@ -449,6 +464,7 @@ class BaseClockTable {
template <class Table>
bool ChargeUsageMaybeEvictNonStrict(size_t total_charge, size_t capacity,
bool need_evict_for_occupancy,
uint32_t eviction_effort_cap,
typename Table::InsertState& state);

protected: // data
Expand All @@ -461,9 +477,15 @@ class BaseClockTable {
RelaxedAtomic<uint64_t> clock_pointer_{};

// Counter for number of times we yield to wait on another thread.
// It is normal for this to occur rarely in normal operation.
// (Relaxed: a simple stat counter.)
RelaxedAtomic<uint64_t> yield_count_{};

// Counter for number of times eviction effort cap is exceeded.
// It is normal for this to occur rarely in normal operation.
// (Relaxed: a simple stat counter.)
RelaxedAtomic<uint64_t> eviction_effort_exceeded_count_{};

// TODO: is this separation needed if we don't do background evictions?
ALIGN_AS(CACHE_LINE_SIZE)
// Number of elements in the table.
Expand Down Expand Up @@ -517,17 +539,19 @@ class FixedHyperClockTable : public BaseClockTable {
inline void SetStandalone() { standalone = true; }
}; // struct HandleImpl

struct Opts {
explicit Opts(size_t _estimated_value_size)
: estimated_value_size(_estimated_value_size) {}
explicit Opts(const HyperClockCacheOptions& opts) {
  // Construction options for FixedHyperClockTable, extending BaseOpts
  // with the fixed table's per-entry size estimate.
  struct Opts : public BaseOpts {
    explicit Opts(size_t _estimated_value_size, int _eviction_effort_cap)
        : BaseOpts(_eviction_effort_cap),
          estimated_value_size(_estimated_value_size) {}
    explicit Opts(const HyperClockCacheOptions& opts)
        : BaseOpts(opts.eviction_effort_cap) {
      // Fixed table requires a positive per-entry charge estimate
      // (zero selects the auto table elsewhere)
      assert(opts.estimated_entry_charge > 0);
      estimated_value_size = opts.estimated_entry_charge;
    }
    size_t estimated_value_size;
  };

FixedHyperClockTable(size_t capacity, bool strict_capacity_limit,
FixedHyperClockTable(size_t capacity,
CacheMetadataChargePolicy metadata_charge_policy,
MemoryAllocator* allocator,
const Cache::EvictionCallback* eviction_callback,
Expand All @@ -549,7 +573,8 @@ class FixedHyperClockTable : public BaseClockTable {
// Runs the clock eviction algorithm trying to reclaim at least
// requested_charge. Returns how much is evicted, which could be less
// if it appears impossible to evict the requested amount without blocking.
void Evict(size_t requested_charge, InsertState& state, EvictionData* data);
void Evict(size_t requested_charge, InsertState& state, EvictionData* data,
uint32_t eviction_effort_cap);

HandleImpl* Lookup(const UniqueId64x2& hashed_key);

Expand Down Expand Up @@ -803,18 +828,20 @@ class AutoHyperClockTable : public BaseClockTable {
}
}; // struct HandleImpl

struct Opts {
explicit Opts(size_t _min_avg_value_size)
: min_avg_value_size(_min_avg_value_size) {}
  // Construction options for AutoHyperClockTable, extending BaseOpts
  // with the auto table's minimum average value size.
  struct Opts : public BaseOpts {
    explicit Opts(size_t _min_avg_value_size, int _eviction_effort_cap)
        : BaseOpts(_eviction_effort_cap),
          min_avg_value_size(_min_avg_value_size) {}

    explicit Opts(const HyperClockCacheOptions& opts)
        : BaseOpts(opts.eviction_effort_cap) {
      // Auto table is selected by estimated_entry_charge == 0
      assert(opts.estimated_entry_charge == 0);
      min_avg_value_size = opts.min_avg_entry_charge;
    }
    size_t min_avg_value_size;
  };

AutoHyperClockTable(size_t capacity, bool strict_capacity_limit,
AutoHyperClockTable(size_t capacity,
CacheMetadataChargePolicy metadata_charge_policy,
MemoryAllocator* allocator,
const Cache::EvictionCallback* eviction_callback,
Expand All @@ -841,7 +868,8 @@ class AutoHyperClockTable : public BaseClockTable {
// Runs the clock eviction algorithm trying to reclaim at least
// requested_charge. Returns how much is evicted, which could be less
// if it appears impossible to evict the requested amount without blocking.
void Evict(size_t requested_charge, InsertState& state, EvictionData* data);
void Evict(size_t requested_charge, InsertState& state, EvictionData* data,
uint32_t eviction_effort_cap);

HandleImpl* Lookup(const UniqueId64x2& hashed_key);

Expand Down Expand Up @@ -906,7 +934,8 @@ class AutoHyperClockTable : public BaseClockTable {
// with proper handling to ensure all existing data is seen even in the
// presence of concurrent insertions, etc. (See implementation.)
template <class OpData>
void PurgeImpl(OpData* op_data, size_t home = SIZE_MAX);
void PurgeImpl(OpData* op_data, size_t home = SIZE_MAX,
EvictionData* data = nullptr);

// An RAII wrapper for locking a chain of entries for removals. See
// implementation.
Expand All @@ -916,7 +945,7 @@ class AutoHyperClockTable : public BaseClockTable {
// implementation.
template <class OpData>
void PurgeImplLocked(OpData* op_data, ChainRewriteLock& rewrite_lock,
size_t home);
size_t home, EvictionData* data);

// Update length_info_ as much as possible without waiting, given a known
// usable (ready for inserts and lookups) grow_home. (Previous grow_homes
Expand Down Expand Up @@ -1078,9 +1107,10 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShardBase {
// (Relaxed: eventual consistency/update is OK)
RelaxedAtomic<size_t> capacity_;

// Whether to reject insertion if cache reaches its full capacity.
// Encodes eviction_effort_cap (bottom 31 bits) and strict_capacity_limit
// (top bit). See HyperClockCacheOptions::eviction_effort_cap etc.
// (Relaxed: eventual consistency/update is OK)
RelaxedAtomic<bool> strict_capacity_limit_;
RelaxedAtomic<uint32_t> eec_and_scl_;
}; // class ClockCacheShard

template <class Table>
Expand Down
2 changes: 2 additions & 0 deletions cache/compressed_secondary_cache_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,8 @@ class CompressedSecCacheTestWithTiered
/*_capacity=*/0,
/*_estimated_entry_charge=*/256 << 10,
/*_num_shard_bits=*/0);
// eviction_effort_cap setting simply to avoid churn in existing test
hcc_opts.eviction_effort_cap = 100;
TieredCacheOptions opts;
lru_opts.capacity = 0;
lru_opts.num_shard_bits = 0;
Expand Down
60 changes: 58 additions & 2 deletions cache/lru_cache_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -389,12 +389,13 @@ class ClockCacheTest : public testing::Test {
}
}

void NewShard(size_t capacity, bool strict_capacity_limit = true) {
void NewShard(size_t capacity, bool strict_capacity_limit = true,
int eviction_effort_cap = 30) {
DeleteShard();
shard_ =
reinterpret_cast<Shard*>(port::cacheline_aligned_alloc(sizeof(Shard)));

TableOpts opts{1 /*value_size*/};
TableOpts opts{1 /*value_size*/, eviction_effort_cap};
new (shard_)
Shard(capacity, strict_capacity_limit, kDontChargeCacheMetadata,
/*allocator*/ nullptr, &eviction_callback_, &hash_seed_, opts);
Expand Down Expand Up @@ -445,12 +446,20 @@ class ClockCacheTest : public testing::Test {
return Slice(reinterpret_cast<const char*>(&hashed_key), 16U);
}

// A bad hash function for testing / stressing collision handling
static inline UniqueId64x2 TestHashedKey(char key) {
// For testing hash near-collision behavior, put the variance in
// hashed_key in bits that are unlikely to be used as hash bits.
return {(static_cast<uint64_t>(key) << 56) + 1234U, 5678U};
}

// A reasonable hash function, for testing "typical behavior" etc.
template <typename T>
static inline UniqueId64x2 CheapHash(T i) {
return {static_cast<uint64_t>(i) * uint64_t{0x85EBCA77C2B2AE63},
static_cast<uint64_t>(i) * uint64_t{0xC2B2AE3D27D4EB4F}};
}

Shard* shard_ = nullptr;

private:
Expand Down Expand Up @@ -683,6 +692,53 @@ TYPED_TEST(ClockCacheTest, ClockEvictionTest) {
}
}

// Verifies that eviction_effort_cap bounds how far table occupancy can grow
// beyond capacity when most of the cache is pinned, and that
// strict_capacity_limit=true keeps occupancy exactly at capacity.
TYPED_TEST(ClockCacheTest, ClockEvictionEffortCapTest) {
  using HandleImpl = typename ClockCacheTest<TypeParam>::Shard::HandleImpl;
  for (bool strict_capacity_limit : {true, false}) {
    SCOPED_TRACE("strict_capacity_limit = " +
                 std::to_string(strict_capacity_limit));
    // Covers below-minimum values (presumably clamped internally — confirm
    // against clock_cache.cc), the minimum, and large caps
    for (int eec : {-42, 0, 1, 10, 100, 1000}) {
      SCOPED_TRACE("eviction_effort_cap = " + std::to_string(eec));
      constexpr size_t kCapacity = 1000;
      // Start with much larger capacity to ensure that we can go way over
      // capacity without reaching table occupancy limit.
      this->NewShard(3 * kCapacity, strict_capacity_limit, eec);
      auto& shard = *this->shard_;
      shard.SetCapacity(kCapacity);

      // Nearly fill the cache with pinned entries, then add a bunch of
      // non-pinned entries. eviction_effort_cap should affect how many
      // evictable entries are present beyond the cache capacity, despite
      // being evictable.
      constexpr size_t kCount = kCapacity - 1;
      std::unique_ptr<HandleImpl* []> ha { new HandleImpl* [kCount] {} };
      for (size_t i = 0; i < 2 * kCount; ++i) {
        UniqueId64x2 hkey = this->CheapHash(i);
        // First kCount inserts keep a handle (pinned); the remaining
        // kCount release immediately, so they are evictable.
        ASSERT_OK(shard.Insert(
            this->TestKey(hkey), hkey, nullptr /*value*/, &kNoopCacheItemHelper,
            1 /*charge*/, i < kCount ? &ha[i] : nullptr, Cache::Priority::LOW));
      }

      if (strict_capacity_limit) {
        // If strict_capacity_limit is enabled, the cache will never exceed its
        // capacity
        EXPECT_EQ(shard.GetOccupancyCount(), kCapacity);
      } else {
        // Rough inverse relationship between cap and possible memory
        // explosion, which shows up as increased table occupancy count.
        // NOTE(review): the max(1, eec) + 1 and the 1.4/0.6 factors are
        // empirically tuned bounds — confirm against implementation if the
        // clamping behavior of eviction_effort_cap ever changes.
        int effective_eec = std::max(int{1}, eec) + 1;
        EXPECT_NEAR(shard.GetOccupancyCount() * 1.0,
                    kCount * (1 + 1.4 / effective_eec),
                    kCount * (0.6 / effective_eec) + 1.0);
      }

      // Unpin the held handles so the shard can be torn down cleanly
      for (size_t i = 0; i < kCount; ++i) {
        shard.Release(ha[i]);
      }
    }
  }
}

namespace {
struct DeleteCounter {
int deleted = 0;
Expand Down
40 changes: 37 additions & 3 deletions include/rocksdb/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -380,9 +380,6 @@ inline std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
// to find the appropriate balance automatically.
// * Cache priorities are less aggressively enforced, which could cause
// cache dilution from long range scans (unless they use fill_cache=false).
// * Can be worse for small caches, because if almost all of a cache shard is
// pinned (more likely with non-partitioned filters), then CLOCK eviction
// becomes very CPU intensive.
//
// See internal cache/clock_cache.h for full description.
struct HyperClockCacheOptions : public ShardedCacheOptions {
Expand Down Expand Up @@ -441,6 +438,43 @@ struct HyperClockCacheOptions : public ShardedCacheOptions {
// load factor for efficient Lookup, Insert, etc.
size_t min_avg_entry_charge = 450;

// A tuning parameter to cap eviction CPU usage in a "thrashing" situation
// by allowing the memory capacity to be exceeded slightly as needed. The
// default setting should offer balanced protection against excessive CPU
// and memory usage under extreme stress conditions, with no effect on
// normal operation. Such stress conditions are proportionally more likely
// with small caches (10s of MB or less) vs. large caches (GB-scale).
// (NOTE: With the unusual setting of strict_capacity_limit=true, this
// parameter is ignored.)
//
// BACKGROUND: Without some kind of limiter, inserting into a CLOCK-based
// cache with no evictable entries (all "pinned") requires scanning the
// entire cache to determine that nothing can be evicted. (By contrast,
// LRU caches can determine no entries are evictable in O(1) time, but
// require more synchronization/coordination on that eviction metadata.)
// This aspect of a CLOCK cache can make a stressed situation worse by
// bogging down the CPU with repeated scans of the cache. And with
// strict_capacity_limit=false (normal setting), finding something evictable
// doesn't change the outcome of insertion: the entry is inserted anyway
// and the cache is allowed to exceed its target capacity if necessary.
//
// SOLUTION: Eviction is aborted upon seeing some number of pinned
// entries before evicting anything, or if the ratio of pinned to evicted
// is too high. This setting `eviction_effort_cap` essentially controls both
// that allowed initial number of pinned entries and the maximum allowed
// ratio. As the pinned size approaches the target cache capacity, roughly
// 1/eviction_effort_cap additional portion of the capacity might be kept
// in memory and evictable in order to keep CLOCK eviction reasonably
// performant. Under the default setting and high stress conditions, this
// memory overhead is around 3-5%. Under normal or even moderate stress
// conditions, the memory overhead is negligible to zero.
//
// A large value like 1000 offers some protection with essentially no
// memory overhead, while the minimum value of 1 could be useful for a
// small cache where roughly doubling in size under stress could be OK to
// keep operations very fast.
int eviction_effort_cap = 30;

HyperClockCacheOptions(
size_t _capacity, size_t _estimated_entry_charge,
int _num_shard_bits = -1, bool _strict_capacity_limit = false,
Expand Down
1 change: 1 addition & 0 deletions unreleased_history/behavior_changes/eviction_effort_cap.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
HyperClockCache now has built-in protection against excessive CPU consumption under the extreme stress condition of no (or very few) evictable cache entries, which can slightly increase memory usage under such conditions. New option `HyperClockCacheOptions::eviction_effort_cap` controls the space-time trade-off of the response. The default should be generally well-balanced, with no measurable effect on normal operation.
Loading