Skip to content

Commit

Permalink
Rate Limit Scan Statistics
Browse files Browse the repository at this point in the history
This pull request uses a `TokenBucket` to limit the number of keys that
read from `ScanStatistics` within a certain period of time.

Fixes: cockroachdb#2778
  • Loading branch information
raggar committed Aug 2, 2023
1 parent 32fc647 commit 03a3d55
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 55 deletions.
32 changes: 31 additions & 1 deletion db.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/cockroachdb/pebble/sstable"
"github.com/cockroachdb/pebble/vfs"
"github.com/cockroachdb/pebble/vfs/atomicfs"
"github.com/cockroachdb/tokenbucket"
"github.com/prometheus/client_golang/prometheus"
)

Expand Down Expand Up @@ -1183,6 +1184,7 @@ func (d *DB) ScanInternal(
visitRangeKey func(start, end []byte, keys []rangekey.Key) error,
visitSharedFile func(sst *SharedSSTMeta) error,
includeObsoleteKeys bool,
rateLimitFunc func(key *InternalKey, val LazyValue),
) error {
scanInternalOpts := &scanInternalOptions{
visitPointKey: visitPointKey,
Expand All @@ -1191,6 +1193,7 @@ func (d *DB) ScanInternal(
visitSharedFile: visitSharedFile,
skipSharedLevels: visitSharedFile != nil,
includeObsoleteKeys: includeObsoleteKeys,
rateLimitFunc: rateLimitFunc,
IterOptions: IterOptions{
KeyTypes: IterKeyTypePointsAndRanges,
LowerBound: lower,
Expand Down Expand Up @@ -2670,11 +2673,37 @@ type LSMKeyStatistics struct {
bytesRead uint64
}

// ScanStatisticsOptions is used by DB.ScanStatistics.
type ScanStatisticsOptions struct {
	// LimitBytesPerSecond indicates the number of bytes that are able to be read
	// per second using ScanInternal.
	// A value of 0 disables rate limiting; keys are read as fast as possible.
	LimitBytesPerSecond int64
}

// ScanStatistics returns the count of different key kinds within the lsm for a
// key span [lower, upper) as well as the number of snapshot keys.
func (d *DB) ScanStatistics(ctx context.Context, lower, upper []byte) (LSMKeyStatistics, error) {
func (d *DB) ScanStatistics(
ctx context.Context, lower, upper []byte, opts ScanStatisticsOptions,
) (LSMKeyStatistics, error) {
stats := LSMKeyStatistics{}
var prevKey InternalKey
var rateLimitFunc func(key *InternalKey, val LazyValue)
tb := tokenbucket.TokenBucket{}

if opts.LimitBytesPerSecond != 0 {
// Each "token" roughly corresponds to a byte that was read.
tb.Init(tokenbucket.TokensPerSecond(opts.LimitBytesPerSecond), tokenbucket.Tokens(1024))
rateLimitFunc = func(key *InternalKey, val LazyValue) {
for {
fulfilled, tryAgainAfter := tb.TryToFulfill(tokenbucket.Tokens(key.Size() + val.Len()))

if fulfilled {
break
}
time.Sleep(tryAgainAfter)
}
}
}

err := d.ScanInternal(ctx, lower, upper,
func(key *InternalKey, value LazyValue, iterInfo IteratorLevel) error {
Expand Down Expand Up @@ -2712,6 +2741,7 @@ func (d *DB) ScanStatistics(ctx context.Context, lower, upper []byte) (LSMKeySta
},
nil,
true,
rateLimitFunc,
)

if err != nil {
Expand Down
4 changes: 3 additions & 1 deletion ingest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,9 @@ func TestIngestShared(t *testing.T) {
sharedSSTs = append(sharedSSTs, *sst)
return nil
},
false)
false,
nil, /* rateLimitFunc */
)
require.NoError(t, err)
require.NoError(t, w.Close())

Expand Down
3 changes: 3 additions & 0 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,9 @@ type scanInternalOptions struct {
// includeObsoleteKeys specifies whether keys shadowed by newer internal keys
// are exposed. If false, only one internal key per user key is exposed.
includeObsoleteKeys bool

// rateLimitFunc is used to limit the amount of bytes read per second.
rateLimitFunc func(key *InternalKey, value LazyValue)
}

// RangeKeyMasking configures automatic hiding of point keys by range keys. A
Expand Down
46 changes: 5 additions & 41 deletions scan_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,6 @@ func (p *pointCollapsingIterator) String() string {

var _ internalIterator = &pointCollapsingIterator{}

<<<<<<< HEAD
type iteratorLevelKind int8

const (
Expand All @@ -374,14 +373,6 @@ type IteratorLevel struct {
Level int
// Sublevel is only valid if kind == iteratorLevelLSM and Level == 0.
Sublevel int
=======
// This is used with scanInternalIterator to surface additional iterator-specific info where possible.
// Note: this struct is only provided for point keys.
type iterInfo struct {
// level indicates the level of point key called with visitPointKey (level == -1 indicates an invalid level).
// Note: level may be inaccurate if scanInternalOptions.includeObsoleteKeys is False.
level int
>>>>>>> ba2f425f (pebble: Collect Pebble Key Statistics)
}

// scanInternalIterator is an iterator that returns all internal keys, including
Expand Down Expand Up @@ -410,11 +401,7 @@ type scanInternalIterator struct {
newIters tableNewIters
newIterRangeKey keyspan.TableNewSpanIter
seqNum uint64
<<<<<<< HEAD
iterLevels []IteratorLevel
=======
iterInfo []iterInfo
>>>>>>> ba2f425f (pebble: Collect Pebble Key Statistics)
mergingIter *mergingIter

// boundsBuf holds two buffers used to store the lower and upper bounds.
Expand Down Expand Up @@ -647,6 +634,11 @@ func scanInternalImpl(

for valid := iter.seekGE(lower); valid && iter.error() == nil; valid = iter.next() {
key := iter.unsafeKey()

if opts.rateLimitFunc != nil {
opts.rateLimitFunc(key, iter.lazyValue())
}

switch key.Kind() {
case InternalKeyKindRangeKeyDelete, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeySet:
if opts.visitRangeKey != nil {
Expand All @@ -664,24 +656,12 @@ func scanInternalImpl(
}
default:
if opts.visitPointKey != nil {
<<<<<<< HEAD
var info IteratorLevel
if len(iter.mergingIter.heap.items) > 0 {
mergingIterIdx := iter.mergingIter.heap.items[0].index
info = iter.iterLevels[mergingIterIdx]
} else {
info = IteratorLevel{kind: iteratorLevelUnknown}
=======
var info iterInfo
if len(iter.mergingIter.heap.items) > 0 {
mergingIterIdx := iter.mergingIter.heap.items[0].index
info = iter.iterInfo[mergingIterIdx]
} else {
// Point key does not have a valid level (mergingIter heap is empty).
info = iterInfo{
level: -1,
}
>>>>>>> ba2f425f (pebble: Collect Pebble Key Statistics)
}
val := iter.lazyValue()
if err := opts.visitPointKey(key, val, info); err != nil {
Expand Down Expand Up @@ -733,11 +713,7 @@ func (i *scanInternalIterator) constructPointIter(memtables flushableList, buf *
rangeDelIters := make([]keyspan.FragmentIterator, 0, numMergingLevels)
rangeDelLevels := make([]keyspan.LevelIter, 0, numLevelIters)

<<<<<<< HEAD
i.iterLevels = make([]IteratorLevel, numMergingLevels)
=======
i.iterInfo = make([]iterInfo, numMergingLevels)
>>>>>>> ba2f425f (pebble: Collect Pebble Key Statistics)
mlevelsIndex := 0

// Next are the memtables.
Expand All @@ -746,14 +722,10 @@ func (i *scanInternalIterator) constructPointIter(memtables flushableList, buf *
mlevels = append(mlevels, mergingIterLevel{
iter: mem.newIter(&i.opts.IterOptions),
})
<<<<<<< HEAD
i.iterLevels[mlevelsIndex] = IteratorLevel{
kind: iteratorLevelFlushable,
FlushableIndex: j,
}
=======
i.iterInfo[mlevelsIndex] = iterInfo{level: -1}
>>>>>>> ba2f425f (pebble: Collect Pebble Key Statistics)
mlevelsIndex++
if rdi := mem.newRangeDelIter(&i.opts.IterOptions); rdi != nil {
rangeDelIters = append(rangeDelIters, rdi)
Expand Down Expand Up @@ -785,15 +757,11 @@ func (i *scanInternalIterator) constructPointIter(memtables flushableList, buf *
}

for j := len(current.L0SublevelFiles) - 1; j >= 0; j-- {
<<<<<<< HEAD
i.iterLevels[mlevelsIndex] = IteratorLevel{
kind: iteratorLevelLSM,
Level: 0,
Sublevel: j,
}
=======
i.iterInfo[mlevelsIndex] = iterInfo{level: 0}
>>>>>>> ba2f425f (pebble: Collect Pebble Key Statistics)
addLevelIterForFiles(current.L0SublevelFiles[j].Iter(), manifest.L0Sublevel(j))
}
// Add level iterators for the non-empty non-L0 levels.
Expand All @@ -804,11 +772,7 @@ func (i *scanInternalIterator) constructPointIter(memtables flushableList, buf *
if i.opts.skipSharedLevels && level >= sharedLevelsStart {
continue
}
<<<<<<< HEAD
i.iterLevels[mlevelsIndex] = IteratorLevel{kind: iteratorLevelLSM, Level: level}
=======
i.iterInfo[mlevelsIndex] = iterInfo{level: level}
>>>>>>> ba2f425f (pebble: Collect Pebble Key Statistics)
addLevelIterForFiles(current.Levels[level].Iter(), manifest.Level(level))
}

Expand Down
9 changes: 4 additions & 5 deletions scan_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ func TestScanStatistics(t *testing.T) {
ScanStatistics(
ctx context.Context,
lower, upper []byte,
opts ScanStatisticsOptions,
) (LSMKeyStatistics, error)
}
batches := map[string]*Batch{}
Expand Down Expand Up @@ -163,7 +164,7 @@ func TestScanStatistics(t *testing.T) {
default:
}
}
stats, err := reader.ScanStatistics(ctx, lower, upper)
stats, err := reader.ScanStatistics(ctx, lower, upper, ScanStatisticsOptions{})
if err != nil {
return err.Error()
}
Expand Down Expand Up @@ -208,6 +209,7 @@ func TestScanInternal(t *testing.T) {
visitRangeKey func(start, end []byte, keys []rangekey.Key) error,
visitSharedFile func(sst *SharedSSTMeta) error,
includeObsoleteKeys bool,
rateLimitFunc func(key *InternalKey, val LazyValue),
) error
}
batches := map[string]*Batch{}
Expand Down Expand Up @@ -413,11 +415,7 @@ func TestScanInternal(t *testing.T) {
}
}
err := reader.ScanInternal(context.TODO(), lower, upper,
<<<<<<< HEAD
func(key *InternalKey, value LazyValue, _ IteratorLevel) error {
=======
func(key *InternalKey, value LazyValue, _ iterInfo) error {
>>>>>>> ba2f425f (pebble: Collect Pebble Key Statistics)
v := value.InPlaceValue()
fmt.Fprintf(&b, "%s (%s)\n", key, v)
return nil
Expand All @@ -433,6 +431,7 @@ func TestScanInternal(t *testing.T) {
},
fileVisitor,
false,
nil, /* rateLimitFunc */
)
if err != nil {
return err.Error()
Expand Down
2 changes: 2 additions & 0 deletions snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ func (s *Snapshot) ScanInternal(
visitRangeKey func(start, end []byte, keys []rangekey.Key) error,
visitSharedFile func(sst *SharedSSTMeta) error,
includeObsoleteKeys bool,
rateLimitFunc func(key *InternalKey, value LazyValue),
) error {
if s.db == nil {
panic(ErrClosed)
Expand All @@ -82,6 +83,7 @@ func (s *Snapshot) ScanInternal(
visitSharedFile: visitSharedFile,
skipSharedLevels: visitSharedFile != nil,
includeObsoleteKeys: includeObsoleteKeys,
rateLimitFunc: rateLimitFunc,
IterOptions: IterOptions{
KeyTypes: IterKeyTypePointsAndRanges,
LowerBound: lower,
Expand Down
7 changes: 0 additions & 7 deletions testdata/scan_statistics
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,10 @@ set e foo
----
committed 2 keys

<<<<<<< HEAD
scan-statistics lower=b upper=f keys=(SET)
----
Aggregate:
SET key count: 2
=======
scan-statistics lower=b upper=e keys=(SET)
----
Aggregate:
SET key count: 0
>>>>>>> ba2f425f (pebble: Collect Pebble Key Statistics)

flush
----
Expand Down

0 comments on commit 03a3d55

Please sign in to comment.