Skip to content

Commit

Permalink
db: have excises wait for EFOS below them, reducing EFOS waits
Browse files Browse the repository at this point in the history
Previously, we had to make EventuallyFileOnlySnapshots (EFOS) wait
for all relevant keys in their key bounds to be flushed, just in case
an excise came in before that happened. This change avoids the need
to do that by assigning a seqnum to every excise (which will be
useful when we do #2676), and by flushing any memtables that
overlap with the protected ranges of any EFOS that has a sequence
number below the excise.

The reduction in waiting is desirable as actual conflicts between
excises and EventuallyFileOnlySnapshots are rare, while users of
EventuallyFileOnlySnapshots were previously stuck waiting or stuck
doing excessive flushes at times when no such excises were happening.

Fixes #3210.
  • Loading branch information
itsbilal committed Jan 31, 2024
1 parent fea6cbb commit 1d7852b
Show file tree
Hide file tree
Showing 13 changed files with 183 additions and 237 deletions.
8 changes: 0 additions & 8 deletions compaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -2067,14 +2067,6 @@ func (d *DB) flush1() (bytesFlushed uint64, err error) {
s = s.next
continue
}
if s.efos.excised.Load() {
// If a concurrent excise has happened that overlaps with one of the key
// ranges this snapshot is interested in, this EFOS cannot transition to
// a file-only snapshot as keys in that range could now be deleted. Move
// onto the next snapshot.
s = s.next
continue
}
currentVersion.Ref()

// NB: s.efos.transitionToFileOnlySnapshot could close s, in which
Expand Down
70 changes: 65 additions & 5 deletions ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -1392,6 +1392,52 @@ func (d *DB) ingest(
d.mu.Lock()
defer d.mu.Unlock()

// Check if any of the currently-open EventuallyFileOnlySnapshots overlap
// in key ranges with the excise span. If so, we need to check for memtable
// overlaps with all bounds of that EventuallyFileOnlySnapshot in addition
// to the ingestion's own bounds too.

if exciseSpan.Valid() {
for s := d.mu.snapshots.root.next; s != &d.mu.snapshots.root; s = s.next {
if s.efos == nil {
continue
}
if base.Visible(seqNum, s.efos.seqNum, base.InternalKeySeqNumMax) {
// We only worry about snapshots older than the excise. Any snapshots
// created after the excise should see the excised view of the LSM
// anyway.
//
// Since we delay publishing the excise seqnum as visible until after
// the apply step, this case will never be hit in practice until we
// make excises flushable ingests.
continue
}
if invariants.Enabled {
if s.efos.hasTransitioned() {
panic("unexpected transitioned EFOS in snapshots list")
}
}
for i := range s.efos.protectedRanges {
if !s.efos.protectedRanges[i].OverlapsKeyRange(d.cmp, exciseSpan) {
continue
}
// Our excise conflicts with this EFOS. We need to add its protected
// ranges to our overlapBounds. Grow overlapBounds in one allocation
// if necesary.
prs := s.efos.protectedRanges
if cap(overlapBounds) < len(overlapBounds)+len(prs) {
oldOverlapBounds := overlapBounds
overlapBounds = make([]bounded, len(oldOverlapBounds), len(oldOverlapBounds)+len(prs))
copy(overlapBounds, oldOverlapBounds)
}
for i := range prs {
overlapBounds = append(overlapBounds, &prs[i])
}
break
}
}
}

// Check to see if any files overlap with any of the memtables. The queue
// is ordered from oldest to newest with the mutable memtable being the
// last element in the slice. We want to wait for the newest table that
Expand Down Expand Up @@ -1419,7 +1465,8 @@ func (d *DB) ingest(
// flushable queue and switching to a new memtable.
metaFlushableOverlaps[v.FileNum] = true
case *KeyRange:
// An excise span; not a file.
// An excise span or an EventuallyFileOnlySnapshot protected range;
// not a file.
default:
panic("unreachable")
}
Expand Down Expand Up @@ -1491,6 +1538,13 @@ func (d *DB) ingest(
return
}

// If there's an excise being done atomically with the same ingest, we
// assign the lowest sequence number in the set of sequence numbers for this
// ingestion to the excise. Note that we've already allocated fileCount+1
// sequence numbers in this case.
if exciseSpan.Valid() {
seqNum++ // the first seqNum is reserved for the excise.
}
// Update the sequence numbers for all ingested sstables'
// metadata. When the version edit is applied, the metadata is
// written to the manifest, persisting the sequence number.
Expand Down Expand Up @@ -1524,8 +1578,12 @@ func (d *DB) ingest(
// the commit mutex which would prevent unrelated batches from writing their
// changes to the WAL and memtable. This will cause a bigger commit hiccup
// during ingestion.
seqNumCount := loadResult.fileCount
if exciseSpan.Valid() {
seqNumCount++
}
d.commit.ingestSem <- struct{}{}
d.commit.AllocateSeqNum(loadResult.fileCount, prepare, apply)
d.commit.AllocateSeqNum(seqNumCount, prepare, apply)
<-d.commit.ingestSem

if err != nil {
Expand Down Expand Up @@ -2253,7 +2311,10 @@ func (d *DB) ingestApply(
}
}
// Check for any EventuallyFileOnlySnapshots that could be watching for
// an excise on this span.
// an excise on this span. There should be none as the
// computePossibleOverlaps steps should have forced these EFOS to transition
// to file-only snapshots by now. If we see any that conflict with this
// excise, panic.
if exciseSpan.Valid() {
for s := d.mu.snapshots.root.next; s != &d.mu.snapshots.root; s = s.next {
if s.efos == nil {
Expand All @@ -2266,8 +2327,7 @@ func (d *DB) ingestApply(
// snapshot.
for i := range efos.protectedRanges {
if efos.protectedRanges[i].OverlapsKeyRange(d.cmp, exciseSpan) {
efos.excised.Store(true)
break
panic("unexpected excise of an EventuallyFileOnlySnapshot's bounds")
}
}
}
Expand Down
8 changes: 0 additions & 8 deletions metamorphic/ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -1664,14 +1664,6 @@ func (o *newSnapshotOp) run(t *Test, h historyRecorder) {
if createEfos || excisePossible {
s := t.getDB(o.dbID).NewEventuallyFileOnlySnapshot(bounds)
t.setSnapshot(o.snapID, s)
// If the EFOS isn't guaranteed to always create iterators, we must force
// a flush on this DB so it transitions this EFOS into a file-only snapshot.
if excisePossible && !t.testOpts.efosAlwaysCreatesIters {
err := t.getDB(o.dbID).Flush()
if err != nil {
h.Recordf("%s // %v", o, err)
}
}
} else {
s := t.getDB(o.dbID).NewSnapshot()
t.setSnapshot(o.snapID, s)
Expand Down
16 changes: 0 additions & 16 deletions metamorphic/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,6 @@ func parseOptions(
case "TestOptions.use_excise":
opts.useExcise = true
return true
case "TestOptions.efos_always_creates_iterators":
opts.efosAlwaysCreatesIters = true
opts.Opts.TestingAlwaysCreateEFOSIterators(true /* value */)
return true
default:
if customOptionParsers == nil {
return false
Expand Down Expand Up @@ -215,9 +211,6 @@ func optionsToString(opts *TestOptions) string {
if opts.useExcise {
fmt.Fprintf(&buf, " use_excise=%v\n", opts.useExcise)
}
if opts.efosAlwaysCreatesIters {
fmt.Fprintf(&buf, " efos_always_creates_iterators=%v\n", opts.efosAlwaysCreatesIters)
}
for _, customOpt := range opts.CustomOpts {
fmt.Fprintf(&buf, " %s=%s\n", customOpt.Name(), customOpt.Value())
}
Expand Down Expand Up @@ -320,11 +313,6 @@ type TestOptions struct {
// excises. However !useExcise && !useSharedReplicate can be used to guarantee
// lack of excises.
useExcise bool
// Enables EFOS to always create iterators, even if a conflicting excise
// happens. Used to guarantee EFOS determinism when conflicting excises are
// in play. If false, EFOS determinism is maintained by having the DB do a
// flush after every new EFOS.
efosAlwaysCreatesIters bool
}

// CustomOption defines a custom option that configures the behavior of an
Expand Down Expand Up @@ -666,10 +654,6 @@ func RandomOptions(
testOpts.Opts.FormatMajorVersion = pebble.FormatVirtualSSTables
}
}
if testOpts.useExcise || testOpts.useSharedReplicate {
testOpts.efosAlwaysCreatesIters = rng.Intn(2) == 0
opts.TestingAlwaysCreateEFOSIterators(testOpts.efosAlwaysCreatesIters)
}
testOpts.Opts.EnsureDefaults()
return testOpts
}
Expand Down
12 changes: 0 additions & 12 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -1056,11 +1056,6 @@ type Options struct {
// against the FS are made after the DB is closed, the FS may leak a
// goroutine indefinitely.
fsCloser io.Closer

// efosAlwaysCreatesIterators is set by some tests to force
// EventuallyFileOnlySnapshots to always create iterators, even after a
// conflicting excise.
efosAlwaysCreatesIterators bool
}
}

Expand Down Expand Up @@ -1237,13 +1232,6 @@ func (o *Options) AddEventListener(l EventListener) {
o.EventListener = &l
}

// TestingAlwaysCreateEFOSIterators is used to toggle a private option for
// having EventuallyFileOnlySnapshots always create iterators. Meant to only
// be used in tests.
func (o *Options) TestingAlwaysCreateEFOSIterators(value bool) {
o.private.efosAlwaysCreatesIterators = value
}

func (o *Options) equal() Equal {
if o.Comparer.Equal == nil {
return bytes.Equal
Expand Down
Loading

0 comments on commit 1d7852b

Please sign in to comment.