From dd26bf0e8c225b3eb1314f75f4f44b76a4dc552c Mon Sep 17 00:00:00 2001
From: Bilal Akhtar
Date: Tue, 15 Oct 2024 14:23:04 -0400
Subject: [PATCH] db: delete compaction outputs in case of late cancellation

Previously, if a compaction was cancelled late (i.e. right before we
were going to logAndApply), we would end up not marking the compaction
outputs as obsolete/zombie, nor would we delete them right away. This
change updates the compaction code to also clean up compaction outputs
in this case of late cancellation.

Fixes cockroachdb/cockroach#132668.
---
 compaction.go                         | 74 +++++++++++++++++++++++++--
 internal/manifest/version.go          |  6 +++
 internal/manifest/virtual_backings.go |  2 +-
 3 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/compaction.go b/compaction.go
index d6a516be7b..18a40fda73 100644
--- a/compaction.go
+++ b/compaction.go
@@ -2174,6 +2174,53 @@ func (d *DB) compact(c *compaction, errChannel chan error) {
 	})
 }
 
+// cleanupVersionEdit cleans up any on-disk artifacts that were created
+// for the application of a versionEdit that is no longer going to be applied.
+//
+// d.mu must be held when calling this method.
+func (d *DB) cleanupVersionEdit(ve *versionEdit) {
+	obsoleteFiles := make([]*fileBacking, 0, len(ve.NewFiles))
+	deletedFiles := make(map[base.FileNum]struct{})
+	for key := range ve.DeletedFiles {
+		deletedFiles[key.FileNum] = struct{}{}
+	}
+	for i := range ve.NewFiles {
+		if ve.NewFiles[i].Meta.Virtual {
+			// We handle backing files separately.
+			continue
+		}
+		if _, ok := deletedFiles[ve.NewFiles[i].Meta.FileNum]; ok {
+			// This file is being moved in this ve to a different level.
+			// Don't mark it as obsolete.
+			continue
+		}
+		obsoleteFiles = append(obsoleteFiles, ve.NewFiles[i].Meta.PhysicalMeta().FileBacking)
+	}
+	for i := range ve.CreatedBackingTables {
+		if ve.CreatedBackingTables[i].IsUnused() {
+			obsoleteFiles = append(obsoleteFiles, ve.CreatedBackingTables[i])
+		}
+	}
+	for i := range obsoleteFiles {
+		// Add this file to zombie tables as well, as the versionSet
+		// asserts on whether every obsolete file was at one point
+		// marked zombie.
+		d.mu.versions.zombieTables[obsoleteFiles[i].DiskFileNum] = tableInfo{
+			fileInfo: fileInfo{
+				FileNum:  obsoleteFiles[i].DiskFileNum,
+				FileSize: obsoleteFiles[i].Size,
+			},
+			// TODO(bilal): This is harmless if it's wrong, as it only causes
+			// incorrect accounting of its size in metrics. Currently all
+			// compactions only write to local files anyway, except with
+			// disaggregated storage; if that becomes the norm, we should do
+			// an objprovider lookup here.
+			isLocal: true,
+		}
+	}
+	d.mu.versions.addObsoleteLocked(obsoleteFiles)
+}
+
 // compact1 runs one compaction.
 //
 // d.mu must be held when calling this, but the mutex may be dropped and
@@ -2204,8 +2251,11 @@ func (d *DB) compact1(c *compaction, errChannel chan error) (err error) {
 	// as only the holder of the manifest lock will ever write to it.
 	if c.cancel.Load() {
 		err = firstError(err, ErrCancelledCompaction)
-	}
-	if err != nil {
+		// This is the first time we've seen a cancellation during the
+		// life of this compaction (or the original condition on err == nil
+		// would not have been true). We should delete any tables already
+		// created, as d.runCompaction did not do that.
+		d.cleanupVersionEdit(ve)
 		// logAndApply calls logUnlock. If we didn't call it, we need to call
 		// logUnlock ourselves.
 		d.mu.versions.logUnlock()
@@ -2748,9 +2798,27 @@ func (d *DB) runCompaction(
 	}
 	if result.Err != nil {
 		// Delete any created tables.
+		obsoleteFiles := make([]*fileBacking, 0, len(result.Tables))
+		d.mu.Lock()
 		for i := range result.Tables {
-			_ = d.objProvider.Remove(fileTypeTable, result.Tables[i].ObjMeta.DiskFileNum)
+			backing := &fileBacking{
+				DiskFileNum: result.Tables[i].ObjMeta.DiskFileNum,
+				Size:        result.Tables[i].WriterMeta.Size,
+			}
+			obsoleteFiles = append(obsoleteFiles, backing)
+			// Add this file to zombie tables as well, as the versionSet
+			// asserts on whether every obsolete file was at one point
+			// marked zombie.
+			d.mu.versions.zombieTables[backing.DiskFileNum] = tableInfo{
+				fileInfo: fileInfo{
+					FileNum:  backing.DiskFileNum,
+					FileSize: backing.Size,
+				},
+				isLocal: true,
+			}
 		}
+		d.mu.versions.addObsoleteLocked(obsoleteFiles)
+		d.mu.Unlock()
 	}
 	// Refresh the disk available statistic whenever a compaction/flush
 	// completes, before re-acquiring the mutex.
diff --git a/internal/manifest/version.go b/internal/manifest/version.go
index abf61e317a..45e5c7cf76 100644
--- a/internal/manifest/version.go
+++ b/internal/manifest/version.go
@@ -470,6 +470,12 @@ func (b *FileBacking) Ref() {
 	b.refs.Add(1)
 }
 
+// IsUnused returns true if the backing is not being used by any tables in a
+// version or btree.
+func (b *FileBacking) IsUnused() bool {
+	return b.refs.Load() == 0
+}
+
 // Unref decrements the backing's ref count (and returns the new count).
 func (b *FileBacking) Unref() int32 {
 	v := b.refs.Add(-1)
diff --git a/internal/manifest/virtual_backings.go b/internal/manifest/virtual_backings.go
index 110b9f314c..e3aa97a3ef 100644
--- a/internal/manifest/virtual_backings.go
+++ b/internal/manifest/virtual_backings.go
@@ -99,7 +99,7 @@ type backingWithMetadata struct {
 }
 
 // AddAndRef adds a new backing to the set and takes a reference on it. Another
-// backing for the same DiskFilNum must not exist.
+// backing for the same DiskFileNum must not exist.
 //
 // The added backing is unused until it is associated with a table via AddTable
 // or protected via Protect.
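
For illustration (not part of the patch): a minimal, self-contained Go sketch
of the bookkeeping invariant this change maintains on the cancellation path.
Every orphaned compaction output is recorded in the zombie-tables map before
it is queued as obsolete, since the version set asserts that every obsolete
file was at one point marked zombie. All names below (fileBacking, versionSet,
addObsolete, cleanupCancelledOutputs) are simplified, hypothetical stand-ins,
not Pebble's actual types.

package main

import "fmt"

// fileBacking is a simplified stand-in for Pebble's file-backing metadata:
// just a file number and a size.
type fileBacking struct {
	diskFileNum uint64
	size        uint64
}

// versionSet models only the two pieces of state this patch touches.
type versionSet struct {
	// zombieTables tracks files that are no longer part of the latest
	// version but have not yet been deleted from disk.
	zombieTables map[uint64]uint64 // diskFileNum -> size
	// obsoleteTables holds files queued for deletion.
	obsoleteTables []*fileBacking
}

// addObsolete queues files for deletion, enforcing the assertion the patch
// comments describe: every obsolete file must already be marked zombie.
func (vs *versionSet) addObsolete(files []*fileBacking) {
	for _, f := range files {
		if _, ok := vs.zombieTables[f.diskFileNum]; !ok {
			panic(fmt.Sprintf("file %d made obsolete without being marked zombie", f.diskFileNum))
		}
		vs.obsoleteTables = append(vs.obsoleteTables, f)
	}
}

// cleanupCancelledOutputs models the new cancellation path: first record
// each orphaned output as a zombie, then queue it as obsolete, in that order.
func (vs *versionSet) cleanupCancelledOutputs(outputs []*fileBacking) {
	for _, f := range outputs {
		vs.zombieTables[f.diskFileNum] = f.size
	}
	vs.addObsolete(outputs)
}

func main() {
	vs := &versionSet{zombieTables: map[uint64]uint64{}}
	// Two outputs written by a compaction that was cancelled right before
	// its version edit could be applied.
	orphans := []*fileBacking{
		{diskFileNum: 7, size: 1 << 20},
		{diskFileNum: 8, size: 2 << 20},
	}
	vs.cleanupCancelledOutputs(orphans)
	fmt.Println("tables queued for deletion:", len(vs.obsoleteTables))
}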