From 123e628d4046599fc82a43f3036653b53223d2d2 Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Mon, 28 Aug 2023 11:55:20 -0700 Subject: [PATCH] remoteobjcat: improve ApplyBatch code Improving the ApplyBatch code to first perform sanity checks, then apply the change to the catalog file, then apply to the in-memory state. This ensures correct state if writing to the catalog file fails. This also fixes a problem with the existing code when the catalog file gets rotated: we write down the in-memory state in the new file then write the batch; but because the in-memory state already reflected the batch, we essentially were applying it twice. The fix is accompanied by new sanity checks when loading the catalog. --- objstorage/objstorageprovider/provider.go | 8 +++- .../remoteobjcat/catalog.go | 45 ++++++++++--------- .../remoteobjcat/testdata/catalog | 22 ++++++--- .../remoteobjcat/version_edit.go | 15 +++++-- tool/remotecat.go | 4 +- 5 files changed, 61 insertions(+), 33 deletions(-) diff --git a/objstorage/objstorageprovider/provider.go b/objstorage/objstorageprovider/provider.go index 3a943ba6e82..b24fe9f3a37 100644 --- a/objstorage/objstorageprovider/provider.go +++ b/objstorage/objstorageprovider/provider.go @@ -144,7 +144,13 @@ func DefaultSettings(fs vfs.FS, dirName string) Settings { // Open creates the provider. func Open(settings Settings) (objstorage.Provider, error) { - return open(settings) + // Note: we can't just `return open(settings)` because in an error case we + // would return (*provider)(nil) which is not objstorage.Provider(nil). + p, err := open(settings) + if err != nil { + return nil, err + } + return p, nil } func open(settings Settings) (p *provider, _ error) { diff --git a/objstorage/objstorageprovider/remoteobjcat/catalog.go b/objstorage/objstorageprovider/remoteobjcat/catalog.go index 36bb7d61345..8508f19cfef 100644 --- a/objstorage/objstorageprovider/remoteobjcat/catalog.go +++ b/objstorage/objstorageprovider/remoteobjcat/catalog.go @@ -218,37 +218,40 @@ func (c *Catalog) ApplyBatch(b Batch) error { c.mu.Lock() defer c.mu.Unlock() - // Add new objects before deleting any objects. This allows for cases where - // the same batch adds and deletes an object. - for _, meta := range b.ve.NewObjects { - if _, exists := c.mu.objects[meta.FileNum]; exists { - return errors.AssertionFailedf("adding existing object %s", meta.FileNum) + // Sanity checks. + toAdd := make(map[base.DiskFileNum]struct{}, len(b.ve.NewObjects)) + exists := func(n base.DiskFileNum) bool { + _, ok := c.mu.objects[n] + if !ok { + _, ok = toAdd[n] } + return ok } for _, meta := range b.ve.NewObjects { - c.mu.objects[meta.FileNum] = meta - } - removeAddedObjects := func() { - for i := range b.ve.NewObjects { - delete(c.mu.objects, b.ve.NewObjects[i].FileNum) + if exists(meta.FileNum) { + return errors.AssertionFailedf("adding existing object %s", meta.FileNum) } + toAdd[meta.FileNum] = struct{}{} } for _, n := range b.ve.DeletedObjects { - if _, exists := c.mu.objects[n]; !exists { - removeAddedObjects() + if !exists(n) { return errors.AssertionFailedf("deleting non-existent object %s", n) } } - // Apply the remainder of the batch to our current state. - for _, n := range b.ve.DeletedObjects { - delete(c.mu.objects, n) - } if err := c.writeToCatalogFileLocked(&b.ve); err != nil { return errors.Wrapf(err, "pebble: could not write to remote object catalog: %v", err) } - b.Reset() + // Add new objects before deleting any objects. This allows for cases where + // the same batch adds and deletes an object. + for _, meta := range b.ve.NewObjects { + c.mu.objects[meta.FileNum] = meta + } + for _, n := range b.ve.DeletedObjects { + delete(c.mu.objects, n) + } + return nil } @@ -273,13 +276,15 @@ func (c *Catalog) loadFromCatalogFile(filename string) error { errors.Safe(filename)) } var ve VersionEdit - err = ve.Decode(r) - if err != nil { + if err := ve.Decode(r); err != nil { return errors.Wrapf(err, "pebble: error when loading remote object catalog file %q", errors.Safe(filename)) } // Apply the version edit to the current state. - ve.Apply(&c.mu.creatorID, c.mu.objects) + if err := ve.Apply(&c.mu.creatorID, c.mu.objects); err != nil { + return errors.Wrapf(err, "pebble: error when loading remote object catalog file %q", + errors.Safe(filename)) + } } return nil } diff --git a/objstorage/objstorageprovider/remoteobjcat/testdata/catalog b/objstorage/objstorageprovider/remoteobjcat/testdata/catalog index d3e8d377182..b67dff2449d 100644 --- a/objstorage/objstorageprovider/remoteobjcat/testdata/catalog +++ b/objstorage/objstorageprovider/remoteobjcat/testdata/catalog @@ -177,6 +177,7 @@ sync: test/REMOTE-OBJ-CATALOG-000003 sync: test/REMOTE-OBJ-CATALOG-000003 sync: test/REMOTE-OBJ-CATALOG-000003 sync: test/REMOTE-OBJ-CATALOG-000003 +sync: test/REMOTE-OBJ-CATALOG-000003 close: test/REMOTE-OBJ-CATALOG-000003 create: test/REMOTE-OBJ-CATALOG-000004 sync: test/REMOTE-OBJ-CATALOG-000004 @@ -188,7 +189,6 @@ remove: test/REMOTE-OBJ-CATALOG-000003 sync: test/REMOTE-OBJ-CATALOG-000004 sync: test/REMOTE-OBJ-CATALOG-000004 sync: test/REMOTE-OBJ-CATALOG-000004 -sync: test/REMOTE-OBJ-CATALOG-000004 list test ---- @@ -210,6 +210,7 @@ sync: test/REMOTE-OBJ-CATALOG-000004 sync: test/REMOTE-OBJ-CATALOG-000004 sync: test/REMOTE-OBJ-CATALOG-000004 sync: test/REMOTE-OBJ-CATALOG-000004 +sync: test/REMOTE-OBJ-CATALOG-000004 close: test/REMOTE-OBJ-CATALOG-000004 create: test/REMOTE-OBJ-CATALOG-000005 sync: test/REMOTE-OBJ-CATALOG-000005 @@ -224,7 +225,6 @@ sync: test/REMOTE-OBJ-CATALOG-000005 sync: test/REMOTE-OBJ-CATALOG-000005 sync: test/REMOTE-OBJ-CATALOG-000005 sync: test/REMOTE-OBJ-CATALOG-000005 -sync: test/REMOTE-OBJ-CATALOG-000005 list test ---- @@ -245,7 +245,6 @@ sync: test remove: test/REMOTE-OBJ-CATALOG-000005 sync: test/REMOTE-OBJ-CATALOG-000006 sync: test/REMOTE-OBJ-CATALOG-000006 -sync: test/REMOTE-OBJ-CATALOG-000006 close: test/REMOTE-OBJ-CATALOG-000006 create: test/REMOTE-OBJ-CATALOG-000007 sync: test/REMOTE-OBJ-CATALOG-000007 @@ -258,10 +257,19 @@ sync: test/REMOTE-OBJ-CATALOG-000007 sync: test/REMOTE-OBJ-CATALOG-000007 sync: test/REMOTE-OBJ-CATALOG-000007 sync: test/REMOTE-OBJ-CATALOG-000007 -sync: test/REMOTE-OBJ-CATALOG-000007 -sync: test/REMOTE-OBJ-CATALOG-000007 +close: test/REMOTE-OBJ-CATALOG-000007 +create: test/REMOTE-OBJ-CATALOG-000008 +sync: test/REMOTE-OBJ-CATALOG-000008 +create: test/marker.remote-obj-catalog.000008.REMOTE-OBJ-CATALOG-000008 +close: test/marker.remote-obj-catalog.000008.REMOTE-OBJ-CATALOG-000008 +remove: test/marker.remote-obj-catalog.000007.REMOTE-OBJ-CATALOG-000007 +sync: test +remove: test/REMOTE-OBJ-CATALOG-000007 +sync: test/REMOTE-OBJ-CATALOG-000008 +sync: test/REMOTE-OBJ-CATALOG-000008 +sync: test/REMOTE-OBJ-CATALOG-000008 list test ---- -REMOTE-OBJ-CATALOG-000007 -marker.remote-obj-catalog.000007.REMOTE-OBJ-CATALOG-000007 +REMOTE-OBJ-CATALOG-000008 +marker.remote-obj-catalog.000008.REMOTE-OBJ-CATALOG-000008 diff --git a/objstorage/objstorageprovider/remoteobjcat/version_edit.go b/objstorage/objstorageprovider/remoteobjcat/version_edit.go index 2c1bc869b10..a9044b7806f 100644 --- a/objstorage/objstorageprovider/remoteobjcat/version_edit.go +++ b/objstorage/objstorageprovider/remoteobjcat/version_edit.go @@ -229,14 +229,21 @@ var errCorruptCatalog = base.CorruptionErrorf("pebble: corrupt remote object cat // Apply the version edit to a creator ID and a map of objects. func (v *VersionEdit) Apply( creatorID *objstorage.CreatorID, objects map[base.DiskFileNum]RemoteObjectMetadata, -) { +) error { if v.CreatorID.IsSet() { *creatorID = v.CreatorID } - for _, fileNum := range v.DeletedObjects { - delete(objects, fileNum) - } for _, meta := range v.NewObjects { + if _, exists := objects[meta.FileNum]; exists { + return errors.AssertionFailedf("version edit adds existing object %s", meta.FileNum) + } objects[meta.FileNum] = meta } + for _, fileNum := range v.DeletedObjects { + if _, exists := objects[fileNum]; !exists { + return errors.AssertionFailedf("version edit deletes non-existent object %s", fileNum) + } + delete(objects, fileNum) + } + return nil } diff --git a/tool/remotecat.go b/tool/remotecat.go index 7bb9f42386f..16c920609ac 100644 --- a/tool/remotecat.go +++ b/tool/remotecat.go @@ -110,7 +110,9 @@ func (m *remoteCatalogT) runDumpOne(stdout io.Writer, filename string) error { } } editIdx++ - ve.Apply(&creatorID, objects) + if err := ve.Apply(&creatorID, objects); err != nil { + return err + } } fmt.Fprintf(stdout, "CreatorID: %v\n", creatorID) var filenums []base.DiskFileNum