Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

db: verify ingest sst bounds are suffixless or adjacent #4126

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 64 additions & 24 deletions ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/cache"
"github.com/cockroachdb/pebble/internal/invariants"
"github.com/cockroachdb/pebble/internal/keyspan"
"github.com/cockroachdb/pebble/internal/manifest"
"github.com/cockroachdb/pebble/internal/overlap"
"github.com/cockroachdb/pebble/internal/sstableinternal"
Expand Down Expand Up @@ -241,14 +242,20 @@ func ingestLoad1External(

// ingestLoad1 creates the FileMetadata for one file. This file will be owned
// by this store.
//
// lastRangeKey is the last range key from the previous file. It is used to
// ensure that the range keys defragment cleanly across files. These checks
// are disabled if disableRangeKeyChecks is true.
func ingestLoad1(
ctx context.Context,
opts *Options,
fmv FormatMajorVersion,
readable objstorage.Readable,
cacheID cache.ID,
fileNum base.FileNum,
) (*fileMetadata, error) {
lastRangeKey keyspan.Span,
disableRangeKeyChecks bool,
) (*fileMetadata, keyspan.Span, error) {
o := opts.MakeReaderOptions()
o.SetInternalCacheOpts(sstableinternal.CacheOptions{
Cache: opts.Cache,
Expand All @@ -257,24 +264,24 @@ func ingestLoad1(
})
r, err := sstable.NewReader(ctx, readable, o)
if err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
defer r.Close()

// Avoid ingesting tables with format versions this DB doesn't support.
tf, err := r.TableFormat()
if err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
if tf < fmv.MinTableFormat() || tf > fmv.MaxTableFormat() {
return nil, errors.Newf(
return nil, keyspan.Span{}, errors.Newf(
"pebble: table format %s is not within range supported at DB format major version %d, (%s,%s)",
tf, fmv, fmv.MinTableFormat(), fmv.MaxTableFormat(),
)
}
if tf.BlockColumnar() {
if _, ok := opts.KeySchemas[r.Properties.KeySchemaName]; !ok {
return nil, errors.Newf(
return nil, keyspan.Span{}, errors.Newf(
"pebble: table uses key schema %q unknown to the database",
r.Properties.KeySchemaName)
}
Expand All @@ -300,52 +307,52 @@ func ingestLoad1(
{
iter, err := r.NewIter(sstable.NoTransforms, nil /* lower */, nil /* upper */)
if err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
defer iter.Close()
var smallest InternalKey
if kv := iter.First(); kv != nil {
if err := ingestValidateKey(opts, &kv.K); err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
smallest = kv.K.Clone()
}
if err := iter.Error(); err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
if kv := iter.Last(); kv != nil {
if err := ingestValidateKey(opts, &kv.K); err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
meta.ExtendPointKeyBounds(opts.Comparer.Compare, smallest, kv.K.Clone())
}
if err := iter.Error(); err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
}

iter, err := r.NewRawRangeDelIter(ctx, sstable.NoFragmentTransforms)
if err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
if iter != nil {
defer iter.Close()
var smallest InternalKey
if s, err := iter.First(); err != nil {
return nil, err
return nil, keyspan.Span{}, err
} else if s != nil {
key := s.SmallestKey()
if err := ingestValidateKey(opts, &key); err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
smallest = key.Clone()
}
if s, err := iter.Last(); err != nil {
return nil, err
return nil, keyspan.Span{}, err
} else if s != nil {
k := s.SmallestKey()
if err := ingestValidateKey(opts, &k); err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
largest := s.LargestKey().Clone()
meta.ExtendPointKeyBounds(opts.Comparer.Compare, smallest, largest)
Expand All @@ -356,45 +363,64 @@ func ingestLoad1(
{
iter, err := r.NewRawRangeKeyIter(ctx, sstable.NoFragmentTransforms)
if err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
if iter != nil {
defer iter.Close()
var smallest InternalKey
if s, err := iter.First(); err != nil {
return nil, err
return nil, keyspan.Span{}, err
} else if s != nil {
key := s.SmallestKey()
if err := ingestValidateKey(opts, &key); err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
smallest = key.Clone()
// Range keys need some additional validation as we need to ensure they
// defragment cleanly with the lastRangeKey from the previous file.
if !disableRangeKeyChecks {
if lastRangeKey.Valid() {
if opts.Comparer.Split.HasSuffix(lastRangeKey.End) && (!opts.Comparer.Equal(lastRangeKey.End, s.Start) ||
!keyspan.DefragmentInternal.ShouldDefragment(opts.Comparer.CompareRangeSuffixes, &lastRangeKey, s)) {
// The last range key has a suffix, and it doesn't defragment cleanly with this range key.
return nil, keyspan.Span{}, errors.AssertionFailedf("pebble: ingest sstable has suffixed range key that won't defragment with previous sstable: %s",
opts.Comparer.FormatKey(lastRangeKey.End))
}
} else if opts.Comparer.Split.HasSuffix(s.Start) {
return nil, keyspan.Span{}, errors.Newf("pebble: ingest sstable has suffixed range key start that won't defragment: %s",
opts.Comparer.FormatKey(s.Start))
}
}
}
lastRangeKey = keyspan.Span{}
if s, err := iter.Last(); err != nil {
return nil, err
return nil, keyspan.Span{}, err
} else if s != nil {
k := s.SmallestKey()
if err := ingestValidateKey(opts, &k); err != nil {
return nil, err
return nil, keyspan.Span{}, err
}
// As range keys are fragmented, the end key of the last range key in
// the table provides the upper bound for the table.
largest := s.LargestKey().Clone()
meta.ExtendRangeKeyBounds(opts.Comparer.Compare, smallest, largest)
lastRangeKey = s.Clone()
}
} else {
lastRangeKey = keyspan.Span{}
}
}

if !meta.HasPointKeys && !meta.HasRangeKeys {
return nil, nil
return nil, keyspan.Span{}, nil
}

// Sanity check that the various bounds on the file were set consistently.
if err := meta.Validate(opts.Comparer.Compare, opts.Comparer.FormatKey); err != nil {
return nil, err
return nil, keyspan.Span{}, err
}

return meta, nil
return meta, lastRangeKey, nil
}

type ingestLoadResult struct {
Expand Down Expand Up @@ -445,6 +471,15 @@ func ingestLoad(

var result ingestLoadResult
result.local = make([]ingestLocalMeta, 0, len(paths))
var lastRangeKey keyspan.Span
// NB: we disable range key boundary assertions if we have shared or external files
// present in this ingestion. This is because a suffixed range key in a local file
// can possibly defragment with a suffixed range key in a shared or external file.
// We also disable range key boundary assertions if we have CreateOnShared set to
// true, as that means we could have suffixed RangeKeyDels or Unsets in the local
// files that won't ever be surfaced, even if there are no shared or external files
// in the ingestion.
disableRangeKeyChecks := len(shared) > 0 || len(external) > 0 || opts.Experimental.CreateOnShared != remote.CreateOnSharedNone
for i := range paths {
f, err := opts.FS.Open(paths[i])
if err != nil {
Expand All @@ -455,7 +490,8 @@ func ingestLoad(
if err != nil {
return ingestLoadResult{}, err
}
m, err := ingestLoad1(ctx, opts, fmv, readable, cacheID, localFileNums[i])
var m *fileMetadata
m, lastRangeKey, err = ingestLoad1(ctx, opts, fmv, readable, cacheID, localFileNums[i], lastRangeKey, disableRangeKeyChecks)
if err != nil {
return ingestLoadResult{}, err
}
Expand All @@ -466,6 +502,10 @@ func ingestLoad(
})
}
}
if !disableRangeKeyChecks && lastRangeKey.Valid() && opts.Comparer.Split.HasSuffix(lastRangeKey.End) {
return ingestLoadResult{}, errors.AssertionFailedf("pebble: last ingest sstable has suffixed range key end %s",
opts.Comparer.FormatKey(lastRangeKey.End))
}

// Sort the shared files according to level.
sort.Sort(sharedByLevel(shared))
Expand Down
7 changes: 7 additions & 0 deletions internal/base/comparer.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,13 @@ func (s Split) Prefix(k []byte) []byte {
return k[:i:i]
}

// HasSuffix returns true if the key k has a suffix remaining after
// Split is called on it. For keys where the entirety of the key is
// returned by Split, HasSuffix will return false.
func (s Split) HasSuffix(k []byte) bool {
return s(k) < len(k)
}

// DefaultSplit is a trivial implementation of Split which always returns the
// full key.
var DefaultSplit Split = func(key []byte) int { return len(key) }
Expand Down
10 changes: 10 additions & 0 deletions iterator_histories_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,16 @@ func TestIterHistories(t *testing.T) {
}
}
return buf.String()
case "build":
if err := runBuildCmd(td, d, d.opts.FS); err != nil {
return err.Error()
}
return ""
case "ingest-existing":
if err := runIngestCmd(td, d, d.opts.FS); err != nil {
return err.Error()
}
return ""
case "ingest":
if err := runBuildCmd(td, d, d.opts.FS); err != nil {
return err.Error()
Expand Down
9 changes: 8 additions & 1 deletion open.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/cockroachdb/pebble/internal/cache"
"github.com/cockroachdb/pebble/internal/constants"
"github.com/cockroachdb/pebble/internal/invariants"
"github.com/cockroachdb/pebble/internal/keyspan"
"github.com/cockroachdb/pebble/internal/manifest"
"github.com/cockroachdb/pebble/internal/manual"
"github.com/cockroachdb/pebble/objstorage"
Expand Down Expand Up @@ -807,17 +808,23 @@ func (d *DB) replayIngestedFlushable(
}

meta := make([]*fileMetadata, len(fileNums))
var lastRangeKey keyspan.Span
for i, n := range fileNums {
readable, err := d.objProvider.OpenForReading(context.TODO(), fileTypeTable, n, objstorage.OpenOptions{MustExist: true})
if err != nil {
return nil, errors.Wrap(err, "pebble: error when opening flushable ingest files")
}
// NB: ingestLoad1 will close readable.
meta[i], err = ingestLoad1(context.TODO(), d.opts, d.FormatMajorVersion(), readable, d.cacheID, base.PhysicalTableFileNum(n))
meta[i], lastRangeKey, err = ingestLoad1(context.TODO(), d.opts, d.FormatMajorVersion(),
readable, d.cacheID, base.PhysicalTableFileNum(n), lastRangeKey, false /* disableRangeKeyChecks */)
if err != nil {
return nil, errors.Wrap(err, "pebble: error when loading flushable ingest files")
}
}
if lastRangeKey.Valid() && d.opts.Comparer.Split.HasSuffix(lastRangeKey.End) {
return nil, errors.AssertionFailedf("pebble: last ingest sstable has suffixed range key end %s",
d.opts.Comparer.FormatKey(lastRangeKey.End))
}

numFiles := len(meta)
if exciseSpan.Valid() {
Expand Down
17 changes: 13 additions & 4 deletions testdata/iter_histories/prefix_iteration
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,14 @@ ingest ext1
range-key-set a c@8 @1 bar
set c@9 c@9
----
pebble: last ingest sstable has suffixed range key end c@8

ingest ext2
build ext1
range-key-set a c@8 @1 bar
set c@9 c@9
----

build ext2
range-key-set c@8 e @1 bar
set c@8 c@8
set c@7 c@7
Expand All @@ -152,6 +158,9 @@ set c@3 c@3
set c@2 c@2
----

ingest-existing ext1 ext2
----

ingest ext2
range-key-set y z @1 foo
set z z
Expand All @@ -160,9 +169,9 @@ set z z
lsm
----
L6:
000004:[a#10,RANGEKEYSET-c@8#inf,RANGEKEYSET]
000005:[c@8#11,RANGEKEYSET-e#inf,RANGEKEYSET]
000006:[y#12,RANGEKEYSET-z#12,SET]
000005:[a#10,RANGEKEYSET-c@8#inf,RANGEKEYSET]
000006:[c@8#11,RANGEKEYSET-e#inf,RANGEKEYSET]
000007:[y#12,RANGEKEYSET-z#12,SET]


# The first seek-prefix-ge y@1 converts the iterator from lazy combined iterator
Expand Down
Loading