diff --git a/internal/manifest/version.go b/internal/manifest/version.go index b584899dcd9..6eb86a43f8a 100644 --- a/internal/manifest/version.go +++ b/internal/manifest/version.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/overlap/overlapcache" "github.com/cockroachdb/pebble/sstable" ) @@ -287,6 +288,9 @@ type FileMetadata struct { // SyntheticSuffix overrides all suffixes in a table; used for some virtual tables. SyntheticSuffix sstable.SyntheticSuffix + + // OverlapCache is used to speed up overlap checks during ingestion. + OverlapCache overlapcache.C } // InternalKeyBounds returns the set of overall table bounds. diff --git a/internal/overlap/checker.go b/internal/overlap/checker.go index c219b553a16..3f88d71a2b6 100644 --- a/internal/overlap/checker.go +++ b/internal/overlap/checker.go @@ -8,6 +8,7 @@ package overlap import ( "context" + "slices" "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/keyspan" @@ -23,7 +24,8 @@ type WithLSM [manifest.NumLevels]WithLevel type WithLevel struct { Result Kind // SplitFile can be set only when result is OnlyBoundary. If it is set, this - // file can be split to free up the range of interest. + // file can be split to free up the range of interest. SplitFile is not set + // for L0 (overlapping tables are allowed in L0). SplitFile *manifest.FileMetadata } @@ -89,6 +91,7 @@ func (c *Checker) LSMOverlap( } if res.Result == OnlyBoundary { result[0].Result = OnlyBoundary + // We don't set SplitFile for L0 (tables in L0 are allowed to overlap). } } for level := 1; level < manifest.NumLevels; level++ { @@ -135,11 +138,11 @@ func (c *Checker) LevelOverlap( return WithLevel{Result: Data}, nil } // We have a single file to look at; its boundaries enclose our region. - empty, err := c.EmptyRegion(ctx, region, file) + overlap, err := c.OverlapWithFile(ctx, region, file) if err != nil { return WithLevel{}, err } - if !empty { + if overlap { return WithLevel{Result: Data}, nil } return WithLevel{ @@ -148,116 +151,183 @@ func (c *Checker) LevelOverlap( }, nil } -// EmptyRegion returns true if the given region doesn't overlap with any keys or -// ranges in the given table. -func (c *Checker) EmptyRegion( +// OverlapWithFile returns true if the given region overlaps with any keys or +// spans in the given table. +func (c *Checker) OverlapWithFile( ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata, ) (bool, error) { - empty, err := c.emptyRegionPointsAndRangeDels(ctx, region, m) - if err != nil || !empty { - return empty, err + if overlap, ok := m.OverlapCache.CheckDataOverlap(c.cmp, region); ok { + return overlap, nil } - return c.emptyRegionRangeKeys(ctx, region, m) -} + // We want to check overlap with file, but we also want to update the cache + // with useful information. We try to find two data regions r1 and r2 with a + // space-in between; r1 ends before region.Start and r2 ends at or after + // region.Start. See overlapcache.C.ReportEmptyRegion(). + var r1, r2 base.UserKeyBounds -// emptyRegionPointsAndRangeDels returns true if the file doesn't contain any -// point keys or range del spans that overlap with region. -func (c *Checker) emptyRegionPointsAndRangeDels( - ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata, -) (bool, error) { - if !m.HasPointKeys { - return true, nil + if m.HasPointKeys { + lt, ge, err := c.pointKeysAroundKey(ctx, region.Start, m) + if err != nil { + return false, err + } + r1 = base.UserKeyBoundsInclusive(lt, lt) + r2 = base.UserKeyBoundsInclusive(ge, ge) + + if err := c.extendRegionsWithSpans(ctx, &r1, &r2, region.Start, m, manifest.KeyTypePoint); err != nil { + return false, err + } } - pointBounds := m.UserKeyBoundsByType(manifest.KeyTypePoint) - if !pointBounds.Overlaps(c.cmp, ®ion) { + if m.HasRangeKeys { + if err := c.extendRegionsWithSpans(ctx, &r1, &r2, region.Start, m, manifest.KeyTypeRange); err != nil { + return false, err + } + } + // If the regions now overlap or touch, it's all one big data region. + if r1.Start != nil && r2.Start != nil && c.cmp(r1.End.Key, r2.Start) >= 0 { + m.OverlapCache.ReportDataRegion(c.cmp, base.UserKeyBounds{ + Start: r1.Start, + End: r2.End, + }) return true, nil } + m.OverlapCache.ReportEmptyRegion(c.cmp, r1, r2) + // There is overlap iff we overlap with r2. + overlap := r2.Start != nil && region.End.IsUpperBoundFor(c.cmp, r2.Start) + return overlap, nil +} + +// pointKeysAroundKey returns two consecutive point keys: the greatest key that +// is < key and the smallest key that is >= key. If there is no such key, the +// corresponding return value is nil. Both lt and ge are nil if the file +// contains no point keys. +func (c *Checker) pointKeysAroundKey( + ctx context.Context, key []byte, m *manifest.FileMetadata, +) (lt, ge []byte, _ error) { + pointBounds := m.UserKeyBoundsByType(manifest.KeyTypePoint) + points, err := c.iteratorFactory.Points(ctx, m) - if err != nil { - return false, err + if points == nil || err != nil { + return nil, nil, err } - if points != nil { - defer points.Close() - var kv *base.InternalKV - if c.cmp(region.Start, pointBounds.Start) <= 0 { - kv = points.First() - } else { - kv = points.SeekGE(region.Start, base.SeekGEFlagsNone) + defer points.Close() + switch { + case c.cmp(key, pointBounds.Start) <= 0: + kv := points.First() + if kv != nil { + ge = slices.Clone(kv.K.UserKey) } - if kv == nil && points.Error() != nil { - return false, points.Error() + case c.cmp(key, pointBounds.End.Key) > 0: + kv := points.Last() + if kv != nil { + lt = slices.Clone(kv.K.UserKey) } - if kv != nil && region.End.IsUpperBoundForInternalKey(c.cmp, kv.K) { - // Found overlap. - return false, nil + default: + kv := points.SeekLT(key, base.SeekLTFlagsNone) + if kv != nil { + lt = slices.Clone(kv.K.UserKey) } - } - rangeDels, err := c.iteratorFactory.RangeDels(ctx, m) - if err != nil { - return false, err - } - if rangeDels != nil { - defer rangeDels.Close() - empty, err := c.emptyFragmentRegion(region, pointBounds.Start, rangeDels) - if err != nil || !empty { - return empty, err + if kv = points.Next(); kv != nil { + ge = slices.Clone(kv.K.UserKey) } } - // Found no overlap. - return true, nil + return lt, ge, points.Error() } -// emptyRegionRangeKeys returns true if the file doesn't contain any range key -// spans that overlap with region. -func (c *Checker) emptyRegionRangeKeys( - ctx context.Context, region base.UserKeyBounds, m *manifest.FileMetadata, -) (bool, error) { - if !m.HasRangeKeys { - return true, nil - } - rangeKeyBounds := m.UserKeyBoundsByType(manifest.KeyTypeRange) - if !rangeKeyBounds.Overlaps(c.cmp, ®ion) { - return true, nil +// extendRegionsWithSpans opens a fragment iterator for either range dels or +// range keys (depending n keyType), finds the last span that ends before key +// and the following span, and extends/replaces regions r1 and r2. +func (c *Checker) extendRegionsWithSpans( + ctx context.Context, + r1, r2 *base.UserKeyBounds, + key []byte, + m *manifest.FileMetadata, + keyType manifest.KeyType, +) error { + var iter keyspan.FragmentIterator + var err error + if keyType == manifest.KeyTypePoint { + iter, err = c.iteratorFactory.RangeDels(ctx, m) + } else { + iter, err = c.iteratorFactory.RangeKeys(ctx, m) } - rangeKeys, err := c.iteratorFactory.RangeKeys(ctx, m) - if err != nil { - return false, err + if iter == nil || err != nil { + return err } - if rangeKeys != nil { - defer rangeKeys.Close() - empty, err := c.emptyFragmentRegion(region, rangeKeyBounds.Start, rangeKeys) - if err != nil || !empty { - return empty, err + defer iter.Close() + + fragmentBounds := m.UserKeyBoundsByType(keyType) + switch { + case c.cmp(key, fragmentBounds.Start) <= 0: + span, err := iter.First() + if err != nil { + return err + } + c.updateR2(r2, span) + + case !fragmentBounds.End.IsUpperBoundFor(c.cmp, key): + span, err := iter.Last() + if err != nil { + return err + } + c.updateR1(r1, span) + + default: + span, err := iter.SeekGE(key) + if err != nil { + return err } + c.updateR2(r2, span) + span, err = iter.Prev() + if err != nil { + return err + } + c.updateR1(r1, span) } - // Found no overlap. - return true, nil + return nil } -// emptyFragmentRegion returns true if the given iterator doesn't contain any -// spans that overlap with region. The fragmentLowerBounds is a known lower -// bound for all the spans. -func (c *Checker) emptyFragmentRegion( - region base.UserKeyBounds, fragmentLowerBound []byte, fragments keyspan.FragmentIterator, -) (bool, error) { - var span *keyspan.Span - var err error - if c.cmp(region.Start, fragmentLowerBound) <= 0 { - // This is an optimization: we know there are no spans before region.Start, - // so we can use First. - span, err = fragments.First() - } else { - span, err = fragments.SeekGE(region.Start) - } - if err != nil { - return false, err - } - if span != nil && span.Empty() { - return false, base.AssertionFailedf("fragment iterator produced empty span") +// updateR1 updates r1, the region of data that ends before a key of interest. +func (c *Checker) updateR1(r1 *base.UserKeyBounds, s *keyspan.Span) { + switch { + case s == nil: + + case r1.Start == nil || c.cmp(r1.End.Key, s.Start) < 0: + // Region completely to the right of r1. + *r1 = base.UserKeyBoundsEndExclusive(slices.Clone(s.Start), slices.Clone(s.End)) + + case c.cmp(s.End, r1.Start) < 0: + // Region completely to the left of r1, nothing to do. + + default: + // Regions are overlapping or touching. + if c.cmp(s.Start, r1.Start) < 0 { + r1.Start = slices.Clone(s.Start) + } + if c.cmp(r1.End.Key, s.End) < 0 { + r1.End = base.UserKeyExclusive(slices.Clone(s.End)) + } } - if span != nil && region.End.IsUpperBoundFor(c.cmp, span.Start) { - // Found overlap. - return false, nil +} + +// updateR2 updates r2, the region of data that ends before a key of interest. +func (c *Checker) updateR2(r2 *base.UserKeyBounds, s *keyspan.Span) { + switch { + case s == nil: + + case r2.Start == nil || c.cmp(s.End, r2.Start) < 0: + // Region completely to the left of r2. + *r2 = base.UserKeyBoundsEndExclusive(slices.Clone(s.Start), slices.Clone(s.End)) + + case c.cmp(r2.End.Key, s.Start) < 0: + // Region completely to the right of r2, nothing to do. + + default: + // Regions are overlapping or touching. + if c.cmp(s.Start, r2.Start) < 0 { + r2.Start = slices.Clone(s.Start) + } + if c.cmp(r2.End.Key, s.End) < 0 { + r2.End = base.UserKeyExclusive(slices.Clone(s.End)) + } } - return true, nil } diff --git a/internal/overlap/checker_test.go b/internal/overlap/checker_test.go index 21a2fc539b8..4fcf5ce1c4b 100644 --- a/internal/overlap/checker_test.go +++ b/internal/overlap/checker_test.go @@ -16,14 +16,21 @@ import ( "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/keyspan" "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/internal/overlap/overlapcache" "github.com/stretchr/testify/require" ) func TestChecker(t *testing.T) { tables := newTestTables() byName := make(map[string]*manifest.FileMetadata) + clearCaches := func() { + for _, t := range tables.tables { + t.meta.OverlapCache = overlapcache.C{} + } + } datadriven.RunTest(t, "testdata/checker", func(t *testing.T, d *datadriven.TestData) string { + clearCaches() switch d.Cmd { case "define": tt := testTable{ @@ -99,10 +106,15 @@ func TestChecker(t *testing.T) { tables.tables[tt.meta] = tt case "overlap": + var withCache bool var metas []*manifest.FileMetadata lines := strings.Split(d.Input, "\n") for _, arg := range d.CmdArgs { name := arg.String() + if name == "with-cache" { + withCache = true + continue + } m := byName[name] if m == nil { d.Fatalf(t, "unknown table %q", name) @@ -113,6 +125,9 @@ func TestChecker(t *testing.T) { c := MakeChecker(bytes.Compare, tables) var buf strings.Builder for _, l := range lines { + if !withCache { + clearCaches() + } bounds := base.ParseUserKeyBounds(l) withLevel, err := c.LevelOverlap(context.Background(), bounds, levelMeta.Slice()) require.NoError(t, err) diff --git a/internal/overlap/testdata/checker b/internal/overlap/testdata/checker index ea7e31f631b..b2b71fc835f 100644 --- a/internal/overlap/testdata/checker +++ b/internal/overlap/testdata/checker @@ -20,7 +20,7 @@ overlap t1 [a, b]: possible data overlap iterators opened: none [b1, b2]: boundary overlap, no data overlap (split file: t1) iterators opened: t1/points, t1/range-del [b, c]: possible data overlap iterators opened: none -[d, e]: possible data overlap iterators opened: t1/points +[d, e]: possible data overlap iterators opened: t1/points, t1/range-del [e, u): boundary overlap, no data overlap (split file: t1) iterators opened: t1/points, t1/range-del [e, u]: possible data overlap iterators opened: none [u, w]: possible data overlap iterators opened: none @@ -76,6 +76,26 @@ overlap t3 [j, l): possible data overlap iterators opened: none [k, l]: no overlap iterators opened: none +overlap t3 with-cache +[a, b) +[a, b] +[d, f] +[d, f] +[e, f] +[e, f] +[e, h) +[e, h] +[j, l) +---- +[a, b): no overlap iterators opened: none +[a, b]: possible data overlap iterators opened: none +[d, f]: possible data overlap iterators opened: t3/range-key +[d, f]: possible data overlap iterators opened: none +[e, f]: boundary overlap, no data overlap (split file: t3) iterators opened: t3/range-key +[e, f]: boundary overlap, no data overlap (split file: t3) iterators opened: none +[e, h): boundary overlap, no data overlap (split file: t3) iterators opened: none +[e, h]: possible data overlap iterators opened: none +[j, l): possible data overlap iterators opened: none define tBE points: @@ -109,11 +129,46 @@ overlap tBE tFM tMP [n, o] [a, z] ---- -[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: tBE/points, tBE/range-del +[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: tBE/points, tBE/range-del, tBE/range-key [c1, c2]: possible data overlap iterators opened: tBE/points, tBE/range-del, tBE/range-key [e, f): no overlap iterators opened: none [g1, g2]: boundary overlap, no data overlap (split file: tFM) iterators opened: tFM/points, tFM/range-del [j1, m1]: possible data overlap iterators opened: none [n1, n2]: boundary overlap, no data overlap (split file: tMP) iterators opened: tMP/points, tMP/range-del -[n, o]: possible data overlap iterators opened: tMP/points +[n, o]: possible data overlap iterators opened: tMP/points, tMP/range-del +[a, z]: possible data overlap iterators opened: none + +overlap tBE tFM tMP with-cache +[b1, b2] +[c1, c2] +[e, f) +[g1, g2] +[j1, m1] +[n1, n2] +[n, o] +[a, z] +[b1, b2] +[c1, c2] +[e, f) +[g1, g2] +[j1, m1] +[n1, n2] +[n, o] +[a, z] +---- +[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: tBE/points, tBE/range-del, tBE/range-key +[c1, c2]: possible data overlap iterators opened: none +[e, f): no overlap iterators opened: none +[g1, g2]: boundary overlap, no data overlap (split file: tFM) iterators opened: tFM/points, tFM/range-del +[j1, m1]: possible data overlap iterators opened: none +[n1, n2]: boundary overlap, no data overlap (split file: tMP) iterators opened: tMP/points, tMP/range-del +[n, o]: possible data overlap iterators opened: none +[a, z]: possible data overlap iterators opened: none +[b1, b2]: boundary overlap, no data overlap (split file: tBE) iterators opened: none +[c1, c2]: possible data overlap iterators opened: none +[e, f): no overlap iterators opened: none +[g1, g2]: boundary overlap, no data overlap (split file: tFM) iterators opened: none +[j1, m1]: possible data overlap iterators opened: none +[n1, n2]: boundary overlap, no data overlap (split file: tMP) iterators opened: none +[n, o]: possible data overlap iterators opened: none [a, z]: possible data overlap iterators opened: none diff --git a/testdata/metrics b/testdata/metrics index 6776db1cd81..909a61794a2 100644 --- a/testdata/metrics +++ b/testdata/metrics @@ -488,7 +488,7 @@ Virtual tables: 0 (0B) Local tables size: 4.3KB Compression types: snappy: 7 Block cache: 12 entries (1.9KB) hit rate: 9.1% -Table cache: 1 entries (760B) hit rate: 53.8% +Table cache: 1 entries (760B) hit rate: 57.1% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0 @@ -551,7 +551,7 @@ Virtual tables: 0 (0B) Local tables size: 6.1KB Compression types: snappy: 10 Block cache: 12 entries (1.9KB) hit rate: 9.1% -Table cache: 1 entries (760B) hit rate: 53.8% +Table cache: 1 entries (760B) hit rate: 57.1% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0 @@ -869,7 +869,7 @@ Virtual tables: 0 (0B) Local tables size: 0B Compression types: snappy: 2 Block cache: 6 entries (996B) hit rate: 0.0% -Table cache: 1 entries (760B) hit rate: 50.0% +Table cache: 1 entries (760B) hit rate: 66.7% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0 @@ -917,7 +917,7 @@ Virtual tables: 0 (0B) Local tables size: 589B Compression types: snappy: 3 Block cache: 6 entries (996B) hit rate: 0.0% -Table cache: 1 entries (760B) hit rate: 50.0% +Table cache: 1 entries (760B) hit rate: 66.7% Secondary cache: 0 entries (0B) hit rate: 0.0% Snapshots: 0 earliest seq num: 0 Table iters: 0