From 2801a3eab3d893da826005d90de3ea7ebeca81be Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Thu, 29 Aug 2024 15:27:55 -0400 Subject: [PATCH] colblk: add KeyCount, UnsafeBoundaryKeys to KeyspanBlockWriter Add KeyCount and UnsafeBoundaryKeys methods to the KeyspanBlockWriter. The KeyCount will be used by the sstable writer to determine whether a range deletion or range key block needs to be serialized at all. The UnsafeBoundaryKeys method will be used to determine the smallest and largest rangedel/rangekey keys within a table. --- sstable/colblk/keyspan.go | 22 +++++++++++++++++++++- sstable/colblk/keyspan_test.go | 8 +++++--- sstable/colblk/testdata/keyspan_block | 3 +++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/sstable/colblk/keyspan.go b/sstable/colblk/keyspan.go index b8ebfdd48d..2462e863bb 100644 --- a/sstable/colblk/keyspan.go +++ b/sstable/colblk/keyspan.go @@ -82,7 +82,7 @@ func (w *KeyspanBlockWriter) Reset() { // AddSpan appends a new Span to the pending block. Spans must already be // fragmented (non-overlapping) and added in sorted order. -func (w *KeyspanBlockWriter) AddSpan(s *keyspan.Span) { +func (w *KeyspanBlockWriter) AddSpan(s keyspan.Span) { // When keyspans are fragmented, abutting spans share a user key. One span's // end key is the next span's start key. Check if the previous user key // equals this span's start key, and avoid encoding it again if so. @@ -109,6 +109,26 @@ func (w *KeyspanBlockWriter) AddSpan(s *keyspan.Span) { } } +// KeyCount returns the count of keyspan.Keys written to the writer. +func (w *KeyspanBlockWriter) KeyCount() int { + return w.keyCount +} + +// UnsafeBoundaryKeys returns the smallest and largest keys written to the +// keyspan block so far. The returned internal keys have user keys that point +// directly into the block writer's memory and must not be mutated. +func (w *KeyspanBlockWriter) UnsafeBoundaryKeys() (smallest, largest base.InternalKey) { + if w.keyCount == 0 { + return smallest, largest + } + smallest.UserKey = w.boundaryUserKeys.UnsafeGet(0) + smallest.Trailer = base.InternalKeyTrailer(w.trailers.Get(0)) + largest.UserKey = w.boundaryUserKeys.UnsafeGet(w.boundaryUserKeys.rows - 1) + largest.Trailer = base.MakeTrailer(base.SeqNumMax, + base.InternalKeyTrailer(w.trailers.Get(w.keyCount-1)).Kind()) + return smallest, largest +} + // Size returns the size of the pending block. func (w *KeyspanBlockWriter) Size() int { off := blockHeaderSize(keyspanColumnCount, keyspanHeaderSize) diff --git a/sstable/colblk/keyspan_test.go b/sstable/colblk/keyspan_test.go index 72a2803089..b62512df64 100644 --- a/sstable/colblk/keyspan_test.go +++ b/sstable/colblk/keyspan_test.go @@ -15,6 +15,7 @@ import ( "github.com/cockroachdb/datadriven" "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/testkeys" "golang.org/x/exp/rand" ) @@ -35,12 +36,13 @@ func TestKeyspanBlock(t *testing.T) { return buf.String() case "add": for _, line := range strings.Split(td.Input, "\n") { - s := keyspan.ParseSpan(line) - w.AddSpan(&s) + w.AddSpan(keyspan.ParseSpan(line)) } fmt.Fprint(&buf, &w) return buf.String() case "finish": + sm, la := w.UnsafeBoundaryKeys() + fmt.Fprintf(&buf, "Boundaries: %s — %s\n", sm.Pretty(testkeys.Comparer.FormatKey), la.Pretty(testkeys.Comparer.FormatKey)) block := w.Finish() kr.Init(block) fmt.Fprint(&buf, kr.DebugString()) @@ -90,7 +92,7 @@ func benchmarkKeyspanBlockRangeDeletions(b *testing.B, numSpans, keysPerSpan, ke t := base.MakeTrailer(base.SeqNum(j), base.InternalKeyKindRangeDelete) s.Keys = append(s.Keys, keyspan.Key{Trailer: t}) } - w.AddSpan(&s) + w.AddSpan(s) } block := w.Finish() avgRowSize := float64(w.Size()) / float64(numSpans*keysPerSpan) diff --git a/sstable/colblk/testdata/keyspan_block b/sstable/colblk/testdata/keyspan_block index 913d561b02..83cf7281be 100644 --- a/sstable/colblk/testdata/keyspan_block +++ b/sstable/colblk/testdata/keyspan_block @@ -49,6 +49,7 @@ size=73: finish ---- +Boundaries: a#0,RANGEDEL — e#inf,RANGEDEL # keyspan block header 00-04: x 05000000 # user key count: 5 # columnar block header @@ -226,6 +227,7 @@ size=70: finish ---- +Boundaries: b#4,RANGEKEYSET — d#inf,RANGEKEYSET # keyspan block header 00-04: x 02000000 # user key count: 2 # columnar block header @@ -311,6 +313,7 @@ size=80: finish ---- +Boundaries: b#4,RANGEKEYSET — g#inf,RANGEKEYSET # keyspan block header 00-04: x 04000000 # user key count: 4 # columnar block header