Skip to content

Commit

Permalink
db: allow value block writing to be disabled
Browse files Browse the repository at this point in the history
Previously there was no way to disable value blocks in table formats that
supported value blocks. Now, it can be disabled using
WriterOptions.DisableValueBlocks. This is never used by Pebble since
we expect small sstables where the memory overhead of buffering value
blocks in the sstable.Writer is acceptable.

Informs cockroachdb/cockroach#117113
  • Loading branch information
sumeerbhola committed Jan 16, 2024
1 parent 27d566e commit aba7c0c
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 14 deletions.
9 changes: 5 additions & 4 deletions metamorphic/ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -652,10 +652,11 @@ func buildForIngest(

equal := t.opts.Comparer.Equal
tableFormat := db.FormatMajorVersion().MaxTableFormat()
w := sstable.NewWriter(
objstorageprovider.NewFileWritable(f),
t.opts.MakeWriterOptions(0, tableFormat),
)
wOpts := t.opts.MakeWriterOptions(0, tableFormat)
if t.testOpts.disableValueBlocksForIngestSSTables {
wOpts.DisableValueBlocks = true
}
w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), wOpts)

var lastUserKey []byte
for key, value := iter.First(); key != nil; key, value = iter.Next() {
Expand Down
9 changes: 9 additions & 0 deletions metamorphic/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ func parseOptions(
opts.enableValueBlocks = true
opts.Opts.Experimental.EnableValueBlocks = func() bool { return true }
return true
case "TestOptions.disable_value_blocks_for_ingest_sstables":
opts.disableValueBlocksForIngestSSTables = true
return true
case "TestOptions.async_apply_to_db":
opts.asyncApplyToDB = true
return true
Expand Down Expand Up @@ -188,6 +191,9 @@ func optionsToString(opts *TestOptions) string {
if opts.enableValueBlocks {
fmt.Fprintf(&buf, " enable_value_blocks=%t\n", opts.enableValueBlocks)
}
if opts.disableValueBlocksForIngestSSTables {
fmt.Fprintf(&buf, " disable_value_blocks_for_ingest_sstables=%t\n", opts.disableValueBlocksForIngestSSTables)
}
if opts.asyncApplyToDB {
fmt.Fprint(&buf, " async_apply_to_db=true\n")
}
Expand Down Expand Up @@ -287,6 +293,8 @@ type TestOptions struct {
disableBlockPropertyCollector bool
// Enable the use of value blocks.
enableValueBlocks bool
// Disables value blocks in the sstables written for ingest.
disableValueBlocksForIngestSSTables bool
// Use DB.ApplyNoSyncWait for applies that want to sync the WAL.
asyncApplyToDB bool
// Enable the use of shared storage.
Expand Down Expand Up @@ -618,6 +626,7 @@ func RandomOptions(
if testOpts.enableValueBlocks {
testOpts.Opts.Experimental.EnableValueBlocks = func() bool { return true }
}
testOpts.disableValueBlocksForIngestSSTables = rng.Intn(2) == 0
testOpts.asyncApplyToDB = rng.Intn(2) != 0
// 20% of time, enable shared storage.
if rng.Intn(5) == 0 {
Expand Down
11 changes: 11 additions & 0 deletions sstable/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,17 @@ type WriterOptions struct {
// RequiredInPlaceValueBound mirrors
// Options.Experimental.RequiredInPlaceValueBound.
RequiredInPlaceValueBound UserKeyPrefixBound

// DisableValueBlocks is only used for TableFormat >= TableFormatPebblev3,
// and if set to true, does not write any values to value blocks. This is
// only intended for cases where the in-memory buffering of all value blocks
// while writing a sstable is too expensive and likely to cause an OOM. It
// is never set to true by a Pebble DB, and can be set to true when some
// external code is directly generating huge sstables using Pebble's
// sstable.Writer (for example, CockroachDB backups can sometimes write
// 750MB sstables -- see
// https://github.com/cockroachdb/cockroach/issues/117113).
DisableValueBlocks bool
}

func (o WriterOptions) ensureDefaults() WriterOptions {
Expand Down
26 changes: 26 additions & 0 deletions sstable/testdata/writer_value_blocks
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,32 @@ scan-cloned-lazy-values
3(in-place: len 4): bat3
4(lazy: len 5, attr: 5): vbat2

# Same data as previous, with disable-value-blocks set to true
build disable-value-blocks=true
[email protected]:a2
[email protected]:b5
[email protected]:
[email protected]:bat3
[email protected]:vbat2
----
value-blocks: num-values 0, num-blocks: 0, size: 0

scan-raw
----
a@2#1,1:in-place a2, same-pre false
b@5#7,1:in-place b5, same-pre false
b@4#3,0:
b@3#2,1:in-place bat3, same-pre false
b@2#1,1:in-place vbat2, same-pre true

scan
----
a@2#1,1:a2
b@5#7,1:b5
b@4#3,0:
b@3#2,1:bat3
b@2#1,1:vbat2

# Size of value index is 3 bytes plus 5 + 5 = 10 bytes of trailer of the value
# block and value index block. So size 33 - 13 = 20 is the total size of the
# values in the value block.
Expand Down
24 changes: 14 additions & 10 deletions sstable/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,9 @@ type Writer struct {
// For value blocks.
shortAttributeExtractor base.ShortAttributeExtractor
requiredInPlaceValueBound UserKeyPrefixBound
valueBlockWriter *valueBlockWriter
// When w.tableFormat >= TableFormatPebblev3, valueBlockWriter is nil iff
// WriterOptions.DisableValueBlocks was true.
valueBlockWriter *valueBlockWriter
}

type pointKeyInfo struct {
Expand Down Expand Up @@ -803,6 +805,7 @@ func (w *Writer) getLastPointUserKey() []byte {
return w.dataBlockBuf.dataBlock.getCurUserKey()
}

// REQUIRES: w.tableFormat >= TableFormatPebblev3
func (w *Writer) makeAddPointDecisionV3(
key InternalKey, valueLen int,
) (setHasSamePrefix bool, writeToValueBlock bool, isObsolete bool, err error) {
Expand Down Expand Up @@ -912,14 +915,13 @@ func (w *Writer) makeAddPointDecisionV3(
// NB: it is possible that cmpUser == 0, i.e., these two SETs have identical
// user keys (because of an open snapshot). This should be the rare case.
setHasSamePrefix = cmpPrefix == 0
considerWriteToValueBlock = setHasSamePrefix
// Use of 0 here is somewhat arbitrary. Given the minimum 3 byte encoding of
// valueHandle, this should be > 3. But tiny values are common in test and
// unlikely in production, so we use 0 here for better test coverage.
const tinyValueThreshold = 0
if considerWriteToValueBlock && valueLen <= tinyValueThreshold {
considerWriteToValueBlock = false
}
// NB: setting WriterOptions.DisableValueBlocks does not disable the
// setHasSamePrefix optimization.
considerWriteToValueBlock = setHasSamePrefix && valueLen > tinyValueThreshold && w.valueBlockWriter != nil
return setHasSamePrefix, considerWriteToValueBlock, isObsolete, nil
}

Expand All @@ -931,7 +933,7 @@ func (w *Writer) addPoint(key InternalKey, value []byte, forceObsolete bool) err
var setHasSameKeyPrefix, writeToValueBlock, addPrefixToValueStoredWithKey bool
var isObsolete bool
maxSharedKeyLen := len(key.UserKey)
if w.valueBlockWriter != nil {
if w.tableFormat >= TableFormatPebblev3 {
// maxSharedKeyLen is limited to the prefix of the preceding key. If the
// preceding key was in a different block, then the blockWriter will
// ignore this maxSharedKeyLen.
Expand Down Expand Up @@ -2218,10 +2220,12 @@ func NewWriter(writable objstorage.Writable, o WriterOptions, extraOpts ...Write
if w.tableFormat >= TableFormatPebblev3 {
w.shortAttributeExtractor = o.ShortAttributeExtractor
w.requiredInPlaceValueBound = o.RequiredInPlaceValueBound
w.valueBlockWriter = newValueBlockWriter(
w.blockSize, w.blockSizeThreshold, w.compression, w.checksumType, func(compressedSize int) {
w.coordination.sizeEstimate.dataBlockCompressed(compressedSize, 0)
})
if !o.DisableValueBlocks {
w.valueBlockWriter = newValueBlockWriter(
w.blockSize, w.blockSizeThreshold, w.compression, w.checksumType, func(compressedSize int) {
w.coordination.sizeEstimate.dataBlockCompressed(compressedSize, 0)
})
}
}

w.dataBlockBuf = newDataBlockBuf(w.restartInterval, w.checksumType)
Expand Down
5 changes: 5 additions & 0 deletions sstable/writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,13 +282,18 @@ func TestWriterWithValueBlocks(t *testing.T) {
inPlaceValueBound.Lower = []byte(l)
inPlaceValueBound.Upper = []byte(u)
}
var disableValueBlocks bool
if td.HasArg("disable-value-blocks") {
td.ScanArgs(t, "disable-value-blocks", &disableValueBlocks)
}
meta, r, err = runBuildCmd(td, &WriterOptions{
BlockSize: blockSize,
Comparer: testkeys.Comparer,
TableFormat: formatVersion,
Parallelism: parallelism,
RequiredInPlaceValueBound: inPlaceValueBound,
ShortAttributeExtractor: attributeExtractor,
DisableValueBlocks: disableValueBlocks,
}, 0)
if err != nil {
return err.Error()
Expand Down

0 comments on commit aba7c0c

Please sign in to comment.