From b013ca78e9dc2b468d60f8a6b46ae7fccc488acc Mon Sep 17 00:00:00 2001 From: Arjun Nair Date: Sun, 1 Oct 2023 20:51:39 -0400 Subject: [PATCH] db: keep track of virtual sstable size sum The virtual sstable size sum is useful because, together with the backing table size, it can be used to compute the space amplification associated with virtual sstables. --- internal/manifest/level_metadata.go | 29 +++++++++++--- metrics.go | 27 +++++++++++++ metrics_test.go | 10 ++++- testdata/event_listener | 2 + testdata/ingest | 1 + testdata/metrics | 60 ++++++++++++++++++++++++----- tool/testdata/db_lsm | 1 + version_set.go | 7 ++++ 8 files changed, 122 insertions(+), 15 deletions(-) diff --git a/internal/manifest/level_metadata.go b/internal/manifest/level_metadata.go index 1af510f820..d48e277900 100644 --- a/internal/manifest/level_metadata.go +++ b/internal/manifest/level_metadata.go @@ -19,17 +19,20 @@ type LevelMetadata struct { totalSize uint64 // NumVirtual is the number of virtual sstables in the level. NumVirtual uint64 - tree btree + // VirtualSize is the size of the virtual sstables in the level. + VirtualSize uint64 + tree btree } // clone makes a copy of the level metadata, implicitly increasing the ref // count of every file contained within lm. 
func (lm *LevelMetadata) clone() LevelMetadata { return LevelMetadata{ - level: lm.level, - totalSize: lm.totalSize, - NumVirtual: lm.NumVirtual, - tree: lm.tree.Clone(), + level: lm.level, + totalSize: lm.totalSize, + NumVirtual: lm.NumVirtual, + VirtualSize: lm.VirtualSize, + tree: lm.tree.Clone(), } } @@ -49,6 +52,7 @@ func makeLevelMetadata(cmp Compare, level int, files []*FileMetadata) LevelMetad lm.totalSize += f.Size if f.Virtual { lm.NumVirtual++ + lm.VirtualSize += f.Size } } return lm @@ -70,6 +74,7 @@ func (lm *LevelMetadata) insert(f *FileMetadata) error { lm.totalSize += f.Size if f.Virtual { lm.NumVirtual++ + lm.VirtualSize += f.Size } return nil } @@ -78,6 +83,7 @@ func (lm *LevelMetadata) remove(f *FileMetadata) bool { lm.totalSize -= f.Size if f.Virtual { lm.NumVirtual-- + lm.VirtualSize -= f.Size } return lm.tree.Delete(f) } @@ -328,6 +334,19 @@ func (ls *LevelSlice) NumVirtual() uint64 { return n } +// VirtualSizeSum returns the sum of the sizes of the virtual sstables in the +// level. +func (ls *LevelSlice) VirtualSizeSum() uint64 { + var sum uint64 + iter := ls.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if f.Virtual { + sum += f.Size + } + } + return sum +} + // Reslice constructs a new slice backed by the same underlying level, with // new start and end positions. Reslice invokes the provided function, passing // two LevelIterators: one positioned to i's inclusive start and one diff --git a/metrics.go b/metrics.go index 038d023b84..cab929114d 100644 --- a/metrics.go +++ b/metrics.go @@ -48,6 +48,8 @@ type LevelMetrics struct { NumVirtualFiles uint64 // The total size in bytes of the files in the level. Size int64 + // The total size of the virtual sstables in the level. + VirtualSize uint64 // The level's compaction score. This is the compensatedScoreRatio in the // candidateLevelInfo. 
Score float64 @@ -110,6 +112,7 @@ type LevelMetrics struct { func (m *LevelMetrics) Add(u *LevelMetrics) { m.NumFiles += u.NumFiles m.NumVirtualFiles += u.NumVirtualFiles + m.VirtualSize += u.VirtualSize m.Size += u.Size m.BytesIn += u.BytesIn m.BytesIngested += u.BytesIngested @@ -334,6 +337,27 @@ func (m *Metrics) DiskSpaceUsage() uint64 { return usageBytes } +// NumVirtual is the number of virtual sstables in the latest version +// summed over every level in the lsm. +func (m *Metrics) NumVirtual() uint64 { + var n uint64 + for _, level := range m.Levels { + n += level.NumVirtualFiles + } + return n +} + +// VirtualSize is the sum of the sizes of the virtual sstables in the +// latest version. BackingTableSize - VirtualSize gives an estimate for +// the space amplification caused by not compacting virtual sstables. +func (m *Metrics) VirtualSize() uint64 { + var size uint64 + for _, level := range m.Levels { + size += level.VirtualSize + } + return size +} + // ReadAmp returns the current read amplification of the database. // It's computed as the number of sublevels in L0 + the number of non-empty // levels below L0. 
@@ -540,6 +564,9 @@ func (m *Metrics) SafeFormat(w redact.SafePrinter, _ rune) { w.Printf("Backing tables: %d (%s)\n", redact.Safe(m.Table.BackingTableCount), humanize.Bytes.Uint64(m.Table.BackingTableSize)) + w.Printf("Virtual tables: %d (%s)\n", + redact.Safe(m.NumVirtual()), + humanize.Bytes.Uint64(m.VirtualSize())) formatCacheMetrics := func(m *CacheMetrics, name redact.SafeString) { w.Printf("%s: %s entries (%s) hit rate: %.1f%%\n", diff --git a/metrics_test.go b/metrics_test.go index 5749940181..d9025a5eff 100644 --- a/metrics_test.go +++ b/metrics_test.go @@ -69,7 +69,8 @@ func exampleMetrics() Metrics { base := uint64((i + 1) * 100) l.Sublevels = int32(i + 1) l.NumFiles = int64(base) + 1 - l.NumVirtualFiles = uint64(base) / 2 + l.NumVirtualFiles = uint64(base) + 1 + l.VirtualSize = base + 3 l.Size = int64(base) + 2 l.Score = float64(base) + 3 l.BytesIn = base + 4 @@ -264,8 +265,15 @@ func TestMetrics(t *testing.T) { buf.WriteString(fmt.Sprintf("%d\n", m.Table.BackingTableCount)) } else if line == "backing-size" { buf.WriteString(fmt.Sprintf("%s\n", humanize.Bytes.Uint64(m.Table.BackingTableSize))) + } else if line == "virtual-size" { + buf.WriteString(fmt.Sprintf("%s\n", humanize.Bytes.Uint64(m.VirtualSize()))) } else if strings.HasPrefix(line, "num-virtual") { splits := strings.Split(line, " ") + if len(splits) == 1 { + buf.WriteString(fmt.Sprintf("%d\n", m.NumVirtual())) + continue + } + // Level is specified. 
l, err := strconv.Atoi(splits[1]) if err != nil { panic(err) diff --git a/testdata/event_listener b/testdata/event_listener index de90d51a12..2922840717 100644 --- a/testdata/event_listener +++ b/testdata/event_listener @@ -287,6 +287,7 @@ Compactions: 1 estimated debt: 2.0KB in progress: 0 (0B) MemTables: 1 (256KB) zombie: 1 (256KB) Zombie tables: 0 (0B) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 6 entries (1.1KB) hit rate: 11.1% Table cache: 1 entries (800B) hit rate: 40.0% Secondary cache: 0 entries (0B) hit rate: 0.0% @@ -385,6 +386,7 @@ Compactions: 1 estimated debt: 4.0KB in progress: 0 (0B) MemTables: 1 (512KB) zombie: 1 (512KB) Zombie tables: 0 (0B) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 12 entries (2.3KB) hit rate: 14.3% Table cache: 1 entries (800B) hit rate: 50.0% Secondary cache: 0 entries (0B) hit rate: 0.0% diff --git a/testdata/ingest b/testdata/ingest index 4500b79588..ee30ab9761 100644 --- a/testdata/ingest +++ b/testdata/ingest @@ -50,6 +50,7 @@ Compactions: 0 estimated debt: 0B in progress: 0 (0B) MemTables: 1 (256KB) zombie: 0 (0B) Zombie tables: 0 (0B) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 6 entries (1.2KB) hit rate: 35.7% Table cache: 1 entries (800B) hit rate: 50.0% Secondary cache: 0 entries (0B) hit rate: 0.0% diff --git a/testdata/metrics b/testdata/metrics index 088677dc4f..cfaf560509 100644 --- a/testdata/metrics +++ b/testdata/metrics @@ -3,14 +3,14 @@ example | | | | ingested | moved | written | | amp | multilevel level | tables size val-bl vtables | score | in | tables size | tables size | tables size | read | r w | top in read ------+-----------------------------+-------+-------+--------------+--------------+--------------+-------+----------+------------------ - 0 | 101 102B 0B 50 | 103.0 | 104B | 112 104B | 113 106B | 221 217B | 107B | 1 2.1 | 104B 104B 104B - 1 | 201 202B 0B 100 | 203.0 | 204B | 212 204B | 213 206B | 421 417B | 207B | 2 2.0 | 204B 204B 204B - 2 | 301 302B 
0B 150 | 303.0 | 304B | 312 304B | 313 306B | 621 617B | 307B | 3 2.0 | 304B 304B 304B - 3 | 401 402B 0B 200 | 403.0 | 404B | 412 404B | 413 406B | 821 817B | 407B | 4 2.0 | 404B 404B 404B - 4 | 501 502B 0B 250 | 503.0 | 504B | 512 504B | 513 506B | 1.0K 1017B | 507B | 5 2.0 | 504B 504B 504B - 5 | 601 602B 0B 300 | 603.0 | 604B | 612 604B | 613 606B | 1.2K 1.2KB | 607B | 6 2.0 | 604B 604B 604B - 6 | 701 702B 0B 350 | - | 704B | 712 704B | 713 706B | 1.4K 1.4KB | 707B | 7 2.0 | 704B 704B 704B -total | 2.8K 2.7KB 0B 1.4K | - | 2.8KB | 2.9K 2.8KB | 2.9K 2.8KB | 5.7K 8.4KB | 2.8KB | 28 3.0 | 2.8KB 2.8KB 2.8KB + 0 | 101 102B 0B 101 | 103.0 | 104B | 112 104B | 113 106B | 221 217B | 107B | 1 2.1 | 104B 104B 104B + 1 | 201 202B 0B 201 | 203.0 | 204B | 212 204B | 213 206B | 421 417B | 207B | 2 2.0 | 204B 204B 204B + 2 | 301 302B 0B 301 | 303.0 | 304B | 312 304B | 313 306B | 621 617B | 307B | 3 2.0 | 304B 304B 304B + 3 | 401 402B 0B 401 | 403.0 | 404B | 412 404B | 413 406B | 821 817B | 407B | 4 2.0 | 404B 404B 404B + 4 | 501 502B 0B 501 | 503.0 | 504B | 512 504B | 513 506B | 1.0K 1017B | 507B | 5 2.0 | 504B 504B 504B + 5 | 601 602B 0B 601 | 603.0 | 604B | 612 604B | 613 606B | 1.2K 1.2KB | 607B | 6 2.0 | 604B 604B 604B + 6 | 701 702B 0B 701 | - | 704B | 712 704B | 713 706B | 1.4K 1.4KB | 707B | 7 2.0 | 704B 704B 704B +total | 2.8K 2.7KB 0B 2.8K | - | 2.8KB | 2.9K 2.8KB | 2.9K 2.8KB | 5.7K 8.4KB | 2.8KB | 28 3.0 | 2.8KB 2.8KB 2.8KB --------------------------------------------------------------------------------------------------------------------------------------- WAL: 22 files (24B) in: 25B written: 26B (4% overhead) Flushes: 8 @@ -19,6 +19,7 @@ Compactions: 5 estimated debt: 6B in progress: 2 (7B) MemTables: 12 (11B) zombie: 14 (13B) Zombie tables: 16 (15B) Backing tables: 1 (2.0MB) +Virtual tables: 2807 (2.8KB) Block cache: 2 entries (1B) hit rate: 42.9% Table cache: 18 entries (17B) hit rate: 48.7% Secondary cache: 0 entries (0B) hit rate: 0.0% @@ -65,6 +66,7 @@ 
Compactions: 0 estimated debt: 0B in progress: 0 (0B) MemTables: 1 (256KB) zombie: 1 (256KB) Zombie tables: 0 (0B) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 3 entries (556B) hit rate: 0.0% Table cache: 1 entries (800B) hit rate: 0.0% Secondary cache: 0 entries (0B) hit rate: 0.0% @@ -118,6 +120,7 @@ Compactions: 1 estimated debt: 0B in progress: 0 (0B) MemTables: 1 (256KB) zombie: 2 (512KB) Zombie tables: 2 (1.3KB) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 5 entries (1.1KB) hit rate: 42.9% Table cache: 2 entries (1.6KB) hit rate: 66.7% Secondary cache: 0 entries (0B) hit rate: 0.0% @@ -156,6 +159,7 @@ Compactions: 1 estimated debt: 0B in progress: 0 (0B) MemTables: 1 (256KB) zombie: 2 (512KB) Zombie tables: 2 (1.3KB) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 5 entries (1.1KB) hit rate: 42.9% Table cache: 2 entries (1.6KB) hit rate: 66.7% Secondary cache: 0 entries (0B) hit rate: 0.0% @@ -191,6 +195,7 @@ Compactions: 1 estimated debt: 0B in progress: 0 (0B) MemTables: 1 (256KB) zombie: 2 (512KB) Zombie tables: 1 (661B) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 3 entries (556B) hit rate: 42.9% Table cache: 1 entries (800B) hit rate: 66.7% Secondary cache: 0 entries (0B) hit rate: 0.0% @@ -229,6 +234,7 @@ Compactions: 1 estimated debt: 0B in progress: 0 (0B) MemTables: 1 (256KB) zombie: 1 (256KB) Zombie tables: 0 (0B) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 0 entries (0B) hit rate: 42.9% Table cache: 0 entries (0B) hit rate: 66.7% Secondary cache: 0 entries (0B) hit rate: 0.0% @@ -293,6 +299,7 @@ Compactions: 1 estimated debt: 2.9KB in progress: 0 (0B) MemTables: 1 (256KB) zombie: 1 (256KB) Zombie tables: 0 (0B) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 0 entries (0B) hit rate: 42.9% Table cache: 0 entries (0B) hit rate: 66.7% Secondary cache: 0 entries (0B) hit rate: 0.0% @@ -341,6 +348,7 @@ Compactions: 2 estimated debt: 0B in progress: 0 (0B) MemTables: 1 
(256KB) zombie: 1 (256KB) Zombie tables: 0 (0B) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 0 entries (0B) hit rate: 27.3% Table cache: 0 entries (0B) hit rate: 58.3% Secondary cache: 0 entries (0B) hit rate: 0.0% @@ -435,6 +443,7 @@ Compactions: 2 estimated debt: 5.0KB in progress: 0 (0B) MemTables: 1 (1.0MB) zombie: 1 (1.0MB) Zombie tables: 0 (0B) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 12 entries (2.4KB) hit rate: 31.1% Table cache: 3 entries (2.3KB) hit rate: 57.9% Secondary cache: 0 entries (0B) hit rate: 0.0% @@ -494,15 +503,20 @@ lsm 000014:[c@15#0,SET-c@14#0,SET] 000028:[z#32,SET-z#32,SET] -# There should be 2 backing tables. +# There should be 2 backing tables. Note that tiny sstables have inaccurate +# virtual sstable sizes. metrics-value num-backing backing-size +num-virtual num-virtual 0 +virtual-size ---- 2 1.3KB 2 +2 +102B # Virtualize a virtual sstable. build ext1 @@ -535,8 +549,36 @@ lsm metrics-value num-backing backing-size +num-virtual num-virtual 0 +virtual-size ---- 2 1.3KB 2 +2 +102B + +compact a-z +---- +6: + 000008:[a#0,SET-b#0,SET] + 000013:[c@20#0,SET-c@16#0,SET] + 000014:[c@15#0,SET-c@14#0,SET] + 000033:[d#0,SET-m#0,SET] + 000028:[z#32,SET-z#32,SET] + 000031:[zz#33,SET-zz#33,SET] + +# Virtual sstables metrics should be gone after the compaction. 
+metrics-value +num-backing +backing-size +num-virtual +num-virtual 0 +virtual-size +---- +0 +0B +0 +0 +0B diff --git a/tool/testdata/db_lsm b/tool/testdata/db_lsm index d1e1838961..f35bf88103 100644 --- a/tool/testdata/db_lsm +++ b/tool/testdata/db_lsm @@ -29,6 +29,7 @@ Compactions: 0 estimated debt: 0B in progress: 0 (0B) MemTables: 1 (256KB) zombie: 0 (0B) Zombie tables: 0 (0B) Backing tables: 0 (0B) +Virtual tables: 0 (0B) Block cache: 0 entries (0B) hit rate: 0.0% Table cache: 0 entries (0B) hit rate: 0.0% Secondary cache: 0 entries (0B) hit rate: 0.0% diff --git a/version_set.go b/version_set.go index 4a9dc292ed..46f2372306 100644 --- a/version_set.go +++ b/version_set.go @@ -619,6 +619,7 @@ func (vs *versionSet) logAndApply( l := &vs.metrics.Levels[i] l.NumFiles = int64(newVersion.Levels[i].Len()) l.NumVirtualFiles = newVersion.Levels[i].NumVirtual + l.VirtualSize = newVersion.Levels[i].VirtualSize l.Size = int64(newVersion.Levels[i].Size()) l.Sublevels = 0 @@ -636,6 +637,12 @@ func (vs *versionSet) logAndApply( i, l.NumVirtualFiles, nVirtual, ) } + if vSize := levelFiles.VirtualSizeSum(); vSize != l.VirtualSize { + vs.opts.Logger.Fatalf( + "versionSet metrics L%d Virtual size = %d, actual size = %d", + i, l.VirtualSize, vSize, + ) + } } } vs.metrics.Levels[0].Sublevels = int32(len(newVersion.L0SublevelFiles))