Skip to content

Commit

Permalink
SSTable lazy retrieval, 15% speedup
Browse files Browse the repository at this point in the history
  • Loading branch information
tomfran committed Oct 2, 2023
1 parent 281f87d commit 826d331
Show file tree
Hide file tree
Showing 19 changed files with 230 additions and 227 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ The steps are:
1. Use the Bloom filter to test whether the key might be in the table;
2. If the key might be present, use binary search on the index to find the maximum lower bound of the key;
3. Scan the data from the position found in the previous step to find the key-value pair. The search
can stop when the we are seeing a key greater than the one we are looking for.
can stop when we are seeing a key greater than the one we are looking for, or when we reach the end of the table.

The search is as lazy as possible, meaning that we read the minimum amount of data from disk.
For instance, if the length of the next key is smaller than the length of the key we are looking for, we can skip the whole key-value pair.

### Persistence

Expand All @@ -52,6 +55,7 @@ A table is persisted to disk when it is created. A base filename is defined, and
- `n`: number of entries in the index;
- `o_1, o_2 - o_1, ..., o_n - o_n-1`: offsets of the key-value pairs in the data file, skipping
the first one;
- `s_1, s_2, ..., s_n`: remaining keys after a sparse index entry, used to exit from search;
- `<key_len_1, key_1, ... key_len_n, key_n>`: keys in the index.

**Filter format**
Expand Down
12 changes: 6 additions & 6 deletions src/jmh/java/com/tomfran/lsm/memtable/SkipListBenchmark.java
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
package com.tomfran.lsm.memtable;

import com.tomfran.lsm.types.Item;
import com.tomfran.lsm.types.ByteArrayPair;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

import java.util.concurrent.TimeUnit;

import static com.tomfran.lsm.TestUtils.getRandomItem;
import static com.tomfran.lsm.TestUtils.getRandomPair;

@OutputTimeUnit(TimeUnit.SECONDS)
@State(Scope.Benchmark)
public class SkipListBenchmark {

SkipList l;
Item[] items;
ByteArrayPair[] items;

int NUM_ITEMS = 200000;
int index = 0;
Expand All @@ -27,16 +27,16 @@ public void setup() {
l = new SkipList(NUM_ITEMS / 2);

// generate random items and insert half
ObjectArrayList<Item> tmp = new ObjectArrayList<>();
ObjectArrayList<ByteArrayPair> tmp = new ObjectArrayList<>();
for (int i = 0; i < NUM_ITEMS; i++) {
var it = getRandomItem();
var it = getRandomPair();
if (i < NUM_ITEMS / 2)
l.add(it);

tmp.add(it);
}

items = tmp.toArray(new Item[0]);
items = tmp.toArray(new ByteArrayPair[0]);

// generate sequence of add/remove operations
addRemove = new boolean[NUM_ITEMS];
Expand Down
20 changes: 10 additions & 10 deletions src/jmh/java/com/tomfran/lsm/sstable/SSTableBenchmark.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package com.tomfran.lsm.sstable;

import com.tomfran.lsm.comparator.ByteArrayComparator;
import com.tomfran.lsm.types.Item;
import com.tomfran.lsm.types.ByteArrayPair;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import org.openjdk.jmh.annotations.*;
Expand All @@ -13,7 +13,7 @@
import java.util.Collections;
import java.util.concurrent.TimeUnit;

import static com.tomfran.lsm.TestUtils.getRandomItem;
import static com.tomfran.lsm.TestUtils.getRandomPair;

@OutputTimeUnit(TimeUnit.SECONDS)
@State(Scope.Benchmark)
Expand All @@ -23,8 +23,8 @@ public class SSTableBenchmark {
static final int NUM_ITEMS = 100000;
static final int SAMPLE_SIZE = NUM_ITEMS / 1000;

static Item[] insertedArray;
static Item[] skippedArray;
static ByteArrayPair[] insertedArray;
static ByteArrayPair[] skippedArray;
static SSTable sstable;

static int index = 0;
Expand All @@ -38,18 +38,18 @@ public void setup() throws IOException {
Files.createDirectory(DIR);

// generate random items
var l = new ObjectOpenHashSet<Item>();
var l = new ObjectOpenHashSet<ByteArrayPair>();
for (int i = 0; i < NUM_ITEMS * 2; i++) {
l.add(getRandomItem());
l.add(getRandomPair());
}

// sort and divide into inserted and skipped
var items = l.stream()
.sorted((a, b) -> ByteArrayComparator.compare(a.key(), b.key()))
.toList();

var inserted = new ObjectArrayList<Item>();
var skipped = new ObjectArrayList<Item>();
var inserted = new ObjectArrayList<ByteArrayPair>();
var skipped = new ObjectArrayList<ByteArrayPair>();

for (int i = 0; i < items.size(); i++) {
var e = items.get(i);
Expand All @@ -64,8 +64,8 @@ public void setup() throws IOException {
// shuffle to avoid sequential access
Collections.shuffle(inserted);
Collections.shuffle(skipped);
insertedArray = inserted.toArray(Item[]::new);
skippedArray = skipped.toArray(Item[]::new);
insertedArray = inserted.toArray(ByteArrayPair[]::new);
skippedArray = skipped.toArray(ByteArrayPair[]::new);
}

@TearDown
Expand Down
32 changes: 32 additions & 0 deletions src/main/java/com/tomfran/lsm/io/BaseInputStream.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.tomfran.lsm.io;

import com.tomfran.lsm.types.ByteArrayPair;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;

import java.io.FileInputStream;
Expand Down Expand Up @@ -67,6 +68,37 @@ public byte[] readNBytes(int n) {
}
}

/**
 * Reads one key-value pair from the stream.
 *
 * <p>Two lengths are read first with {@code readVByteInt()} (key length, then
 * value length), followed by that many key bytes and value bytes.
 *
 * @return the pair that was read, or {@code null} if any exception is raised
 *         while reading or while building the pair
 */
public ByteArrayPair readBytePair() {
    try {
        // Both lengths precede the payload, so they are consumed before any data bytes.
        final int keySize = readVByteInt();
        final int valueSize = readVByteInt();

        final byte[] keyData = readNBytes(keySize);
        final byte[] valueData = readNBytes(valueSize);
        return new ByteArrayPair(keyData, valueData);
    } catch (Exception ignored) {
        // NOTE(review): every failure is reported as null; presumably callers use
        // this as an "no more pairs" signal -- confirm before narrowing the catch.
        return null;
    }
}

/**
 * Reads a length-prefixed byte array: a length obtained from
 * {@code readVByteInt()}, then that many bytes requested from the underlying stream.
 *
 * @return the bytes returned by {@code fis.readNBytes}
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 */
public byte[] readByteArray() {
    final byte[] payload;
    try {
        final int size = readVByteInt();
        // NOTE(review): if fis follows java.io.InputStream#readNBytes semantics, fewer
        // than `size` bytes may come back at end of stream -- confirm callers cope.
        payload = fis.readNBytes(size);
    } catch (IOException cause) {
        throw new RuntimeException(cause);
    }
    return payload;
}

/**
 * Asks the underlying stream to skip {@code n} bytes.
 *
 * @param n number of bytes to skip
 * @return the value reported by {@code fis.skip(n)}
 * @throws RuntimeException wrapping any {@link IOException} raised by the stream
 */
public long skip(int n) {
    final long skipped;
    try {
        skipped = fis.skip(n);
    } catch (IOException cause) {
        throw new RuntimeException(cause);
    }
    return skipped;
}

public void seek(long offset) {
try {
fis.position(offset);
Expand Down
15 changes: 15 additions & 0 deletions src/main/java/com/tomfran/lsm/io/BaseOutputStream.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.tomfran.lsm.io;

import com.tomfran.lsm.types.ByteArrayPair;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;

import java.io.FileOutputStream;
Expand Down Expand Up @@ -39,6 +40,20 @@ public int writeLong(long n) {
return write(longToBytes(n));
}

/**
 * Serializes a key-value pair and writes it with a single {@code write} call.
 *
 * <p>Layout of the written bytes, in order: encoded key length, encoded value
 * length (both produced by {@code intToVByte}), key bytes, value bytes.
 *
 * @param pair the pair to write
 * @return whatever {@code write(byte[])} returns for the assembled buffer
 */
public int writeBytePair(ByteArrayPair pair) {
    final byte[] keyData = pair.key();
    final byte[] valueData = pair.value();
    final byte[] keyHeader = intToVByte(keyData.length);
    // NOTE(review): a pair whose value() is null would fail on the next line -- confirm
    // that such pairs never reach this method.
    final byte[] valueHeader = intToVByte(valueData.length);

    // Sections in the exact order they appear in the output.
    final byte[][] sections = {keyHeader, valueHeader, keyData, valueData};

    int total = 0;
    for (byte[] section : sections) {
        total += section.length;
    }

    final byte[] packed = new byte[total];
    int cursor = 0;
    for (byte[] section : sections) {
        System.arraycopy(section, 0, packed, cursor, section.length);
        cursor += section.length;
    }
    return write(packed);
}

/**
 * Encodes an int by delegating to {@code longToVByte}, passing {@code n} through unchanged.
 *
 * @param n the value to encode
 * @return the byte array produced by {@code longToVByte(n)}
 */
byte[] intToVByte(int n) {
return longToVByte(n);
}
Expand Down
25 changes: 0 additions & 25 deletions src/main/java/com/tomfran/lsm/io/ItemsInputStream.java

This file was deleted.

29 changes: 0 additions & 29 deletions src/main/java/com/tomfran/lsm/io/ItemsOutputStream.java

This file was deleted.

8 changes: 4 additions & 4 deletions src/main/java/com/tomfran/lsm/memtable/Memtable.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package com.tomfran.lsm.memtable;

import com.tomfran.lsm.sstable.SSTable;
import com.tomfran.lsm.types.Item;
import com.tomfran.lsm.types.ByteArrayPair;

public class Memtable {

Expand All @@ -17,16 +17,16 @@ public Memtable(int numElements) {
list = new SkipList(numElements);
}

public void add(Item item) {
public void add(ByteArrayPair item) {
list.add(item);
}

public Item get(byte[] key) {
public ByteArrayPair get(byte[] key) {
return list.get(key);
}

public void remove(byte[] key) {
list.add(new Item(key, null));
list.add(new ByteArrayPair(key, null));
}

public int size() {
Expand Down
18 changes: 9 additions & 9 deletions src/main/java/com/tomfran/lsm/memtable/SkipList.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package com.tomfran.lsm.memtable;

import com.tomfran.lsm.types.Item;
import com.tomfran.lsm.types.ByteArrayPair;
import it.unimi.dsi.util.XoRoShiRo128PlusRandom;

import java.util.Iterator;
Expand All @@ -12,7 +12,7 @@
/**
* A skip list implementation of items.
*/
public class SkipList implements Iterable<Item> {
public class SkipList implements Iterable<ByteArrayPair> {

static final int DEFAULT_ELEMENTS = 1 << 16;

Expand Down Expand Up @@ -49,7 +49,7 @@ public SkipList(int numElements) {
*
* @param item The item to add.
*/
public void add(Item item) {
public void add(ByteArrayPair item) {
Node current = sentinel;
for (int i = levels - 1; i >= 0; i--) {
while (current.next[i] != null && current.next[i].value.compareTo(item) < 0)
Expand Down Expand Up @@ -83,7 +83,7 @@ private int randomLevel() {
* @param key The key of the item to retrieve.
* @return The item if found, null otherwise.
*/
public Item get(byte[] key) {
public ByteArrayPair get(byte[] key) {
Node current = sentinel;
for (int i = levels - 1; i >= 0; i--) {
while (current.next[i] != null && compare(current.next[i].value.key(), key) < 0)
Expand Down Expand Up @@ -135,7 +135,7 @@ public int size() {
* @return An iterator over the items in the skip list.
*/
@Override
public Iterator<Item> iterator() {
public Iterator<ByteArrayPair> iterator() {
return new SkipListIterator(sentinel);
}

Expand All @@ -155,24 +155,24 @@ public String toString() {
}

private static final class Node {
Item value;
ByteArrayPair value;
Node[] next;

Node(Item value, int numLevels) {
Node(ByteArrayPair value, int numLevels) {
this.value = value;
this.next = new Node[numLevels];
}
}

private record SkipListIterator(Node node) implements Iterator<Item> {
private record SkipListIterator(Node node) implements Iterator<ByteArrayPair> {

@Override
public boolean hasNext() {
return node.next[0] != null;
}

@Override
public Item next() {
public ByteArrayPair next() {
return node.next[0].value;
}
}
Expand Down
Loading

0 comments on commit 826d331

Please sign in to comment.