Skip to content

Commit

Permalink
LSM tree first draft, flushing not working
Browse files Browse the repository at this point in the history
  • Loading branch information
tomfran committed Oct 5, 2023
1 parent 8fb335b commit 4b37d33
Show file tree
Hide file tree
Showing 18 changed files with 445 additions and 68 deletions.
32 changes: 21 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,19 @@ the operation on the node. All of them have an average time complexity of `O(log
I am using [JMH](https://openjdk.java.net/projects/code-tools/jmh/) to run benchmarks,
the results are obtained on a MacBook Pro (16-inch, 2021) with an M1 Pro processor and 16 GB of RAM.

To run them use `./gradlew jmh`.

### SSTable

- Negative access: the key is not present in the table, hence the Bloom filter will likely stop the search;
- Random access: the key is present in the table, the order of the keys is random.

```
Benchmark Mode Cnt Score Error Units
c.t.l.sstable.SSTableBenchmark.negativeAccess thrpt 10 3541989.316 ± 78933.780 ops/s
c.t.l.sstable.SSTableBenchmark.randomAccess thrpt 10 56157.613 ± 264.314 ops/s
Benchmark Mode Cnt Score Error Units
c.t.l.sstable.SSTableBenchmark.negativeAccess thrpt 10 3541989.316 ± 78933.780 ops/s
c.t.l.sstable.SSTableBenchmark.randomAccess thrpt 10 56157.613 ± 264.314 ops/s
```

### Bloom filter
Expand All @@ -121,9 +125,11 @@ c.t.l.sstable.SSTableBenchmark.randomAccess thrpt 10 56157.613 ± 26
- Contains: test whether the keys are present in the Bloom filter.

```
Benchmark Mode Cnt Score Error Units
c.t.l.bloom.BloomFilterBenchmark.add thrpt 10 9777191.526 ± 168208.916 ops/s
c.t.l.bloom.BloomFilterBenchmark.contains thrpt 10 10724196.205 ± 20411.741 ops/s
Benchmark Mode Cnt Score Error Units
c.t.l.bloom.BloomFilterBenchmark.add thrpt 10 9777191.526 ± 168208.916 ops/s
c.t.l.bloom.BloomFilterBenchmark.contains thrpt 10 10724196.205 ± 20411.741 ops/s
```

### Skip-List
Expand All @@ -132,9 +138,11 @@ c.t.l.bloom.BloomFilterBenchmark.contains thrpt 10 10724196.205 ± 2041
- Add/Remove: add and remove keys from a 100k keys skip-list.

```
Benchmark Mode Cnt Score Error Units
c.t.l.memtable.SkipListBenchmark.addRemove thrpt 10 684885.546 ± 21793.787 ops/s
c.t.l.memtable.SkipListBenchmark.get thrpt 10 823423.128 ± 83028.354 ops/s
Benchmark Mode Cnt Score Error Units
c.t.l.memtable.SkipListBenchmark.addRemove thrpt 10 684885.546 ± 21793.787 ops/s
c.t.l.memtable.SkipListBenchmark.get thrpt 10 823423.128 ± 83028.354 ops/s
```

---
Expand All @@ -149,14 +157,16 @@ c.t.l.memtable.SkipListBenchmark.get thrpt 10 823423.128 ± 8302
- [x] Bloom filter
- [x] Indexes persistence
- [x] File initialization
- [ ] Handle tombstones
- [ ] Skip-List
- [x] Operations
- [x] Iterator
- [ ] Tree
- [ ] Operations
- [x] Operations
- [ ] Background flush
- [ ] Background compaction
- [ ] Benchmarks
- [x] SSTable
- [x] Bloom filter
- [x] Skip-List
- [ ] Tree
- [ ] Tree
17 changes: 15 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
plugins {
id("java")
id "java"
id "me.champeau.jmh" version "0.7.1"
id "application"
}

group = "com.tomfran"
Expand All @@ -27,8 +28,20 @@ jmh {
fork = 1
warmupIterations = 5
iterations = 10
benchmarkMode = ['thrpt']
benchmarkMode = ['avgt']
jmhTimeout = '15s'
jmhVersion = '1.37'
resultFormat = 'JSON'
}

// Fully qualified name of the console entry point, referenced by the application plugin below.
ext {
    javaMainClass = "com.tomfran.lsm.Main"
}

application {
    // `mainClassName` is deprecated on the application extension (and no longer available in
    // recent Gradle releases); `mainClass` is the supported property.
    mainClass = javaMainClass
}

// Connect the `run` task to the invoking terminal's stdin so the program can read user input.
run {
    standardInput = System.in
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import static com.tomfran.lsm.TestUtils.getRandomByteArray;

@OutputTimeUnit(TimeUnit.SECONDS)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
public class BloomFilterBenchmark {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import static com.tomfran.lsm.TestUtils.getRandomPair;

@OutputTimeUnit(TimeUnit.SECONDS)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
public class SkipListBenchmark {

Expand Down
4 changes: 2 additions & 2 deletions src/jmh/java/com/tomfran/lsm/sstable/SSTableBenchmark.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import static com.tomfran.lsm.TestUtils.getRandomPair;

@OutputTimeUnit(TimeUnit.SECONDS)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
public class SSTableBenchmark {

Expand Down Expand Up @@ -59,7 +59,7 @@ public void setup() throws IOException {
skipped.add(e);
}

sstable = new SSTable(DIR + "/sst", inserted, SAMPLE_SIZE, inserted.size());
sstable = new SSTable(DIR + "/sst", inserted, SAMPLE_SIZE);

// shuffle to avoid sequential access
Collections.shuffle(inserted);
Expand Down
78 changes: 78 additions & 0 deletions src/jmh/java/com/tomfran/lsm/tree/LSMTreeBenchmark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package com.tomfran.lsm.tree;

import com.tomfran.lsm.types.ByteArrayPair;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.TimeUnit;

import static com.tomfran.lsm.TestUtils.getRandomPair;

/**
 * JMH benchmark for {@link LSMTree} insertions and lookups.
 * <p>
 * A fresh tree backed by the {@code tree_benchmark} directory is created in {@link #setup()}
 * and the directory is removed again in {@link #teardown()}.
 */
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
public class LSMTreeBenchmark {

    static final Path DIR = Path.of("tree_benchmark");
    static final int NUM_ITEMS = 300000;

    // Pre-generated random pairs, cycled through by both benchmarks.
    static ByteArrayPair[] items;
    // Position of the next item to use; wraps around at NUM_ITEMS.
    static int index = 0;

    LSMTree tree;

    /**
     * Removes any leftover benchmark directory, generates the random items and creates the tree.
     *
     * @throws IOException if a leftover benchmark directory cannot be removed
     */
    @Setup
    public void setup() throws IOException {
        // setup directory
        if (Files.exists(DIR)) {
            deleteDir();
        }

        // generate random items
        items = new ByteArrayPair[NUM_ITEMS];
        for (int i = 0; i < NUM_ITEMS; i++) {
            items[i] = getRandomPair();
        }

        // setup tree
        // NOTE(review): the first argument is presumably the memtable size threshold — confirm
        // against the LSMTree constructor.
        tree = new LSMTree(1 << 15, DIR.toString());
    }

    /**
     * Stops the tree and removes the benchmark directory.
     *
     * @throws IOException if the benchmark directory cannot be removed
     */
    @TearDown
    public void teardown() throws IOException {
        tree.stop();
        deleteDir();
    }

    /**
     * Deletes every entry directly inside {@link #DIR}, then the directory itself.
     * <p>
     * The first deletion failure is propagated to the caller instead of being printed and
     * ignored, so the real cause is not hidden behind a later "directory not empty" error.
     *
     * @throws IOException if listing the directory or deleting any entry fails
     */
    private void deleteDir() throws IOException {
        try (var files = Files.list(DIR)) {
            // Plain loop instead of forEach so the checked IOException can propagate.
            for (Path file : (Iterable<Path>) files::iterator) {
                Files.delete(file);
            }
        }
        Files.delete(DIR);
    }

    /**
     * Inserts the next pre-generated pair into the tree.
     */
    @Benchmark
    public void add() {
        var item = items[index];
        tree.add(item);

        index = (index + 1) % NUM_ITEMS;
    }

    /**
     * Looks up the key of the next pre-generated pair.
     * <p>
     * NOTE(review): setup() never inserts anything into the tree, so unless another benchmark
     * populated the same state this appears to measure lookups of absent keys — confirm that
     * this is intended.
     *
     * @param bh sink that keeps the lookup result from being optimized away
     */
    @Benchmark
    public void get(Blackhole bh) {
        var item = items[index];
        var value = tree.get(item.key());

        bh.consume(value);

        index = (index + 1) % NUM_ITEMS;
    }

}
85 changes: 85 additions & 0 deletions src/main/java/com/tomfran/lsm/Main.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package com.tomfran.lsm;

import com.tomfran.lsm.tree.LSMTree;
import com.tomfran.lsm.types.ByteArrayPair;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Scanner;

/**
 * Minimal interactive console on top of {@link LSMTree}.
 * <p>
 * Reads one command per line from standard input ({@code ins}, {@code get}, {@code del},
 * {@code exit}). The data directory is wiped on startup and removed again on normal exit.
 */
public class Main {

    static final String DIRECTORY = "LSM-data";

    // Explicit charset so keys and values are encoded identically on every platform.
    private static final java.nio.charset.Charset CHARSET = java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Runs the console loop until {@code exit} is entered or standard input is exhausted.
     *
     * @param args ignored
     * @throws IOException if the data directory cannot be cleaned up
     */
    public static void main(String[] args) throws IOException {

        if (Files.exists(Path.of(DIRECTORY))) {
            deleteDir();
        }

        LSMTree tree = new LSMTree(3, DIRECTORY);

        System.out.println(
                """
                LSM Tree console
                Commands:
                - ins <key> <value> : insert a key-value pair
                - get <key> : get a value for a key
                - del <key> : delete a key-value pair
                - exit : exit the application
                """
        );

        try (Scanner scanner = new Scanner(System.in)) {
            scanner.useDelimiter("\n");

            boolean exit = false;

            while (!exit) {
                System.out.print("Enter a command: ");

                // End of input (Ctrl-D, closed pipe): leave the loop instead of letting
                // nextLine() throw NoSuchElementException.
                if (!scanner.hasNextLine()) {
                    break;
                }
                String command = scanner.nextLine();

                var parts = command.split(" ");

                switch (parts[0]) {
                    case "exit" -> {
                        System.out.println("Exiting...");
                        exit = true;
                    }
                    case "ins" -> {
                        if (parts.length < 3) {
                            System.out.println("Usage: ins <key> <value>");
                        } else {
                            tree.add(new ByteArrayPair(parts[1].getBytes(CHARSET), parts[2].getBytes(CHARSET)));
                        }
                    }
                    case "del" -> {
                        if (parts.length < 2) {
                            System.out.println("Usage: del <key>");
                        } else {
                            tree.delete(parts[1].getBytes(CHARSET));
                        }
                    }
                    case "get" -> {
                        if (parts.length < 2) {
                            System.out.println("Usage: get <key>");
                        } else {
                            String key = parts[1];
                            byte[] value = tree.get(key.getBytes(CHARSET));

                            // A missing key and an empty value are both reported as "not found".
                            var msg = (value == null || value.length == 0)
                                    ? "No value found for key " + key
                                    : "Value for key " + key + " is " + new String(value, CHARSET);
                            System.out.println(msg);
                        }
                    }
                    default -> System.out.println("Unknown command: " + command);
                }
                System.out.println();
            }
        } finally {
            // Stop the tree even when the loop fails.
            tree.stop();
        }

        deleteDir();
    }

    /**
     * Deletes every entry directly inside {@link #DIRECTORY}, then the directory itself.
     * <p>
     * The first deletion failure is propagated instead of being printed and ignored.
     *
     * @throws IOException if listing the directory or deleting any entry fails
     */
    private static void deleteDir() throws IOException {
        Path dir = Path.of(DIRECTORY);
        try (var files = Files.list(dir)) {
            // Plain loop instead of forEach so the checked IOException can propagate.
            for (Path file : (Iterable<Path>) files::iterator) {
                Files.delete(file);
            }
        }
        Files.delete(dir);
    }

}
11 changes: 11 additions & 0 deletions src/main/java/com/tomfran/lsm/bloom/BloomFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,20 @@
*/
public class BloomFilter {

static final int DEFAULT_SIZE = 1 << 20;

final int size;
final int hashCount;
final long[] bits;

/**
* Create a new Bloom filter with the default number of expected insertions
* ({@code DEFAULT_SIZE}, i.e. {@code 1 << 20}) and a false positive rate of 0.1%.
*/
public BloomFilter() {
this(DEFAULT_SIZE, 0.001);
}


/**
* Create a new Bloom filter with the given expected insertions and a false positive rate of 0.1%.
*
Expand All @@ -35,6 +45,7 @@ public BloomFilter(int expectedInsertions) {
this(expectedInsertions, 0.001);
}


/**
* Create a new Bloom filter with the given expected insertions and false positive rate.
*
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/tomfran/lsm/io/BaseInputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public long readVByteLong() {

shift += 7;
}
return result;
return result - 1;
}

public long readLong() {
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/com/tomfran/lsm/io/BaseOutputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ byte[] intToVByte(int n) {
}

private byte[] longToVByte(long n) {
n++;

if (n <= 0) {
throw new IllegalArgumentException("n must be greater than 0");
}
Expand Down
17 changes: 11 additions & 6 deletions src/main/java/com/tomfran/lsm/memtable/Memtable.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import com.tomfran.lsm.sstable.SSTable;
import com.tomfran.lsm.types.ByteArrayPair;

public class Memtable {

static final int DEFAULT_SSTABLE_SAMPLE_SIZE = 1 << 10;
import java.util.Iterator;

public class Memtable implements Iterable<ByteArrayPair> {

SkipList list;

public Memtable() {
Expand All @@ -26,15 +26,20 @@ public byte[] get(byte[] key) {
}

public void remove(byte[] key) {
list.add(new ByteArrayPair(key, null));
list.add(new ByteArrayPair(key, new byte[]{}));
}

/**
* Return the size reported by the underlying skip list.
*
* @return the value of {@code list.size()}
*/
public int size() {
return list.size();
}

public SSTable flush(String filename) {
return new SSTable(filename, list, DEFAULT_SSTABLE_SAMPLE_SIZE, list.size());
public SSTable flush(String filename, int sampleSize) {
return new SSTable(filename, list, sampleSize);
}

/**
* Iterate over the pairs held by the underlying skip list.
*
* @return the iterator provided by the skip list
*/
@Override
public Iterator<ByteArrayPair> iterator() {
return list.iterator();
}

}
Loading

0 comments on commit 4b37d33

Please sign in to comment.