diff --git a/README.md b/README.md index fbbc906..3cdbdf7 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,10 @@ The steps are: 1. Use the Bloom filter to test whether the key might be in the table; 2. If the key might be present, use binary search on the index to find the maximum lower bound of the key; 3. Scan the data from the position found in the previous step to find the key-value pair. The search - can stop when the we are seeing a key greater than the one we are looking for. + can stop when we are seeing a key greater than the one we are looking for, or when we reach the end of the table. + +The search is as lazy as possible, meaning that we read the minimum amount of data from disk, +for instance, if the next key length is smaller than the one we are looking for, we can skip the whole key-value pair. ### Persistence @@ -52,6 +55,7 @@ A table is persisted to disk when it is created. A base filename is defined, and - `n`: number of entries in the index; - `o_1, o_2 - o_1, ..., o_n - o_n-1`: offsets of the key-value pairs in the data file, skipping the first one; +- `s_1, s_2, ..., s_n`: remaining keys after a sparse index entry, used to exit from search; - ``: keys in the index. **Filter format** diff --git a/src/jmh/java/com/tomfran/lsm/memtable/SkipListBenchmark.java b/src/jmh/java/com/tomfran/lsm/memtable/SkipListBenchmark.java index ef1a475..febbe2d 100644 --- a/src/jmh/java/com/tomfran/lsm/memtable/SkipListBenchmark.java +++ b/src/jmh/java/com/tomfran/lsm/memtable/SkipListBenchmark.java @@ -1,20 +1,20 @@ package com.tomfran.lsm.memtable; -import com.tomfran.lsm.types.Item; +import com.tomfran.lsm.types.ByteArrayPair; import it.unimi.dsi.fastutil.objects.ObjectArrayList; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; import java.util.concurrent.TimeUnit; -import static com.tomfran.lsm.TestUtils.getRandomItem; +import static com.tomfran.lsm.TestUtils.getRandomPair; @OutputTimeUnit(TimeUnit.SECONDS) @State(Scope.Benchmark) public class SkipListBenchmark { SkipList l; - Item[] items; + ByteArrayPair[] items; int NUM_ITEMS = 200000; int index = 0; @@ -27,16 +27,16 @@ public void setup() { l = new SkipList(NUM_ITEMS / 2); // generate random items and insert half - ObjectArrayList tmp = new ObjectArrayList<>(); + ObjectArrayList tmp = new ObjectArrayList<>(); for (int i = 0; i < NUM_ITEMS; i++) { - var it = getRandomItem(); + var it = getRandomPair(); if (i < NUM_ITEMS / 2) l.add(it); tmp.add(it); } - items = tmp.toArray(new Item[0]); + items = tmp.toArray(new ByteArrayPair[0]); // generate sequence of add/remove operations addRemove = new boolean[NUM_ITEMS]; diff --git a/src/jmh/java/com/tomfran/lsm/sstable/SSTableBenchmark.java b/src/jmh/java/com/tomfran/lsm/sstable/SSTableBenchmark.java index 794c421..007cd06 100644 --- a/src/jmh/java/com/tomfran/lsm/sstable/SSTableBenchmark.java +++ b/src/jmh/java/com/tomfran/lsm/sstable/SSTableBenchmark.java @@ -1,7 +1,7 @@ package com.tomfran.lsm.sstable; import com.tomfran.lsm.comparator.ByteArrayComparator; -import com.tomfran.lsm.types.Item; +import com.tomfran.lsm.types.ByteArrayPair; import it.unimi.dsi.fastutil.objects.ObjectArrayList; import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet; import org.openjdk.jmh.annotations.*; @@ -13,7 +13,7 @@ import java.util.Collections; import java.util.concurrent.TimeUnit; -import static com.tomfran.lsm.TestUtils.getRandomItem; +import static com.tomfran.lsm.TestUtils.getRandomPair; @OutputTimeUnit(TimeUnit.SECONDS) @State(Scope.Benchmark) @@ -23,8 +23,8 @@ public class SSTableBenchmark { static final int NUM_ITEMS = 100000; static final int SAMPLE_SIZE = NUM_ITEMS / 1000; - static Item[] insertedArray; - static Item[] skippedArray; + static ByteArrayPair[] insertedArray; + static ByteArrayPair[] skippedArray; static SSTable sstable; static int index = 0; @@ -38,9 +38,9 @@ public void setup() throws IOException { Files.createDirectory(DIR); // generate random items - var l = new ObjectOpenHashSet(); + var l = new ObjectOpenHashSet(); for (int i = 0; i < NUM_ITEMS * 2; i++) { - l.add(getRandomItem()); + l.add(getRandomPair()); } // sort and divide into inserted and skipped @@ -48,8 +48,8 @@ public void setup() throws IOException { .sorted((a, b) -> ByteArrayComparator.compare(a.key(), b.key())) .toList(); - var inserted = new ObjectArrayList(); - var skipped = new ObjectArrayList(); + var inserted = new ObjectArrayList(); + var skipped = new ObjectArrayList(); for (int i = 0; i < items.size(); i++) { var e = items.get(i); @@ -64,8 +64,8 @@ public void setup() throws IOException { // shuffle to avoid sequential access Collections.shuffle(inserted); Collections.shuffle(skipped); - insertedArray = inserted.toArray(Item[]::new); - skippedArray = skipped.toArray(Item[]::new); + insertedArray = inserted.toArray(ByteArrayPair[]::new); + skippedArray = skipped.toArray(ByteArrayPair[]::new); } @TearDown diff --git a/src/main/java/com/tomfran/lsm/io/BaseInputStream.java b/src/main/java/com/tomfran/lsm/io/BaseInputStream.java index 7c75a74..753cea3 100644 --- a/src/main/java/com/tomfran/lsm/io/BaseInputStream.java +++ b/src/main/java/com/tomfran/lsm/io/BaseInputStream.java @@ -1,5 +1,6 @@ package com.tomfran.lsm.io; +import com.tomfran.lsm.types.ByteArrayPair; import it.unimi.dsi.fastutil.io.FastBufferedInputStream; import java.io.FileInputStream; @@ -67,6 +68,37 @@ public byte[] readNBytes(int n) { } } + public ByteArrayPair readBytePair() { + try { + int keyLength = readVByteInt(); + int valueLength = readVByteInt(); + + return new ByteArrayPair( + readNBytes(keyLength), + readNBytes(valueLength) + ); + } catch (Exception e) { + return null; + } + } + + public byte[] readByteArray() { + try { + int len = readVByteInt(); + return fis.readNBytes(len); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public long skip(int n) { + try { + return fis.skip(n); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + public void seek(long offset) { try { fis.position(offset); diff --git a/src/main/java/com/tomfran/lsm/io/BaseOutputStream.java b/src/main/java/com/tomfran/lsm/io/BaseOutputStream.java index e44962e..078dc9e 100644 --- a/src/main/java/com/tomfran/lsm/io/BaseOutputStream.java +++ b/src/main/java/com/tomfran/lsm/io/BaseOutputStream.java @@ -1,5 +1,6 @@ package com.tomfran.lsm.io; +import com.tomfran.lsm.types.ByteArrayPair; import it.unimi.dsi.fastutil.io.FastBufferedOutputStream; import java.io.FileOutputStream; @@ -39,6 +40,20 @@ public int writeLong(long n) { return write(longToBytes(n)); } + public int writeBytePair(ByteArrayPair pair) { + byte[] key = pair.key(), value = pair.value(); + byte[] keyBytes = intToVByte(key.length), valueBytes = intToVByte(value.length); + + byte[] result = new byte[keyBytes.length + valueBytes.length + key.length + value.length]; + + System.arraycopy(keyBytes, 0, result, 0, keyBytes.length); + System.arraycopy(valueBytes, 0, result, keyBytes.length, valueBytes.length); + + System.arraycopy(key, 0, result, keyBytes.length + valueBytes.length, key.length); + System.arraycopy(value, 0, result, keyBytes.length + valueBytes.length + key.length, value.length); + return write(result); + } + byte[] intToVByte(int n) { return longToVByte(n); } diff --git a/src/main/java/com/tomfran/lsm/io/ItemsInputStream.java b/src/main/java/com/tomfran/lsm/io/ItemsInputStream.java deleted file mode 100644 index 0de87cf..0000000 --- a/src/main/java/com/tomfran/lsm/io/ItemsInputStream.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.tomfran.lsm.io; - -import com.tomfran.lsm.types.Item; - -public class ItemsInputStream extends BaseInputStream { - - public ItemsInputStream(String filename) { - super(filename); - } - - public Item readItem() { - try { - int keyLength = readVByteInt(); - int valueLength = readVByteInt(); - - return new Item( - readNBytes(keyLength), - readNBytes(valueLength) - ); - } catch (Exception e) { - return null; - } - } - -} diff --git a/src/main/java/com/tomfran/lsm/io/ItemsOutputStream.java b/src/main/java/com/tomfran/lsm/io/ItemsOutputStream.java deleted file mode 100644 index 858a589..0000000 --- a/src/main/java/com/tomfran/lsm/io/ItemsOutputStream.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.tomfran.lsm.io; - -import com.tomfran.lsm.types.Item; - -public class ItemsOutputStream extends BaseOutputStream { - - public ItemsOutputStream(String filename) { - super(filename); - } - - public int writeItem(Item item) { - return write(toBytes(item)); - } - - protected byte[] toBytes(Item i) { - byte[] key = i.key(), value = i.value(); - byte[] keyBytes = intToVByte(key.length), valueBytes = intToVByte(value.length); - - byte[] result = new byte[keyBytes.length + valueBytes.length + key.length + value.length]; - - System.arraycopy(keyBytes, 0, result, 0, keyBytes.length); - System.arraycopy(valueBytes, 0, result, keyBytes.length, valueBytes.length); - - System.arraycopy(key, 0, result, keyBytes.length + valueBytes.length, key.length); - System.arraycopy(value, 0, result, keyBytes.length + valueBytes.length + key.length, value.length); - - return result; - } -} diff --git a/src/main/java/com/tomfran/lsm/memtable/Memtable.java b/src/main/java/com/tomfran/lsm/memtable/Memtable.java index d75c693..3a28a0c 100644 --- a/src/main/java/com/tomfran/lsm/memtable/Memtable.java +++ b/src/main/java/com/tomfran/lsm/memtable/Memtable.java @@ -1,7 +1,7 @@ package com.tomfran.lsm.memtable; import com.tomfran.lsm.sstable.SSTable; -import com.tomfran.lsm.types.Item; +import com.tomfran.lsm.types.ByteArrayPair; public class Memtable { @@ -17,16 +17,16 @@ public Memtable(int numElements) { list = new SkipList(numElements); } - public void add(Item item) { + public void add(ByteArrayPair item) { list.add(item); } - public Item get(byte[] key) { + public ByteArrayPair get(byte[] key) { return list.get(key); } public void remove(byte[] key) { - list.add(new Item(key, null)); + list.add(new ByteArrayPair(key, null)); } public int size() { diff --git a/src/main/java/com/tomfran/lsm/memtable/SkipList.java b/src/main/java/com/tomfran/lsm/memtable/SkipList.java index 5195ad1..74ebe86 100644 --- a/src/main/java/com/tomfran/lsm/memtable/SkipList.java +++ b/src/main/java/com/tomfran/lsm/memtable/SkipList.java @@ -1,6 +1,6 @@ package com.tomfran.lsm.memtable; -import com.tomfran.lsm.types.Item; +import com.tomfran.lsm.types.ByteArrayPair; import it.unimi.dsi.util.XoRoShiRo128PlusRandom; import java.util.Iterator; @@ -12,7 +12,7 @@ /** * A skip list implementation of items. */ -public class SkipList implements Iterable { +public class SkipList implements Iterable { static final int DEFAULT_ELEMENTS = 1 << 16; @@ -49,7 +49,7 @@ public SkipList(int numElements) { * * @param item The item to add. */ - public void add(Item item) { + public void add(ByteArrayPair item) { Node current = sentinel; for (int i = levels - 1; i >= 0; i--) { while (current.next[i] != null && current.next[i].value.compareTo(item) < 0) @@ -83,7 +83,7 @@ private int randomLevel() { * @param key The key of the item to retrieve. * @return The item if found, null otherwise. */ - public Item get(byte[] key) { + public ByteArrayPair get(byte[] key) { Node current = sentinel; for (int i = levels - 1; i >= 0; i--) { while (current.next[i] != null && compare(current.next[i].value.key(), key) < 0) @@ -135,7 +135,7 @@ public int size() { * @return An iterator over the items in the skip list. */ @Override - public Iterator iterator() { + public Iterator iterator() { return new SkipListIterator(sentinel); } @@ -155,16 +155,16 @@ public String toString() { } private static final class Node { - Item value; + ByteArrayPair value; Node[] next; - Node(Item value, int numLevels) { + Node(ByteArrayPair value, int numLevels) { this.value = value; this.next = new Node[numLevels]; } } - private record SkipListIterator(Node node) implements Iterator { + private record SkipListIterator(Node node) implements Iterator { @Override public boolean hasNext() { @@ -172,7 +172,7 @@ public boolean hasNext() { } @Override - public Item next() { + public ByteArrayPair next() { return node.next[0].value; } } diff --git a/src/main/java/com/tomfran/lsm/sstable/SSTable.java b/src/main/java/com/tomfran/lsm/sstable/SSTable.java index b1ef4c4..51d2853 100644 --- a/src/main/java/com/tomfran/lsm/sstable/SSTable.java +++ b/src/main/java/com/tomfran/lsm/sstable/SSTable.java @@ -3,10 +3,9 @@ import com.tomfran.lsm.bloom.BloomFilter; import com.tomfran.lsm.io.BaseInputStream; import com.tomfran.lsm.io.BaseOutputStream; -import com.tomfran.lsm.io.ItemsInputStream; -import com.tomfran.lsm.io.ItemsOutputStream; -import com.tomfran.lsm.types.Item; +import com.tomfran.lsm.types.ByteArrayPair; import com.tomfran.lsm.utils.IteratorMerger; +import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.longs.LongArrayList; import it.unimi.dsi.fastutil.objects.ObjectArrayList; @@ -21,10 +20,11 @@ public class SSTable { public static final String INDEX_FILE_EXTENSION = ".index"; String filename; - ItemsInputStream is; + BaseInputStream is; int size; LongArrayList sparseOffsets; + IntArrayList sparseSizeCount; ObjectArrayList sparseKeys; BloomFilter bloomFilter; @@ -36,10 +36,10 @@ public class SSTable { * @param sampleSize The number of items to skip between sparse index entries. * @param numItems The number of items in the SSTable. */ - public SSTable(String filename, Iterable items, int sampleSize, int numItems) { + public SSTable(String filename, Iterable items, int sampleSize, int numItems) { this.filename = filename; writeItems(filename, items, sampleSize, numItems); - is = new ItemsInputStream(filename + DATA_FILE_EXTENSION); + is = new BaseInputStream(filename + DATA_FILE_EXTENSION); } /** @@ -63,7 +63,7 @@ public SSTable(String filename) { static SSTable merge(String filename, int sampleSize, SSTable... tables) { int newSize = 0; - Iterator[] iterators = new Iterator[tables.length]; + Iterator[] iterators = new Iterator[tables.length]; for (int i = 0; i < tables.length; i++) { iterators[i] = tables[i].iterator(); newSize += tables[i].size; @@ -76,10 +76,11 @@ static SSTable merge(String filename, int sampleSize, SSTable... tables) { private void initializeFromDisk(String filename) { // items file - is = new ItemsInputStream(filename + DATA_FILE_EXTENSION); + is = new BaseInputStream(filename + DATA_FILE_EXTENSION); // sparse index sparseOffsets = new LongArrayList(); + sparseSizeCount = new IntArrayList(); sparseKeys = new ObjectArrayList<>(); BaseInputStream indexIs = new BaseInputStream(filename + INDEX_FILE_EXTENSION); @@ -93,10 +94,16 @@ private void initializeFromDisk(String filename) { sparseOffsets.add(offsetsCumulative); } + int sizeCumulative = 0; + sparseSizeCount.add(sizeCumulative); + for (int i = 0; i < sparseSize; i++) { + sizeCumulative += indexIs.readVByteInt(); + sparseSizeCount.add(sizeCumulative); + } + for (int i = 0; i < sparseSize; i++) sparseKeys.add(indexIs.readNBytes(indexIs.readVByteInt())); - is.close(); // bloom filter @@ -109,22 +116,50 @@ private void initializeFromDisk(String filename) { * @param key The key of the item to read. * @return The item with the given key, or null if no such item exists. */ - public Item get(byte[] key) { + public byte[] get(byte[] key) { if (!bloomFilter.mightContain(key)) return null; - long offset = getCandidateOffset(key); + int offsetIndex = getCandidateOffsetIndex(key); + long offset = sparseOffsets.getLong(offsetIndex); + int remaining = size - sparseSizeCount.getInt(offsetIndex); is.seek(offset); - Item it; int cmp = 1; + int searchKeyLen = key.length, readKeyLen, readValueLen; + + byte[] readKey; + + while (cmp > 0 && remaining > 0) { + + remaining--; + readKeyLen = is.readVByteInt(); + + // gone too far + if (readKeyLen > searchKeyLen) { + return null; + } + + // gone too short + if (readKeyLen < searchKeyLen) { + readValueLen = is.readVByteInt(); + is.skip(readKeyLen + readValueLen); + continue; + } + + // read full key, compare, if equal read value + readValueLen = is.readVByteInt(); + readKey = is.readNBytes(readKeyLen); + cmp = compare(key, readKey); - while ((it = is.readItem()) != null && - it.key().length > 0 && - (cmp = compare(key, it.key())) > 0) { + if (cmp == 0) { + return is.readNBytes(readValueLen); + } else { + is.skip(readValueLen); + } } - return cmp == 0 ? it : null; + return null; } /** @@ -132,7 +167,7 @@ public Item get(byte[] key) { * * @return Table iterator */ - public Iterator iterator() { + public Iterator iterator() { is.seek(0); return new SSTableIterator(this); } @@ -144,7 +179,7 @@ public void close() { is.close(); } - private long getCandidateOffset(byte[] key) { + private int getCandidateOffsetIndex(byte[] key) { int low = 0; int high = sparseOffsets.size() - 1; @@ -157,29 +192,31 @@ private long getCandidateOffset(byte[] key) { else if (cmp > 0) low = mid; else - return sparseOffsets.getLong(mid); + return mid; } - return sparseOffsets.getLong(low); + return low; } - private void writeItems(String filename, Iterable items, int sampleSize, int numItems) { - ItemsOutputStream ios = new ItemsOutputStream(filename + DATA_FILE_EXTENSION); + private void writeItems(String filename, Iterable items, int sampleSize, int numItems) { + BaseOutputStream ios = new BaseOutputStream(filename + DATA_FILE_EXTENSION); sparseOffsets = new LongArrayList(); + sparseSizeCount = new IntArrayList(); sparseKeys = new ObjectArrayList<>(); bloomFilter = new BloomFilter(numItems); // write items and populate indexes int size = 0; long offset = 0L; - for (Item item : items) { + for (ByteArrayPair item : items) { if (size % sampleSize == 0) { sparseOffsets.add(offset); + sparseSizeCount.add(size); sparseKeys.add(item.key()); } bloomFilter.add(item.key()); - offset += ios.writeItem(item); + offset += ios.writeBytePair(item); size++; } ios.close(); @@ -191,13 +228,21 @@ private void writeItems(String filename, Iterable items, int sampleSize, i BaseOutputStream indexOs = new BaseOutputStream(filename + INDEX_FILE_EXTENSION); indexOs.writeVByteInt(size); - indexOs.writeVByteInt(sparseOffsets.size()); + + int sparseSize = sparseOffsets.size(); + indexOs.writeVByteInt(sparseSize); // skip first offset, always 0 - long prev = 0L; - for (int i = 1; i < sparseOffsets.size(); i++) { - indexOs.writeVByteLong(sparseOffsets.getLong(i) - prev); - prev = sparseOffsets.getLong(i); + long prevOffset = 0L; + for (int i = 1; i < sparseSize; i++) { + indexOs.writeVByteLong(sparseOffsets.getLong(i) - prevOffset); + prevOffset = sparseOffsets.getLong(i); + } + + int prevSize = 0; + for (int i = 1; i < sparseSize; i++) { + indexOs.writeVByteInt(sparseSizeCount.getInt(i) - prevSize); + prevSize = sparseSizeCount.getInt(i); } for (byte[] key : sparseKeys) { @@ -208,16 +253,25 @@ private void writeItems(String filename, Iterable items, int sampleSize, i indexOs.close(); } - private record SSTableIterator(SSTable table) implements Iterator { + private static class SSTableIterator implements Iterator { + + private final SSTable table; + int remaining; + + public SSTableIterator(SSTable table) { + this.table = table; + remaining = table.size; + } @Override public boolean hasNext() { - return table.is.hasNext(); + return remaining > 0; } @Override - public Item next() { - return table.is.readItem(); + public ByteArrayPair next() { + remaining--; + return table.is.readBytePair(); } } @@ -228,12 +282,12 @@ public Item next() { * keeping track of the last key we saw, and skipping over any keys that are * equal to the last key. */ - private static class SSTableMergerIterator extends IteratorMerger implements Iterable { + private static class SSTableMergerIterator extends IteratorMerger implements Iterable { - private Item last; + private ByteArrayPair last; @SafeVarargs - public SSTableMergerIterator(Iterator... iterators) { + public SSTableMergerIterator(Iterator... iterators) { super(iterators); last = super.next(); } @@ -244,19 +298,19 @@ public boolean hasNext() { } @Override - public Item next() { - Item next = super.next(); + public ByteArrayPair next() { + ByteArrayPair next = super.next(); while (next != null && last.compareTo(next) == 0) next = super.next(); - Item toReturn = last; + ByteArrayPair toReturn = last; last = next; return toReturn; } @Override - public Iterator iterator() { + public Iterator iterator() { return this; } } diff --git a/src/main/java/com/tomfran/lsm/tree/LSMTree.java b/src/main/java/com/tomfran/lsm/tree/LSMTree.java index 0f99735..297e843 100644 --- a/src/main/java/com/tomfran/lsm/tree/LSMTree.java +++ b/src/main/java/com/tomfran/lsm/tree/LSMTree.java @@ -2,7 +2,7 @@ import com.tomfran.lsm.memtable.Memtable; import com.tomfran.lsm.sstable.SSTable; -import com.tomfran.lsm.types.Item; +import com.tomfran.lsm.types.ByteArrayPair; import java.util.LinkedList; @@ -20,7 +20,7 @@ public LSMTree() { tables = new LinkedList<>(); } - public void add(Item item) { + public void add(ByteArrayPair item) { mutableMemtable.add(item); checkMemtableSize(); } @@ -37,8 +37,8 @@ private void checkMemtableSize() { } } - public Item get(byte[] key) { - Item result; + public ByteArrayPair get(byte[] key) { + ByteArrayPair result; if ((result = mutableMemtable.get(key)) != null) return result; @@ -47,9 +47,10 @@ public Item get(byte[] key) { if ((result = memtable.get(key)) != null) return result; + byte[] tmp; for (SSTable table : tables) - if ((result = table.get(key)) != null) - return result; + if ((tmp = table.get(key)) != null) + return new ByteArrayPair(key, tmp); return null; } diff --git a/src/main/java/com/tomfran/lsm/types/Item.java b/src/main/java/com/tomfran/lsm/types/ByteArrayPair.java similarity index 83% rename from src/main/java/com/tomfran/lsm/types/Item.java rename to src/main/java/com/tomfran/lsm/types/ByteArrayPair.java index 97c8fad..82e9203 100644 --- a/src/main/java/com/tomfran/lsm/types/Item.java +++ b/src/main/java/com/tomfran/lsm/types/ByteArrayPair.java @@ -4,7 +4,7 @@ import static com.tomfran.lsm.comparator.ByteArrayComparator.compare; -public record Item(byte[] key, byte[] value) implements Comparable { +public record ByteArrayPair(byte[] key, byte[] value) implements Comparable { @Override public int hashCode() { @@ -12,7 +12,7 @@ public int hashCode() { } @Override - public int compareTo(Item o) { + public int compareTo(ByteArrayPair o) { return compare(key, o.key); } diff --git a/src/test/java/com/tomfran/lsm/TestUtils.java b/src/test/java/com/tomfran/lsm/TestUtils.java index c9cf1cd..d97f2ba 100644 --- a/src/test/java/com/tomfran/lsm/TestUtils.java +++ b/src/test/java/com/tomfran/lsm/TestUtils.java @@ -1,6 +1,6 @@ package com.tomfran.lsm; -import com.tomfran.lsm.types.Item; +import com.tomfran.lsm.types.ByteArrayPair; import java.util.List; import java.util.Random; @@ -13,8 +13,8 @@ public class TestUtils { static final int MIN_BYTES_LENGTH = 10, MAX_BYTES_LENGTH = 500; static Random rn = new Random(); - public static Item getRandomItem() { - return new Item( + public static ByteArrayPair getRandomPair() { + return new ByteArrayPair( getRandomByteArray(), getRandomByteArray() ); @@ -38,7 +38,7 @@ public static List getRandomByteArrayList(int size) { .toList(); } - public static void assertItemEquals(Item a, Item b) { + public static void assertPairEqual(ByteArrayPair a, ByteArrayPair b) { assert compare(a.key(), b.key()) == 0; assert compare(a.value(), b.value()) == 0; } diff --git a/src/test/java/com/tomfran/lsm/io/BaseStreamsTest.java b/src/test/java/com/tomfran/lsm/io/BaseStreamsTest.java index 201dc24..be35e4d 100644 --- a/src/test/java/com/tomfran/lsm/io/BaseStreamsTest.java +++ b/src/test/java/com/tomfran/lsm/io/BaseStreamsTest.java @@ -1,5 +1,6 @@ package com.tomfran.lsm.io; +import com.tomfran.lsm.TestUtils; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -25,6 +26,9 @@ public void shouldReadWrite() { var longList = Stream.generate(rn::nextLong).map(Math::abs).limit(1000).toList(); longList.forEach(os::writeVByteLong); + var pairList = Stream.generate(TestUtils::getRandomPair).limit(1000).toList(); + pairList.forEach(os::writeBytePair); + os.close(); var is = new BaseInputStream(tempDirectory + "stream"); @@ -38,6 +42,12 @@ public void shouldReadWrite() { var read = is.readVByteLong(); assert read == i; }); + + pairList.forEach(i -> { + var read = is.readBytePair(); + assert read != null; + TestUtils.assertPairEqual(i, read); + }); } } diff --git a/src/test/java/com/tomfran/lsm/io/ItemsStreamsTest.java b/src/test/java/com/tomfran/lsm/io/ItemsStreamsTest.java deleted file mode 100644 index 537ebe6..0000000 --- a/src/test/java/com/tomfran/lsm/io/ItemsStreamsTest.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.tomfran.lsm.io; - -import com.tomfran.lsm.types.Item; -import it.unimi.dsi.fastutil.objects.ObjectArrayList; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -import java.nio.file.Path; -import java.util.stream.IntStream; - -import static com.tomfran.lsm.TestUtils.assertItemEquals; -import static com.tomfran.lsm.TestUtils.getRandomItem; - -class ItemsStreamsTest { - - static final String FILENAME = "/items.data"; - @TempDir - static Path tempDirectory; - static ItemsInputStream in; - static ItemsOutputStream out; - - static ObjectArrayList items; - - @BeforeAll - public static void setup() { - out = new ItemsOutputStream(tempDirectory + "/items.data"); - - items = new ObjectArrayList<>(); - - IntStream.range(0, 1000) - .mapToObj(i -> getRandomItem()) - .forEach(e -> { - items.add(e); - out.writeItem(e); - }); - - out.close(); - - in = new ItemsInputStream(tempDirectory + FILENAME); - } - - @AfterAll - public static void teardown() { - in.close(); - out.close(); - } - - @Test - public void shouldReadItem() { - for (var item : items) { - var it = in.readItem(); - assert it != null; - assertItemEquals(item, it); - } - } - -} \ No newline at end of file diff --git a/src/test/java/com/tomfran/lsm/memtable/SkipListTest.java b/src/test/java/com/tomfran/lsm/memtable/SkipListTest.java index 62154fe..58ac74a 100644 --- a/src/test/java/com/tomfran/lsm/memtable/SkipListTest.java +++ b/src/test/java/com/tomfran/lsm/memtable/SkipListTest.java @@ -1,7 +1,7 @@ package com.tomfran.lsm.memtable; import com.tomfran.lsm.TestUtils; -import com.tomfran.lsm.types.Item; +import com.tomfran.lsm.types.ByteArrayPair; import it.unimi.dsi.fastutil.objects.ObjectArrayList; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Order; @@ -16,14 +16,14 @@ class SkipListTest { static SkipList l; - static ObjectArrayList items; + static ObjectArrayList items; @BeforeAll public static void setup() { l = new SkipList(100); items = new ObjectArrayList<>(); - Stream.generate(TestUtils::getRandomItem) + Stream.generate(TestUtils::getRandomPair) .limit(100) .forEach(items::add); @@ -33,7 +33,7 @@ public static void setup() { @Test @Order(1) public void shouldFind() { - for (Item item : items) { + for (ByteArrayPair item : items) { var found = l.get(item.key()); assert found != null; assert compare(found.key(), item.key()) == 0; @@ -44,7 +44,7 @@ public void shouldFind() { @Test @Order(2) public void shouldRemove() { - for (Item item : items.subList(0, 50)) { + for (ByteArrayPair item : items.subList(0, 50)) { l.remove(item.key()); } diff --git a/src/test/java/com/tomfran/lsm/sstable/SSTableMergeTest.java b/src/test/java/com/tomfran/lsm/sstable/SSTableMergeTest.java index 71e0a26..745ca0b 100644 --- a/src/test/java/com/tomfran/lsm/sstable/SSTableMergeTest.java +++ b/src/test/java/com/tomfran/lsm/sstable/SSTableMergeTest.java @@ -1,6 +1,7 @@ package com.tomfran.lsm.sstable; -import com.tomfran.lsm.types.Item; +import com.tomfran.lsm.comparator.ByteArrayComparator; +import com.tomfran.lsm.types.ByteArrayPair; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -10,7 +11,7 @@ import java.util.ArrayList; import java.util.List; -import static com.tomfran.lsm.TestUtils.assertItemEquals; +import static com.tomfran.lsm.TestUtils.assertPairEqual; public class SSTableMergeTest { @@ -18,7 +19,7 @@ public class SSTableMergeTest { @TempDir static Path tempDirectory; static SSTable merge, first, second; - static List firstItems, secondItems, expectedItems; + static List firstItems, secondItems, expectedItems; @BeforeAll public static void setup() { @@ -29,8 +30,8 @@ public static void setup() { // generate overlapping items int n = 10; - firstItems = generateItems(0, n, false); - secondItems = generateItems(n - n / 2, n * 2, true); + firstItems = generatePairList(0, n, false); + secondItems = generatePairList(n - n / 2, n * 2, true); // expected is all first items and all second items except the first n/2 overlapping ones expectedItems.addAll(firstItems); @@ -49,10 +50,10 @@ public static void teardown() { second.close(); } - private static List generateItems(int start, int end, boolean incr) { - ArrayList result = new ArrayList<>(); + private static List generatePairList(int start, int end, boolean incr) { + ArrayList result = new ArrayList<>(); for (int i = start; i < end; i++) - result.add(new Item(new byte[]{(byte) i}, new byte[]{(byte) (i + (incr ? 1 : 0))})); + result.add(new ByteArrayPair(new byte[]{(byte) i}, new byte[]{(byte) (i + (incr ? 1 : 0))})); return result; } @@ -60,9 +61,9 @@ private static List generateItems(int start, int end, boolean incr) { @Test public void shouldGetItems() { for (var item : expectedItems) { - var it = merge.get(item.key()); - assert it != null; - assertItemEquals(item, it); + var val = merge.get(item.key()); + assert val != null; + assert ByteArrayComparator.compare(item.value(), val) == 0; } } @@ -74,7 +75,7 @@ public void shouldGetItemsInOrder() { while (it.hasNext()) { var item = it.next(); var expected = expectedItems.get(i++); - assertItemEquals(expected, item); + assertPairEqual(expected, item); } assert i == expectedItems.size() : "expected " + expectedItems.size() + " items, got " + i; diff --git a/src/test/java/com/tomfran/lsm/sstable/SSTableReconstructTest.java b/src/test/java/com/tomfran/lsm/sstable/SSTableReconstructTest.java index f9c149e..5f9df10 100644 --- a/src/test/java/com/tomfran/lsm/sstable/SSTableReconstructTest.java +++ b/src/test/java/com/tomfran/lsm/sstable/SSTableReconstructTest.java @@ -1,6 +1,6 @@ package com.tomfran.lsm.sstable; -import com.tomfran.lsm.types.Item; +import com.tomfran.lsm.types.ByteArrayPair; import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -11,7 +11,7 @@ import java.nio.file.Path; import java.util.List; -import static com.tomfran.lsm.TestUtils.getRandomItem; +import static com.tomfran.lsm.TestUtils.getRandomPair; import static com.tomfran.lsm.comparator.ByteArrayComparator.compare; import static com.tomfran.lsm.sstable.SSTable.*; @@ -25,9 +25,9 @@ public class SSTableReconstructTest { @BeforeAll static void setup() throws IOException { - var l = new ObjectOpenHashSet(); + var l = new ObjectOpenHashSet(); for (int i = 0; i < 10; i++) { - l.add(getRandomItem()); + l.add(getRandomPair()); } var items = l.stream() diff --git a/src/test/java/com/tomfran/lsm/sstable/SSTableTest.java b/src/test/java/com/tomfran/lsm/sstable/SSTableTest.java index 5d039fa..539d421 100644 --- a/src/test/java/com/tomfran/lsm/sstable/SSTableTest.java +++ b/src/test/java/com/tomfran/lsm/sstable/SSTableTest.java @@ -1,7 +1,7 @@ package com.tomfran.lsm.sstable; import com.tomfran.lsm.comparator.ByteArrayComparator; -import com.tomfran.lsm.types.Item; +import com.tomfran.lsm.types.ByteArrayPair; import it.unimi.dsi.fastutil.objects.ObjectArrayList; import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet; import org.junit.jupiter.api.AfterAll; @@ -11,28 +11,28 @@ import java.nio.file.Path; -import static com.tomfran.lsm.TestUtils.getRandomItem; +import static com.tomfran.lsm.TestUtils.getRandomPair; import static com.tomfran.lsm.comparator.ByteArrayComparator.compare; class SSTableTest { static final String TEST_FILE = "/sstable"; - static final int NUM_ITEMS = 100; + static final int NUM_ITEMS = 10; static final int SAMPLE_SIZE = NUM_ITEMS / 3; @TempDir static Path tempDirectory; static SSTable t; - static ObjectArrayList inserted; - static ObjectArrayList skipped; + static ObjectArrayList inserted; + static ObjectArrayList skipped; @BeforeAll public static void setup() { // generate random items - var l = new ObjectOpenHashSet(); + var l = new ObjectOpenHashSet(); for (int i = 0; i < NUM_ITEMS * 2; i++) { - l.add(getRandomItem()); + l.add(getRandomPair()); } // sort and divide into inserted and skipped @@ -62,10 +62,9 @@ public static void teardown() { @Test public void shouldFindItems() { for (var item : inserted) { - var it = t.get(item.key()); - assert it != null; - assert compare(item.key(), it.key()) == 0; - assert compare(item.value(), it.value()) == 0; + var val = t.get(item.key()); + assert val != null; + assert compare(item.value(), val) == 0; } }