Skip to content

Commit

Permalink
Compress postings blocks with PFOR-delta.
Browse files (browse the repository at this point in the history)
  • Loading branch information
jpountz committed Jan 23, 2024
1 parent 65e6691 commit 75474ba
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 16 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -177,6 +177,7 @@ private class PostingsWriter {

private final long[] docBuffer = new long[POSTINGS_BLOCK_SIZE];
private final long[] freqBuffer = new long[POSTINGS_BLOCK_SIZE];
private final PForUtil pforUtil = new PForUtil(new ForUtil());

PostingsWriter(boolean hasFreqs, boolean hasPositions, boolean hasOffsets) {
this.hasFreqs = hasFreqs;
Expand Down Expand Up @@ -226,20 +227,22 @@ void write(PostingsEnum pe, ByteBuffersDataOutput index) throws IOException {
if (++docBufferSize == POSTINGS_BLOCK_SIZE) {
// Write the last doc in the block first, which we can use as skip data, to know whether or not to decompress the block
index.writeVInt(doc - lastDocInPrevBlock - POSTINGS_BLOCK_SIZE);
lastDocInPrevBlock = doc;
if (hasPositions) {
index.writeVLong(prox.getFilePointer() - lastProxOffset);
lastProxOffset = prox.getFilePointer();
}
for (int i = 0; i < POSTINGS_BLOCK_SIZE - 1; ++i) {
index.writeInt((int) docBuffer[i]);
// Delta-code postings
for (int i = ForUtil.BLOCK_SIZE - 1; i > 0; --i) {
docBuffer[i] -= docBuffer[i - 1];
}
docBuffer[0] -= lastDocInPrevBlock;
pforUtil.encode(docBuffer, index);
if (hasFreqs) {
for (int i = 0; i < POSTINGS_BLOCK_SIZE; ++i) {
index.writeInt((int) freqBuffer[i]);
}
pforUtil.encode(freqBuffer, index);
index.writeVLong(freqBuffer[ForUtil.BLOCK_SIZE]);
}
docBufferSize = 0;
lastDocInPrevBlock = doc;
}
}
// Tail postings
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -388,6 +388,7 @@ private static class InlinePostingsEnum extends PostingsEnum {
private int docBufferIndex;
private int skipDoc; // last doc in the current block
private long nextBlockProxOffset; // start offset of proximity data for the next block
private final PForUtil pforUtil = new PForUtil(new ForUtil());

InlinePostingsEnum(ES814InlineFieldsProducer producer, int flags, IndexInput index, IndexInput prox) {
this.producer = Objects.requireNonNull(producer);
Expand Down Expand Up @@ -480,27 +481,26 @@ private void refillDocs(int target) throws IOException {
final int remaining = docFreq - docIndex;
if (remaining >= POSTINGS_BLOCK_SIZE) {
// Full block
final int lastDocInPrevBlock = skipDoc;
skipDoc += index.readVInt() + POSTINGS_BLOCK_SIZE;
if (options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
nextBlockProxOffset += index.readVLong();
}
if (skipDoc >= target) {
docBuffer[POSTINGS_BLOCK_SIZE - 1] = skipDoc;
for (int i = 0; i < POSTINGS_BLOCK_SIZE - 1; ++i) {
docBuffer[i] = index.readInt();
}
pforUtil.decodeAndPrefixSum(index, lastDocInPrevBlock, docBuffer);
docBuffer[ForUtil.BLOCK_SIZE] = skipDoc;
if (options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
for (int i = 0; i < POSTINGS_BLOCK_SIZE; ++i) {
freqBuffer[i] = index.readInt();
}
pforUtil.decode(index, freqBuffer);
freqBuffer[ForUtil.BLOCK_SIZE] = index.readVLong();
} else {
Arrays.fill(freqBuffer, -1L);
Arrays.fill(freqBuffer, 1L);
}
} else {
// Skip block
index.skipBytes((POSTINGS_BLOCK_SIZE - 1) * Integer.BYTES);
pforUtil.skip(index);
if (options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
index.skipBytes(POSTINGS_BLOCK_SIZE * Integer.BYTES);
pforUtil.skip(index);
index.readVLong();
}
}
} else {
Expand Down

0 comments on commit 75474ba

Please sign in to comment.