Cut over stored fields to ZSTD for compression.
This cuts over stored fields with `index.codec: best_speed` (default) to ZSTD
with level 0 and blocks of at most 128 documents or 16kB, and `index.codec:
best_compression` to ZSTD with level 9 and blocks of at most 4,096 documents or
512kB.

Compared with the current codecs, this would yield approximately equal indexing
speed, much better space efficiency, and somewhat slower retrieval speed. This
is deemed acceptable as we are currently quite conservative when it comes to
trading retrieval speed for space efficiency.

The Lucene codec infrastructure records the codec on a per-segment basis and
ensures that this change is backward-compatible. Segments will get
progressively migrated to ZSTD as they get merged in the background.

Bindings for ZSTD are provided by JNA, which we already rely on for access to
the standard library.

This change is not complete yet. TODO:
 - Ship ZSTD as part of Elasticsearch instead of relying on it being installed
   on the system.
 - Figure out SecurityManager permissions.
jpountz committed Dec 13, 2023
1 parent ad9a054 commit 43d5f5e
Showing 13 changed files with 525 additions and 47 deletions.
1 change: 1 addition & 0 deletions server/src/main/java/module-info.java
@@ -241,6 +241,7 @@
     exports org.elasticsearch.index.codec;
     exports org.elasticsearch.index.codec.tsdb;
     exports org.elasticsearch.index.codec.bloomfilter;
+    exports org.elasticsearch.index.codec.zstd;
     exports org.elasticsearch.index.engine;
     exports org.elasticsearch.index.fielddata;
     exports org.elasticsearch.index.fielddata.fieldcomparator;
server/src/main/java/org/elasticsearch/index/codec/CodecService.java
@@ -9,9 +9,9 @@
 package org.elasticsearch.index.codec;
 
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene99.Lucene99Codec;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.core.Nullable;
+import org.elasticsearch.index.codec.zstd.Zstd813StoredFieldsFormat;
 import org.elasticsearch.index.mapper.MapperService;
 
 import java.util.HashMap;
@@ -35,11 +35,14 @@ public class CodecService {
     public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) {
         final var codecs = new HashMap<String, Codec>();
         if (mapperService == null) {
-            codecs.put(DEFAULT_CODEC, new Lucene99Codec());
-            codecs.put(BEST_COMPRESSION_CODEC, new Lucene99Codec(Lucene99Codec.Mode.BEST_COMPRESSION));
+            codecs.put(DEFAULT_CODEC, new Elasticsearch813Codec(Zstd813StoredFieldsFormat.Mode.BEST_SPEED));
+            codecs.put(BEST_COMPRESSION_CODEC, new Elasticsearch813Codec(Zstd813StoredFieldsFormat.Mode.BEST_COMPRESSION));
         } else {
-            codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Lucene99Codec.Mode.BEST_SPEED, mapperService, bigArrays));
-            codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMapperCodec(Lucene99Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays));
+            codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd813StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays));
+            codecs.put(
+                BEST_COMPRESSION_CODEC,
+                new PerFieldMapperCodec(Zstd813StoredFieldsFormat.Mode.BEST_COMPRESSION, mapperService, bigArrays)
+            );
         }
         codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());
         for (String codec : Codec.availableCodecs()) {
server/src/main/java/org/elasticsearch/index/codec/Elasticsearch813Codec.java
@@ -0,0 +1,129 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.index.codec.zstd.Zstd813StoredFieldsFormat;

/**
 * Elasticsearch codec as of 8.13. This extends the Lucene 9.9 codec to compress stored fields with ZSTD instead of LZ4/DEFLATE. See
 * {@link Zstd813StoredFieldsFormat}.
 */
public class Elasticsearch813Codec extends FilterCodec {

    private final StoredFieldsFormat storedFieldsFormat;

    private final PostingsFormat defaultPostingsFormat;
    private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return Elasticsearch813Codec.this.getPostingsFormatForField(field);
        }
    };

    private final DocValuesFormat defaultDVFormat;
    private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            return Elasticsearch813Codec.this.getDocValuesFormatForField(field);
        }
    };

    private final KnnVectorsFormat defaultKnnVectorsFormat;
    private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() {
        @Override
        public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
            return Elasticsearch813Codec.this.getKnnVectorsFormatForField(field);
        }
    };

    /** Public no-arg constructor, needed for SPI loading at read-time. */
    public Elasticsearch813Codec() {
        this(Zstd813StoredFieldsFormat.Mode.BEST_SPEED);
    }

    /**
     * Constructor. Takes a {@link Zstd813StoredFieldsFormat.Mode} that describes whether to optimize for retrieval speed at the expense
     * of worse space-efficiency or vice-versa.
     */
    public Elasticsearch813Codec(Zstd813StoredFieldsFormat.Mode mode) {
        super("Elasticsearch813", new Lucene99Codec());
        this.storedFieldsFormat = new Zstd813StoredFieldsFormat(mode);
        this.defaultPostingsFormat = new Lucene99PostingsFormat();
        this.defaultDVFormat = new Lucene90DocValuesFormat();
        this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
    }

    @Override
    public StoredFieldsFormat storedFieldsFormat() {
        return storedFieldsFormat;
    }

    @Override
    public final PostingsFormat postingsFormat() {
        return postingsFormat;
    }

    @Override
    public final DocValuesFormat docValuesFormat() {
        return docValuesFormat;
    }

    @Override
    public final KnnVectorsFormat knnVectorsFormat() {
        return knnVectorsFormat;
    }

    /**
     * Returns the postings format that should be used for writing new segments of <code>field</code>.
     *
     * <p>The default implementation always returns "Lucene99".
     *
     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
     * future versions of Lucene are only guaranteed to be able to read the default implementation.
     */
    public PostingsFormat getPostingsFormatForField(String field) {
        return defaultPostingsFormat;
    }

    /**
     * Returns the docvalues format that should be used for writing new segments of <code>field</code>.
     *
     * <p>The default implementation always returns "Lucene90".
     *
     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
     * future versions of Lucene are only guaranteed to be able to read the default implementation.
     */
    public DocValuesFormat getDocValuesFormatForField(String field) {
        return defaultDVFormat;
    }

    /**
     * Returns the vectors format that should be used for writing new segments of <code>field</code>.
     *
     * <p>The default implementation always returns "Lucene99HnswVectorsFormat".
     *
     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
     * future versions of Lucene are only guaranteed to be able to read the default implementation.
     */
    public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
        return defaultKnnVectorsFormat;
    }
}
server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java
@@ -13,13 +13,13 @@
 import org.apache.lucene.codecs.KnnVectorsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
-import org.apache.lucene.codecs.lucene99.Lucene99Codec;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
 import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
+import org.elasticsearch.index.codec.zstd.Zstd813StoredFieldsFormat;
 import org.elasticsearch.index.mapper.IdFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.Mapper;
@@ -37,20 +37,19 @@
  * per index in real time via the mapping API. If no specific postings format or vector format is
  * configured for a specific field the default postings or vector format is used.
  */
-public final class PerFieldMapperCodec extends Lucene99Codec {
+public final class PerFieldMapperCodec extends Elasticsearch813Codec {
 
     private final MapperService mapperService;
     private final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
     private final ES87BloomFilterPostingsFormat bloomFilterPostingsFormat;
     private final ES87TSDBDocValuesFormat tsdbDocValuesFormat;
 
-    static {
-        assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMapperCodec.class)
-            : "PerFieldMapperCodec must subclass the latest lucene codec: " + Lucene.LATEST_CODEC;
-    }
-
-    public PerFieldMapperCodec(Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
+    public PerFieldMapperCodec(Zstd813StoredFieldsFormat.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
         super(compressionMode);
+        // If the below assertion fails, it is a sign that Lucene released a new codec. You must create a copy of the current Elasticsearch
+        // codec that delegates to this new Lucene codec, and make PerFieldMapperCodec extend this new Elasticsearch codec.
+        assert Codec.forName(Lucene.LATEST_CODEC).getClass() == delegate.getClass()
+            : "PerFieldMapperCodec must be on the latest lucene codec: " + Lucene.LATEST_CODEC;
         this.mapperService = mapperService;
         this.bloomFilterPostingsFormat = new ES87BloomFilterPostingsFormat(bigArrays, this::internalGetPostingsFormatForField);
         this.tsdbDocValuesFormat = new ES87TSDBDocValuesFormat();
86 changes: 86 additions & 0 deletions server/src/main/java/org/elasticsearch/index/codec/zstd/Zstd.java
@@ -0,0 +1,86 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.index.codec.zstd;

import com.sun.jna.Library;
import com.sun.jna.Native;

import java.nio.ByteBuffer;

/** JNA bindings for ZSTD. */
final class Zstd {

    // TODO: Move this under libs/ and make it public so that we can progressively replace all our usage of DEFLATE with ZSTD?

    private static final ZstdLibrary LIBRARY;

    static {
        LIBRARY = Native.load("zstd", ZstdLibrary.class);
    }

    private interface ZstdLibrary extends Library {

        long ZSTD_compressBound(int srcLen);

        long ZSTD_compress(ByteBuffer dst, int dstLen, ByteBuffer src, int srcLen, int compressionLevel);

        boolean ZSTD_isError(long code);

        String ZSTD_getErrorName(long code);

        long ZSTD_decompress(ByteBuffer dst, int dstLen, ByteBuffer src, int srcLen);

    }

    /**
     * Compress the content of {@code src} into {@code dst} at compression level {@code level}, and return the number of compressed bytes.
     * {@link ByteBuffer#position()} and {@link ByteBuffer#limit()} of both {@link ByteBuffer}s are left unmodified.
     */
    public static int compress(ByteBuffer dst, ByteBuffer src, int level) {
        long ret = LIBRARY.ZSTD_compress(dst, dst.remaining(), src, src.remaining(), level);
        if (LIBRARY.ZSTD_isError(ret)) {
            throw new IllegalArgumentException(LIBRARY.ZSTD_getErrorName(ret));
        } else if (ret < 0 || ret > Integer.MAX_VALUE) {
            throw new IllegalStateException("Integer overflow? ret=" + ret);
        }
        return (int) ret;
    }

    /**
     * Decompress the content of {@code src} into {@code dst}, and return the number of decompressed bytes. {@link ByteBuffer#position()}
     * and {@link ByteBuffer#limit()} of both {@link ByteBuffer}s are left unmodified.
     */
    public static int decompress(ByteBuffer dst, ByteBuffer src) {
        long ret = LIBRARY.ZSTD_decompress(dst, dst.remaining(), src, src.remaining());
        if (LIBRARY.ZSTD_isError(ret)) {
            throw new IllegalArgumentException(LIBRARY.ZSTD_getErrorName(ret));
        } else if (ret < 0 || ret > Integer.MAX_VALUE) {
            throw new IllegalStateException("Integer overflow? ret=" + ret);
        }
        return (int) ret;
    }

    /**
     * Return the maximum number of compressed bytes given an input length.
     */
    public static int getMaxCompressedLen(int srcLen) {
        long ret = LIBRARY.ZSTD_compressBound(srcLen);
        if (LIBRARY.ZSTD_isError(ret)) {
            throw new IllegalArgumentException(LIBRARY.ZSTD_getErrorName(ret));
        } else if (ret < 0 || ret > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                srcLen
                    + " bytes may require up to "
                    + Long.toUnsignedString(ret)
                    + " bytes, which overflows the maximum capacity of a ByteBuffer"
            );
        }
        return (int) ret;
    }
}