Cut over stored fields to ZSTD for compression.
This cuts over stored fields with `index.codec: best_speed` (default) to ZSTD
with level 0 and blocks of at most 128 documents or 16kB, and `index.codec:
best_compression` to ZSTD with level 9 and blocks of at most 4,096 documents or
512kB.

Compared with the current codecs, this would yield approximately equal indexing
speed, much better space efficiency, and somewhat slower retrieval speed. This
is deemed acceptable as we are currently quite conservative when it comes to
trading retrieval speed for space efficiency.

The Lucene codec infrastructure records the codec on a per-segment basis and
ensures that this change is backward-compatible. Segments will get
progressively migrated to ZSTD as they get merged in the background.

Bindings for ZSTD are provided by JNA, which we already rely on for access to
the standard library.

This change is not complete yet. TODO:
 - Ship ZSTD as part of Elasticsearch instead of relying on it being installed
   on the system.
 - Figure out SecurityManager permissions.
jpountz committed Dec 13, 2023
1 parent ad9a054 commit 43d5f5e
Showing 13 changed files with 525 additions and 47 deletions.
1 change: 1 addition & 0 deletions server/src/main/java/module-info.java
@@ -241,6 +241,7 @@
     exports org.elasticsearch.index.codec;
     exports org.elasticsearch.index.codec.tsdb;
     exports org.elasticsearch.index.codec.bloomfilter;
+    exports org.elasticsearch.index.codec.zstd;
     exports org.elasticsearch.index.engine;
     exports org.elasticsearch.index.fielddata;
     exports org.elasticsearch.index.fielddata.fieldcomparator;
server/src/main/java/org/elasticsearch/index/codec/CodecService.java
@@ -9,9 +9,9 @@
 package org.elasticsearch.index.codec;
 
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene99.Lucene99Codec;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.core.Nullable;
+import org.elasticsearch.index.codec.zstd.Zstd813StoredFieldsFormat;
 import org.elasticsearch.index.mapper.MapperService;
 
 import java.util.HashMap;
@@ -35,11 +35,14 @@ public class CodecService {
     public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) {
         final var codecs = new HashMap<String, Codec>();
         if (mapperService == null) {
-            codecs.put(DEFAULT_CODEC, new Lucene99Codec());
-            codecs.put(BEST_COMPRESSION_CODEC, new Lucene99Codec(Lucene99Codec.Mode.BEST_COMPRESSION));
+            codecs.put(DEFAULT_CODEC, new Elasticsearch813Codec(Zstd813StoredFieldsFormat.Mode.BEST_SPEED));
+            codecs.put(BEST_COMPRESSION_CODEC, new Elasticsearch813Codec(Zstd813StoredFieldsFormat.Mode.BEST_COMPRESSION));
         } else {
-            codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Lucene99Codec.Mode.BEST_SPEED, mapperService, bigArrays));
-            codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMapperCodec(Lucene99Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays));
+            codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd813StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays));
+            codecs.put(
+                BEST_COMPRESSION_CODEC,
+                new PerFieldMapperCodec(Zstd813StoredFieldsFormat.Mode.BEST_COMPRESSION, mapperService, bigArrays)
+            );
         }
         codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());
         for (String codec : Codec.availableCodecs()) {
server/src/main/java/org/elasticsearch/index/codec/Elasticsearch813Codec.java
@@ -0,0 +1,129 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.index.codec.zstd.Zstd813StoredFieldsFormat;

/**
 * Elasticsearch codec as of 8.13. This extends the Lucene 9.9 codec to compress stored fields with ZSTD instead of LZ4/DEFLATE. See
 * {@link Zstd813StoredFieldsFormat}.
 */
public class Elasticsearch813Codec extends FilterCodec {

    private final StoredFieldsFormat storedFieldsFormat;

    private final PostingsFormat defaultPostingsFormat;
    private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return Elasticsearch813Codec.this.getPostingsFormatForField(field);
        }
    };

    private final DocValuesFormat defaultDVFormat;
    private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            return Elasticsearch813Codec.this.getDocValuesFormatForField(field);
        }
    };

    private final KnnVectorsFormat defaultKnnVectorsFormat;
    private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() {
        @Override
        public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
            return Elasticsearch813Codec.this.getKnnVectorsFormatForField(field);
        }
    };

    /** Public no-arg constructor, needed for SPI loading at read-time. */
    public Elasticsearch813Codec() {
        this(Zstd813StoredFieldsFormat.Mode.BEST_SPEED);
    }

    /**
     * Constructor. Takes a {@link Zstd813StoredFieldsFormat.Mode} that describes whether to optimize for retrieval speed at the expense
     * of worse space-efficiency or vice-versa.
     */
    public Elasticsearch813Codec(Zstd813StoredFieldsFormat.Mode mode) {
        super("Elasticsearch813", new Lucene99Codec());
        this.storedFieldsFormat = new Zstd813StoredFieldsFormat(mode);
        this.defaultPostingsFormat = new Lucene99PostingsFormat();
        this.defaultDVFormat = new Lucene90DocValuesFormat();
        this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
    }

    @Override
    public StoredFieldsFormat storedFieldsFormat() {
        return storedFieldsFormat;
    }

    @Override
    public final PostingsFormat postingsFormat() {
        return postingsFormat;
    }

    @Override
    public final DocValuesFormat docValuesFormat() {
        return docValuesFormat;
    }

    @Override
    public final KnnVectorsFormat knnVectorsFormat() {
        return knnVectorsFormat;
    }

    /**
     * Returns the postings format that should be used for writing new segments of <code>field</code>.
     *
     * <p>The default implementation always returns "Lucene99".
     *
     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
     * future versions of Lucene are only guaranteed to be able to read the default implementation.
     */
    public PostingsFormat getPostingsFormatForField(String field) {
        return defaultPostingsFormat;
    }

    /**
     * Returns the docvalues format that should be used for writing new segments of <code>field</code>.
     *
     * <p>The default implementation always returns "Lucene90".
     *
     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
     * future versions of Lucene are only guaranteed to be able to read the default implementation.
     */
    public DocValuesFormat getDocValuesFormatForField(String field) {
        return defaultDVFormat;
    }

    /**
     * Returns the vectors format that should be used for writing new segments of <code>field</code>.
     *
     * <p>The default implementation always returns "Lucene99HnswVectorsFormat".
     *
     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
     * future versions of Lucene are only guaranteed to be able to read the default implementation.
     */
    public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
        return defaultKnnVectorsFormat;
    }
}
server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java
@@ -13,13 +13,13 @@
 import org.apache.lucene.codecs.KnnVectorsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
-import org.apache.lucene.codecs.lucene99.Lucene99Codec;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
 import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
+import org.elasticsearch.index.codec.zstd.Zstd813StoredFieldsFormat;
 import org.elasticsearch.index.mapper.IdFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.Mapper;
@@ -37,20 +37,19 @@
  * per index in real time via the mapping API. If no specific postings format or vector format is
  * configured for a specific field the default postings or vector format is used.
  */
-public final class PerFieldMapperCodec extends Lucene99Codec {
+public final class PerFieldMapperCodec extends Elasticsearch813Codec {
 
     private final MapperService mapperService;
     private final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
     private final ES87BloomFilterPostingsFormat bloomFilterPostingsFormat;
     private final ES87TSDBDocValuesFormat tsdbDocValuesFormat;
 
-    static {
-        assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMapperCodec.class)
-            : "PerFieldMapperCodec must subclass the latest lucene codec: " + Lucene.LATEST_CODEC;
-    }
-
-    public PerFieldMapperCodec(Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
+    public PerFieldMapperCodec(Zstd813StoredFieldsFormat.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
         super(compressionMode);
+        // If the below assertion fails, it is a sign that Lucene released a new codec. You must create a copy of the current Elasticsearch
+        // codec that delegates to this new Lucene codec, and make PerFieldMapperCodec extend this new Elasticsearch codec.
+        assert Codec.forName(Lucene.LATEST_CODEC).getClass() == delegate.getClass()
+            : "PerFieldMapperCodec must be on the latest lucene codec: " + Lucene.LATEST_CODEC;
         this.mapperService = mapperService;
         this.bloomFilterPostingsFormat = new ES87BloomFilterPostingsFormat(bigArrays, this::internalGetPostingsFormatForField);
         this.tsdbDocValuesFormat = new ES87TSDBDocValuesFormat();
86 changes: 86 additions & 0 deletions server/src/main/java/org/elasticsearch/index/codec/zstd/Zstd.java
@@ -0,0 +1,86 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.index.codec.zstd;

import com.sun.jna.Library;
import com.sun.jna.Native;

import java.nio.ByteBuffer;

/** JNA bindings for ZSTD. */
final class Zstd {

    // TODO: Move this under libs/ and make it public so that we can progressively replace all our usage of DEFLATE with ZSTD?

    private static final ZstdLibrary LIBRARY;

    static {
        LIBRARY = Native.load("zstd", ZstdLibrary.class);
    }

    private interface ZstdLibrary extends Library {

        long ZSTD_compressBound(int srcLen);

        long ZSTD_compress(ByteBuffer dst, int dstLen, ByteBuffer src, int srcLen, int compressionLevel);

        boolean ZSTD_isError(long code);

        String ZSTD_getErrorName(long code);

        long ZSTD_decompress(ByteBuffer dst, int dstLen, ByteBuffer src, int srcLen);

    }

    /**
     * Compress the content of {@code src} into {@code dst} at compression level {@code level}, and return the number of compressed bytes.
     * {@link ByteBuffer#position()} and {@link ByteBuffer#limit()} of both {@link ByteBuffer}s are left unmodified.
     */
    public static int compress(ByteBuffer dst, ByteBuffer src, int level) {
        long ret = LIBRARY.ZSTD_compress(dst, dst.remaining(), src, src.remaining(), level);
        if (LIBRARY.ZSTD_isError(ret)) {
            throw new IllegalArgumentException(LIBRARY.ZSTD_getErrorName(ret));
        } else if (ret < 0 || ret > Integer.MAX_VALUE) {
            throw new IllegalStateException("Integer overflow? ret=" + ret);
        }
        return (int) ret;
    }

    /**
     * Decompress the content of {@code src} into {@code dst}, and return the number of decompressed bytes. {@link ByteBuffer#position()}
     * and {@link ByteBuffer#limit()} of both {@link ByteBuffer}s are left unmodified.
     */
    public static int decompress(ByteBuffer dst, ByteBuffer src) {
        long ret = LIBRARY.ZSTD_decompress(dst, dst.remaining(), src, src.remaining());
        if (LIBRARY.ZSTD_isError(ret)) {
            throw new IllegalArgumentException(LIBRARY.ZSTD_getErrorName(ret));
        } else if (ret < 0 || ret > Integer.MAX_VALUE) {
            throw new IllegalStateException("Integer overflow? ret=" + ret);
        }
        return (int) ret;
    }

    /**
     * Return the maximum number of compressed bytes given an input length.
     */
    public static int getMaxCompressedLen(int srcLen) {
        long ret = LIBRARY.ZSTD_compressBound(srcLen);
        if (LIBRARY.ZSTD_isError(ret)) {
            throw new IllegalArgumentException(LIBRARY.ZSTD_getErrorName(ret));
        } else if (ret < 0 || ret > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                srcLen
                    + " bytes may require up to "
                    + Long.toUnsignedString(ret)
                    + " bytes, which overflows the maximum capacity of a ByteBuffer"
            );
        }
        return (int) ret;
    }
}