Skip to content

Commit

Permalink
Bump codec to Lucene912.
Browse files Browse the repository at this point in the history
Lucene recently upgraded its default codec from `Lucene99` to `Lucene912`.
  • Loading branch information
jpountz committed Aug 2, 2024
1 parent 1ae40ee commit 1bac0de
Show file tree
Hide file tree
Showing 24 changed files with 76 additions and 54 deletions.
6 changes: 4 additions & 2 deletions server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
* Side Public License, v 1.
*/

import org.elasticsearch.index.codec.Elasticsearch814Codec;
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
import org.elasticsearch.plugins.internal.RestExtension;

Expand Down Expand Up @@ -454,7 +453,10 @@
org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat,
org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat;

provides org.apache.lucene.codecs.Codec with Elasticsearch814Codec;
provides org.apache.lucene.codecs.Codec
with
org.elasticsearch.index.codec.Elasticsearch814Codec,
org.elasticsearch.index.codec.Elasticsearch816Codec;

provides org.apache.logging.log4j.core.util.ContextDataProvider with org.elasticsearch.common.logging.DynamicContextDataProvider;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
import org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DirectoryReader;
Expand Down Expand Up @@ -303,6 +304,9 @@ private static void readProximity(Terms terms, PostingsEnum postings) throws IOE
private static BlockTermState getBlockTermState(TermsEnum termsEnum, BytesRef term) throws IOException {
if (term != null && termsEnum.seekExact(term)) {
final TermState termState = termsEnum.termState();
if (termState instanceof final Lucene912PostingsFormat.IntBlockTermState blockTermState) {
return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
}
if (termState instanceof final ES812PostingsFormat.IntBlockTermState blockTermState) {
return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
import java.util.Objects;

public class Lucene {
public static final String LATEST_CODEC = "Lucene99";
public static final String LATEST_CODEC = "Lucene912";

public static final String SOFT_DELETES_FIELD = "__soft_deletes";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.core.Nullable;
Expand Down Expand Up @@ -45,15 +45,15 @@ public class CodecService implements CodecProvider {
public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) {
final var codecs = new HashMap<String, Codec>();

Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene99Codec.Mode.BEST_SPEED, mapperService, bigArrays);
Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene912Codec.Mode.BEST_SPEED, mapperService, bigArrays);
if (ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled()) {
codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays));
} else {
codecs.put(DEFAULT_CODEC, legacyBestSpeedCodec);
}
codecs.put(LEGACY_DEFAULT_CODEC, legacyBestSpeedCodec);

Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene99Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays);
Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene912Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays);
if (ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled()) {
codecs.put(
BEST_COMPRESSION_CODEC,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@

package org.elasticsearch.index.codec;

import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec;
import org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.mapper.MapperService;
Expand All @@ -21,11 +21,11 @@
* Legacy version of {@link PerFieldMapperCodec}. This codec is preserved to give an escape hatch in case we encounter issues with new
* changes in {@link PerFieldMapperCodec}.
*/
public final class LegacyPerFieldMapperCodec extends Lucene99Codec {
public final class LegacyPerFieldMapperCodec extends Lucene912Codec {

private final PerFieldFormatSupplier formatSupplier;

public LegacyPerFieldMapperCodec(Lucene99Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
public LegacyPerFieldMapperCodec(Lucene912Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
super(compressionMode);
this.formatSupplier = new PerFieldFormatSupplier(mapperService, bigArrays);
// If the below assertion fails, it is a sign that Lucene released a new codec. You must create a copy of the current Elasticsearch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
* per index in real time via the mapping API. If no specific postings format or vector format is
* configured for a specific field the default postings or vector format is used.
*/
public final class PerFieldMapperCodec extends Elasticsearch814Codec {
public final class PerFieldMapperCodec extends Elasticsearch816Codec {

private final PerFieldFormatSupplier formatSupplier;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ public CompletionFieldType fieldType() {
}

static PostingsFormat postingsFormat() {
return PostingsFormat.forName("Completion99");
return PostingsFormat.forName("Completion912");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public enum LuceneFilesExtensions {
NVM("nvm", "Norms Metadata", true, false),
PAY("pay", "Payloads", false, false),
POS("pos", "Positions", false, false),
PSM("psm", "Postings Metadata", true, false),
SI("si", "Segment Info", true, false),
// Term dictionaries are typically performance-sensitive and hot in the page
// cache, so we use mmap, which provides better performance.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,19 @@ public int nextSetBit(int index) {
return next;
}

/**
 * Returns the first bit at or after {@code index} that is set in {@code first} and for which
 * {@code second.get(bit)} is also true.
 *
 * <p>Candidates are taken from {@code first.nextSetBit(..., upperBound)}; each candidate that
 * {@code second} rejects is skipped and the search resumes one position later.
 *
 * <p>NOTE(review): the scan gives up only when a rejected candidate is the last bit
 * ({@code length() - 1}); it never compares the candidate against {@code upperBound} itself.
 * Confirm that {@code first.nextSetBit(candidate + 1, upperBound)} is valid when
 * {@code candidate + 1 == upperBound}.
 *
 * @param index      position to start searching from; asserted to be in {@code [0, length)}
 * @param upperBound bound forwarded unchanged to {@code first.nextSetBit}
 * @return the matching bit, or {@link DocIdSetIterator#NO_MORE_DOCS} when none is found
 */
@Override
public int nextSetBit(int index, int upperBound) {
    assert index >= 0 && index < length : "index=" + index + " numBits=" + length();
    int candidate = first.nextSetBit(index, upperBound);
    for (;;) {
        // Either "first" is exhausted, or both bit sets agree on this candidate.
        if (candidate == DocIdSetIterator.NO_MORE_DOCS || second.get(candidate)) {
            return candidate;
        }
        // Rejected candidate was the very last bit: nothing left to probe.
        if (candidate == length() - 1) {
            return DocIdSetIterator.NO_MORE_DOCS;
        }
        candidate = first.nextSetBit(candidate + 1, upperBound);
    }
}

@Override
public long ramBytesUsed() {
return first.ramBytesUsed();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ public int nextSetBit(int index) {
return index;
}

/**
 * Returns {@code index} itself as the next set bit at or after {@code index}.
 *
 * <p>NOTE(review): presumably every bit of this bit set is considered set (the single-argument
 * {@code nextSetBit} shown just above also returns {@code index} directly) -- confirm against
 * the enclosing class.
 *
 * @param index      position to start from; asserted to be strictly below {@code upperBound}
 * @param upperBound bound of the search range; only consulted by the assertion
 * @return {@code index}, unchanged
 */
@Override
public int nextSetBit(int index, int upperBound) {
// Checked only when assertions are enabled; otherwise no range validation happens here.
assert index < upperBound;
return index;
}

@Override
public long ramBytesUsed() {
return RAM_BYTES_USED;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -407,13 +407,8 @@ private static class TimeExceededException extends RuntimeException {
// This exception should never be re-thrown, but we fill in the stacktrace to be able to trace where it does not get properly caught
}

/**
* Lower-level search API.
*
* {@link LeafCollector#collect(int)} is called for every matching document in
* the provided <code>ctx</code>.
*/
private void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector) throws IOException {
@Override
protected void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collector) throws IOException {
cancellable.checkCancelled();
final LeafCollector leafCollector;
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
org.elasticsearch.index.codec.Elasticsearch814Codec
org.elasticsearch.index.codec.Elasticsearch816Codec
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
Expand Down Expand Up @@ -53,7 +53,7 @@
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.suggest.document.Completion99PostingsFormat;
import org.apache.lucene.search.suggest.document.Completion912PostingsFormat;
import org.apache.lucene.search.suggest.document.CompletionPostingsFormat;
import org.apache.lucene.search.suggest.document.SuggestField;
import org.apache.lucene.store.Directory;
Expand Down Expand Up @@ -326,11 +326,11 @@ public void testTriangle() throws Exception {
public void testCompletionField() throws Exception {
IndexWriterConfig config = new IndexWriterConfig().setCommitOnClose(true)
.setUseCompoundFile(false)
.setCodec(new Lucene99Codec(Lucene99Codec.Mode.BEST_SPEED) {
.setCodec(new Lucene912Codec(Lucene912Codec.Mode.BEST_SPEED) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.startsWith("suggest_")) {
return new Completion99PostingsFormat(randomFrom(CompletionPostingsFormat.FSTLoadMode.values()));
return new Completion912PostingsFormat(randomFrom(CompletionPostingsFormat.FSTLoadMode.values()));
} else {
return super.postingsFormat();
}
Expand Down Expand Up @@ -413,25 +413,25 @@ private static void addFieldsToDoc(Document doc, IndexableField[] fields) {
enum CodecMode {
BEST_SPEED {
@Override
Lucene99Codec.Mode mode() {
return Lucene99Codec.Mode.BEST_SPEED;
Lucene912Codec.Mode mode() {
return Lucene912Codec.Mode.BEST_SPEED;
}
},

BEST_COMPRESSION {
@Override
Lucene99Codec.Mode mode() {
return Lucene99Codec.Mode.BEST_COMPRESSION;
Lucene912Codec.Mode mode() {
return Lucene912Codec.Mode.BEST_COMPRESSION;
}
};

abstract Lucene99Codec.Mode mode();
abstract Lucene912Codec.Mode mode();
}

static void indexRandomly(Directory directory, CodecMode codecMode, int numDocs, Consumer<Document> addFields) throws IOException {
IndexWriterConfig config = new IndexWriterConfig().setCommitOnClose(true)
.setUseCompoundFile(randomBoolean())
.setCodec(new Lucene99Codec(codecMode.mode()));
.setCodec(new Lucene912Codec(codecMode.mode()));
try (IndexWriter writer = new IndexWriter(directory, config)) {
for (int i = 0; i < numDocs; i++) {
final Document doc = new Document();
Expand Down Expand Up @@ -639,7 +639,7 @@ static void rewriteIndexWithPerFieldCodec(Directory source, CodecMode mode, Dire
try (DirectoryReader reader = DirectoryReader.open(source)) {
IndexWriterConfig config = new IndexWriterConfig().setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
.setUseCompoundFile(randomBoolean())
.setCodec(new Lucene99Codec(mode.mode()) {
.setCodec(new Lucene912Codec(mode.mode()) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return new ES812PostingsFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public void testResolveDefaultCodecs() throws Exception {
assumeTrue("Only when zstd_stored_fields feature flag is enabled", CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled());
CodecService codecService = createCodecService();
assertThat(codecService.codec("default"), instanceOf(PerFieldMapperCodec.class));
assertThat(codecService.codec("default"), instanceOf(Elasticsearch814Codec.class));
assertThat(codecService.codec("default"), instanceOf(Elasticsearch816Codec.class));
}

public void testDefault() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
import org.elasticsearch.common.logging.LogConfigurator;

Expand All @@ -23,7 +23,7 @@ public class ES813FlatVectorFormatTests extends BaseKnnVectorsFormatTestCase {

@Override
protected Codec getCodec() {
return new Lucene99Codec() {
return new Lucene912Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new ES813FlatVectorFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
import org.elasticsearch.common.logging.LogConfigurator;

Expand All @@ -23,7 +23,7 @@ public class ES813Int8FlatVectorFormatTests extends BaseKnnVectorsFormatTestCase

@Override
protected Codec getCodec() {
return new Lucene99Codec() {
return new Lucene912Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new ES813Int8FlatVectorFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.KnnFloatVectorField;
Expand Down Expand Up @@ -40,7 +40,7 @@ public class ES814HnswScalarQuantizedVectorsFormatTests extends BaseKnnVectorsFo

@Override
protected Codec getCodec() {
return new Lucene99Codec() {
return new Lucene912Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new ES814HnswScalarQuantizedVectorsFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.junit.Before;

public class ES815BitFlatVectorFormatTests extends BaseKnnBitVectorsFormatTestCase {

@Override
protected Codec getCodec() {
return new Lucene99Codec() {
return new Lucene912Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new ES815BitFlatVectorFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.junit.Before;

public class ES815HnswBitVectorsFormatTests extends BaseKnnBitVectorsFormatTestCase {

@Override
protected Codec getCodec() {
return new Lucene99Codec() {
return new Lucene912Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new ES815HnswBitVectorsFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.tests.index.BaseStoredFieldsFormatTestCase;
import org.elasticsearch.index.codec.Elasticsearch814Codec;
import org.elasticsearch.index.codec.Elasticsearch816Codec;

public class Zstd814BestCompressionStoredFieldsFormatTests extends BaseStoredFieldsFormatTestCase {

private final Codec codec = new Elasticsearch814Codec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION);
private final Codec codec = new Elasticsearch816Codec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION);

@Override
protected Codec getCodec() {
Expand Down
Loading

0 comments on commit 1bac0de

Please sign in to comment.