Skip to content

Commit

Permalink
Add initial _all field support to match_only_text field type.
Browse files Browse the repository at this point in the history
  • Loading branch information
martijnvg committed Jan 26, 2024
1 parent 25ca1ee commit bea189c
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOFunction;
Expand All @@ -39,6 +40,7 @@
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.AllFieldMapper;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.BlockSourceReader;
import org.elasticsearch.index.mapper.BlockStoredFieldsReader;
Expand All @@ -63,12 +65,15 @@
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import static org.elasticsearch.index.mapper.AllFieldMapper.toAllFieldTerm;

/**
* A {@link FieldMapper} for full-text fields that only indexes
* {@link IndexOptions#DOCS} and runs positional queries by looking at the
Expand Down Expand Up @@ -131,7 +136,8 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
tsi,
indexAnalyzer,
context.isSourceSynthetic(),
meta.getValue()
meta.getValue(),
context.isIndexIntoAllField()
);
return ft;
}
Expand All @@ -140,7 +146,16 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
    // The field type and the mapper both need to know whether values are routed
    // into the _all field, so the flag is read from the same builder context twice:
    // once inside buildFieldType(context) and once for the mapper constructor below.
    MatchOnlyTextFieldType tft = buildFieldType(context);
    MultiFields multiFields = multiFieldsBuilder.build(this, context);
    // Only the 8-argument constructor call is kept: the earlier 7-argument call
    // lacked the indexIntoAllField argument and returned before this statement
    // could ever be reached.
    return new MatchOnlyTextFieldMapper(
        name,
        Defaults.FIELD_TYPE,
        tft,
        multiFields,
        copyTo,
        context.isSourceSynthetic(),
        context.isIndexIntoAllField(),
        this
    );
}
}

Expand All @@ -150,17 +165,20 @@ public static class MatchOnlyTextFieldType extends StringFieldType {

private final Analyzer indexAnalyzer;
private final TextFieldType textFieldType;
private final boolean indexIntoAllField;

/**
 * Creates the field type for a {@code match_only_text} field.
 *
 * @param name              full name of the field
 * @param tsi               text search info passed through to the parent field type
 * @param indexAnalyzer     analyzer used at index time; must not be {@code null}
 * @param isSyntheticSource forwarded to the internal {@link TextFieldType} delegate
 * @param meta              field metadata passed through to the parent field type
 * @param indexIntoAllField when {@code true}, term and terms queries on this field
 *                          target the {@code AllFieldMapper.NAME} field instead of
 *                          the field itself
 * @throws NullPointerException if {@code indexAnalyzer} is {@code null}
 */
public MatchOnlyTextFieldType(
    String name,
    TextSearchInfo tsi,
    Analyzer indexAnalyzer,
    boolean isSyntheticSource,
    Map<String, String> meta,
    boolean indexIntoAllField
) {
    super(name, true, false, false, tsi, meta);
    this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
    this.textFieldType = new TextFieldType(name, isSyntheticSource);
    this.indexIntoAllField = indexIntoAllField;
}

public MatchOnlyTextFieldType(String name) {
Expand All @@ -169,7 +187,8 @@ public MatchOnlyTextFieldType(String name) {
new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
Lucene.STANDARD_ANALYZER,
false,
Collections.emptyMap()
Collections.emptyMap(),
false
);
}

Expand Down Expand Up @@ -235,10 +254,30 @@ private IntervalsSource toIntervalsSource(
return new SourceIntervalsSource(source, approximation, getValueFetcherProvider(searchExecutionContext), indexAnalyzer);
}

// TODO: fix other query types to work with _all
/**
 * Builds a constant-score (non-scoring) exact-term query for {@code value}.
 *
 * When this field is indexed into the all-field, the query targets
 * {@link AllFieldMapper#NAME} and the search term is produced by
 * {@code toAllFieldTerm} from the indexed form of {@code value} and this
 * field's name. Otherwise the query is delegated to the parent field type.
 *
 * @param value   the value to search for
 * @param context the search execution context; only used on the delegating path
 * @return a {@link ConstantScoreQuery} wrapping the term query
 */
@Override
public Query termQuery(Object value, SearchExecutionContext context) {
    if (indexIntoAllField) {
        return new ConstantScoreQuery(
            new TermQuery(new Term(AllFieldMapper.NAME, toAllFieldTerm(indexedValueForSearch(value), new BytesRef(name()))))
        );
    } else {
        // Disable scoring
        return new ConstantScoreQuery(super.termQuery(value, context));
    }
}

/**
 * Multi-value variant of the term query: matches documents containing any of
 * {@code values}, without scoring.
 *
 * When this field is indexed into the all-field, every value is converted with
 * {@code toAllFieldTerm} and looked up in {@link AllFieldMapper#NAME};
 * otherwise the parent field type builds the query.
 *
 * @param values  the values to search for
 * @param context the search execution context; only used on the delegating path
 * @return a {@link ConstantScoreQuery} wrapping the terms query
 */
@Override
public Query termsQuery(Collection<?> values, SearchExecutionContext context) {
    if (indexIntoAllField == false) {
        // Disable scoring
        return new ConstantScoreQuery(super.termsQuery(values, context));
    }

    List<BytesRef> allFieldTerms = new ArrayList<>(values.size());
    for (Object value : values) {
        BytesRef indexedValue = indexedValueForSearch(value);
        allFieldTerms.add(toAllFieldTerm(indexedValue, new BytesRef(name())));
    }
    BytesRef[] termArray = allFieldTerms.toArray(new BytesRef[0]);
    return new ConstantScoreQuery(new TermInSetQuery(AllFieldMapper.NAME, termArray));
}

@Override
Expand Down Expand Up @@ -368,6 +407,7 @@ private String storedFieldNameForSyntheticSource() {
private final NamedAnalyzer indexAnalyzer;
private final int positionIncrementGap;
private final boolean storeSource;
private final boolean indexIntoAllField;
private final FieldType fieldType;

private MatchOnlyTextFieldMapper(
Expand All @@ -377,6 +417,7 @@ private MatchOnlyTextFieldMapper(
MultiFields multiFields,
CopyTo copyTo,
boolean storeSource,
boolean indexIntoAllField,
Builder builder
) {
super(simpleName, mappedFieldType, multiFields, copyTo, false, null);
Expand All @@ -388,6 +429,7 @@ private MatchOnlyTextFieldMapper(
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
this.storeSource = storeSource;
this.indexIntoAllField = indexIntoAllField;
}

@Override
Expand All @@ -408,8 +450,14 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
return;
}

Field field = new Field(fieldType().name(), value, fieldType);
context.doc().add(field);
if (indexIntoAllField) {
Field field = new Field(fieldType().name(), value, fieldType);
AllFieldMapper allFieldMapper = (AllFieldMapper) context.getMetadataMapper(AllFieldMapper.NAME);
allFieldMapper.addToAll(context, field);
} else {
Field field = new Field(fieldType().name(), value, fieldType);
context.doc().add(field);
}
context.addToFieldNames(fieldType().name());

if (storeSource) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.store.Directory;
Expand All @@ -22,7 +25,9 @@
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.mapper.AllFieldMapper;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
Expand Down Expand Up @@ -259,6 +264,34 @@ public void testDocValuesLoadedFromSynthetic() throws IOException {
assertScriptDocValues(mapper, "foo", equalTo(List.of("foo")));
}

/**
 * With the all-field enabled, a match_only_text value must be indexed as a
 * single all-field term, and a term query on the field must target that same
 * term in the all-field.
 */
public void testAllField() throws Exception {
    // Mapping: all-field enabled plus one match_only_text field named "field1".
    DocumentMapper docMapper = createDocumentMapper(
        allFieldMapping(true, b -> b.startObject("field1").field("type", "match_only_text").endObject())
    );
    // Expected all-field term for the value "value1" on "field1".
    BytesRef expectedTerm = new BytesRef("value1\0field1");

    // Indexing side: exactly one all-field entry carrying the expected term.
    ParsedDocument doc = docMapper.parse(source(b -> b.field("field1", "value1")));
    List<IndexableField> allFields = doc.rootDoc().getFields(AllFieldMapper.NAME);
    assertThat(allFields.size(), equalTo(1));
    assertThat(allFields.get(0).binaryValue(), equalTo(expectedTerm));

    // Query side: constant-score wrapper around a term query on the all-field.
    Query query = docMapper.mappers().getFieldType("field1").termQuery("value1", null);
    assertThat(query, instanceOf(ConstantScoreQuery.class));
    ConstantScoreQuery constantScore = (ConstantScoreQuery) query;
    TermQuery wrapped = (TermQuery) constantScore.getQuery();
    assertThat(wrapped.getTerm().field(), equalTo(AllFieldMapper.NAME));
    assertThat(wrapped.getTerm().bytes(), equalTo(expectedTerm));
}

/**
 * Builds a mapping, via {@code topMapping}, that contains an
 * {@link AllFieldMapper#NAME} object with the given {@code enabled} flag,
 * followed by a {@code properties} object filled in by the caller.
 *
 * @param enabled           value written to the all-field's {@code enabled} setting
 * @param propertiesBuilder callback that writes the field definitions inside {@code properties}
 * @return the builder returned by {@code topMapping}
 * @throws IOException if writing to the builder fails
 */
private static XContentBuilder allFieldMapping(boolean enabled, CheckedConsumer<XContentBuilder, IOException> propertiesBuilder)
    throws IOException {
    return topMapping(mapping -> {
        // All-field configuration
        mapping.startObject(AllFieldMapper.NAME);
        mapping.field("enabled", enabled);
        mapping.endObject();

        // Caller-supplied field definitions
        mapping.startObject("properties");
        propertiesBuilder.accept(mapping);
        mapping.endObject();
    });
}

@Override
protected IngestScriptSupport ingestScriptSupport() {
throw new AssumptionViolatedException("not supported");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
Expand Down Expand Up @@ -136,6 +138,7 @@ public void postParse(DocumentParserContext context) throws IOException {
for (int i = 0; i < fields.size(); i++) {
IndexableField indexableField = fields.get(i);
var mappedFieldType = context.mappingLookup().getFieldType(indexableField.name());
// TODO: make keyword push its terms like match_only_text does:
if (mappedFieldType != null && "keyword".equals(mappedFieldType.typeName())) {
BytesRef value = toAllFieldTerm(indexableField.binaryValue(), new BytesRef(indexableField.name()));
if (value.length > MAX_TERM_LENGTH) {
Expand All @@ -147,6 +150,26 @@ public void postParse(DocumentParserContext context) throws IOException {

}

/**
 * Tokenizes {@code indexableField} and adds one {@link #NAME} keyword field to
 * the current document per token. Each term is built by {@code toAllFieldTerm}
 * from the token text and the source field's name.
 *
 * Does nothing when this mapper is not enabled.
 *
 * @param context        the parser context whose document receives the terms
 * @param indexableField the field whose token stream is consumed
 * @throws IOException if the token stream fails while being consumed
 */
public void addToAll(DocumentParserContext context, IndexableField indexableField) throws IOException {
    if (enabled == false) {
        return;
    }

    final String sourceFieldName = indexableField.name();
    // TODO: do we need to use index analyzer from MatchOnlyTextFieldMapper here?
    try (TokenStream tokens = indexableField.tokenStream(Lucene.STANDARD_ANALYZER, null)) {
        final CharTermAttribute currentToken = tokens.addAttribute(CharTermAttribute.class);
        tokens.reset();
        while (tokens.incrementToken()) {
            BytesRef tokenBytes = new BytesRef(currentToken.toString());
            BytesRef allFieldTerm = toAllFieldTerm(tokenBytes, new BytesRef(sourceFieldName));
            if (allFieldTerm.length > MAX_TERM_LENGTH) {
                // TODO: decide how to handle over-length terms; for now they are still added below
            }
            context.doc().add(new KeywordFieldMapper.KeywordField(NAME, allFieldTerm, Defaults.FIELD_TYPE));
        }
        tokens.end();
    }
}

public static BytesRef toAllFieldTerm(BytesRef fieldValueBytes, BytesRef fieldNameBytes) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(fieldValueBytes);
Expand Down

0 comments on commit bea189c

Please sign in to comment.