Skip to content

Commit

Permalink
Optimize indexing points with index and doc values set to true (elast…
Browse files Browse the repository at this point in the history
…ic#120271)

Introducing at LonPoint and XYPoint that can add doc values sto improve indexing perfromance.
  • Loading branch information
iverase authored Jan 17, 2025
1 parent cfeeb1a commit fb5d364
Show file tree
Hide file tree
Showing 11 changed files with 189 additions and 66 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/120271.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 120271
summary: Optimize indexing points with index and doc values set to true
area: Geo
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,20 @@
*/
package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.ShapeField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.LatLonGeometry;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.geo.GeoFormatterFactory;
Expand Down Expand Up @@ -282,17 +287,24 @@ public FieldMapper.Builder getMergeBuilder() {

@Override
protected void index(DocumentParserContext context, GeoPoint geometry) throws IOException {
if (fieldType().isIndexed()) {
context.doc().add(new LatLonPoint(fieldType().name(), geometry.lat(), geometry.lon()));
}
if (fieldType().hasDocValues()) {
final boolean indexed = fieldType().isIndexed();
final boolean hasDocValues = fieldType().hasDocValues();
final boolean store = fieldType().isStored();
if (indexed && hasDocValues) {
context.doc().add(new LatLonPointWithDocValues(fieldType().name(), geometry.lat(), geometry.lon()));
} else if (hasDocValues) {
context.doc().add(new LatLonDocValuesField(fieldType().name(), geometry.lat(), geometry.lon()));
} else if (fieldType().isStored() || fieldType().isIndexed()) {
context.addToFieldNames(fieldType().name());
} else if (indexed) {
context.doc().add(new LatLonPoint(fieldType().name(), geometry.lat(), geometry.lon()));
}
if (fieldType().isStored()) {
if (store) {
context.doc().add(new StoredField(fieldType().name(), geometry.toString()));
}
if (hasDocValues == false && (indexed || store)) {
// When the field doesn't have doc values so that we can run exists queries, we also need to index the field name separately.
context.addToFieldNames(fieldType().name());
}

// TODO phase out geohash (which is currently used in the CompletionSuggester)
// we only expose the geohash value and disallow advancing tokens, hence we can reuse the same parser throughout multiple sub-fields
DocumentParserContext parserContext = context.switchParser(new GeoHashMultiFieldParser(context.parser(), geometry.geohash()));
Expand Down Expand Up @@ -622,4 +634,60 @@ protected void writeValue(XContentBuilder b, long value) throws IOException {

return super.syntheticSourceSupport();
}

/**
* Utility class that allows adding index and doc values in one field
*/
public static class LatLonPointWithDocValues extends Field {

public static final FieldType TYPE = new FieldType();

static {
TYPE.setDimensions(2, Integer.BYTES);
TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC);
TYPE.freeze();
}

// holds the doc value value.
private final long docValue;

public LatLonPointWithDocValues(String name, double latitude, double longitude) {
super(name, TYPE);
final byte[] bytes;
if (fieldsData == null) {
bytes = new byte[8];
fieldsData = new BytesRef(bytes);
} else {
bytes = ((BytesRef) fieldsData).bytes;
}

final int latitudeEncoded = GeoEncodingUtils.encodeLatitude(latitude);
final int longitudeEncoded = GeoEncodingUtils.encodeLongitude(longitude);
NumericUtils.intToSortableBytes(latitudeEncoded, bytes, 0);
NumericUtils.intToSortableBytes(longitudeEncoded, bytes, Integer.BYTES);
docValue = (((long) latitudeEncoded) << 32) | (longitudeEncoded & 0xFFFFFFFFL);
}

@Override
public Number numericValue() {
return docValue;
}

@Override
public String toString() {
StringBuilder result = new StringBuilder();
result.append(getClass().getSimpleName());
result.append(" <");
result.append(name);
result.append(':');

byte[] bytes = ((BytesRef) fieldsData).bytes;
result.append(GeoEncodingUtils.decodeLatitude(bytes, 0));
result.append(',');
result.append(GeoEncodingUtils.decodeLongitude(bytes, Integer.BYTES));

result.append('>');
return result.toString();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ public Set<String> parseContext(LuceneDocument document) {
if (field instanceof StringField) {
spare.resetFromString(field.stringValue());
geohashes.add(spare.geohash());
} else if (field instanceof LatLonDocValuesField) {
} else if (field instanceof LatLonDocValuesField || field instanceof GeoPointFieldMapper.LatLonPointWithDocValues) {
spare.resetFromEncoded(field.numericValue().longValue());
geohashes.add(spare.geohash());
} else if (field instanceof LatLonPoint) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ public void testGeoHashWithSubCompletionAndStringInsert() throws Exception {
ParsedDocument parsedDocument = defaultMapper.parse(source(b -> b.field("field", "drm3btev3e86")));

LuceneDocument indexableFields = parsedDocument.rootDoc();
assertThat(indexableFields.getFields("field"), hasSize(2));
assertThat(indexableFields.getFields("field"), hasSize(1));
assertThat(indexableFields.getFields("field.analyzed"), containsInAnyOrder(suggestField("drm3btev3e86")));
// unable to assert about geofield content, covered in a REST test
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -726,10 +726,10 @@ public void testCopyToGeoPoint() throws Exception {
LuceneDocument doc = docMapper.parse(new SourceToParse("1", json, XContentType.JSON)).rootDoc();

List<IndexableField> fields = doc.getFields("geopoint");
assertThat(fields.size(), equalTo(2));
assertThat(fields.size(), equalTo(1));

fields = doc.getFields("geopoint_copy");
assertThat(fields.size(), equalTo(2));
assertThat(fields.size(), equalTo(1));
}
}
// check failure for object/array type representations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexableField;
Expand Down Expand Up @@ -1120,15 +1118,13 @@ public void testWithDynamicTemplates() throws Exception {

ParsedDocument doc = mapper.parse(source("1", b -> b.field(field, "41.12,-71.34"), null, Map.of(field, "points")));
List<IndexableField> fields = doc.rootDoc().getFields(field);
assertThat(fields, hasSize(2));
assertThat(fields.get(0).fieldType(), sameInstance(LatLonPoint.TYPE));
assertThat(fields.get(1).fieldType(), sameInstance(LatLonDocValuesField.TYPE));
assertThat(fields, hasSize(1));
assertThat(fields.get(0).fieldType(), sameInstance(GeoPointFieldMapper.LatLonPointWithDocValues.TYPE));

doc = mapper.parse(source("1", b -> b.field(field, new double[] { -71.34, 41.12 }), null, Map.of(field, "points")));
fields = doc.rootDoc().getFields(field);
assertThat(fields, hasSize(2));
assertThat(fields.get(0).fieldType(), sameInstance(LatLonPoint.TYPE));
assertThat(fields.get(1).fieldType(), sameInstance(LatLonDocValuesField.TYPE));
assertThat(fields, hasSize(1));
assertThat(fields.get(0).fieldType(), sameInstance(GeoPointFieldMapper.LatLonPointWithDocValues.TYPE));

doc = mapper.parse(source("1", b -> {
b.startObject(field);
Expand All @@ -1137,16 +1133,13 @@ public void testWithDynamicTemplates() throws Exception {
b.endObject();
}, null, Map.of(field, "points")));
fields = doc.rootDoc().getFields(field);
assertThat(fields, hasSize(2));
assertThat(fields.get(0).fieldType(), sameInstance(LatLonPoint.TYPE));
assertThat(fields.get(1).fieldType(), sameInstance(LatLonDocValuesField.TYPE));
assertThat(fields, hasSize(1));
assertThat(fields.get(0).fieldType(), sameInstance(GeoPointFieldMapper.LatLonPointWithDocValues.TYPE));
doc = mapper.parse(source("1", b -> b.field(field, new String[] { "41.12,-71.34", "43,-72.34" }), null, Map.of(field, "points")));
fields = doc.rootDoc().getFields(field);
assertThat(fields, hasSize(4));
assertThat(fields.get(0).fieldType(), sameInstance(LatLonPoint.TYPE));
assertThat(fields.get(1).fieldType(), sameInstance(LatLonDocValuesField.TYPE));
assertThat(fields.get(2).fieldType(), sameInstance(LatLonPoint.TYPE));
assertThat(fields.get(3).fieldType(), sameInstance(LatLonDocValuesField.TYPE));
assertThat(fields, hasSize(2));
assertThat(fields.get(0).fieldType(), sameInstance(GeoPointFieldMapper.LatLonPointWithDocValues.TYPE));
assertThat(fields.get(1).fieldType(), sameInstance(GeoPointFieldMapper.LatLonPointWithDocValues.TYPE));

doc = mapper.parse(source("1", b -> {
b.startArray(field);
Expand All @@ -1162,21 +1155,18 @@ public void testWithDynamicTemplates() throws Exception {
b.endArray();
}, null, Map.of(field, "points")));
fields = doc.rootDoc().getFields(field);
assertThat(fields, hasSize(4));
assertThat(fields.get(0).fieldType(), sameInstance(LatLonPoint.TYPE));
assertThat(fields.get(1).fieldType(), sameInstance(LatLonDocValuesField.TYPE));
assertThat(fields.get(2).fieldType(), sameInstance(LatLonPoint.TYPE));
assertThat(fields.get(3).fieldType(), sameInstance(LatLonDocValuesField.TYPE));
assertThat(fields, hasSize(2));
assertThat(fields.get(0).fieldType(), sameInstance(GeoPointFieldMapper.LatLonPointWithDocValues.TYPE));
assertThat(fields.get(1).fieldType(), sameInstance(GeoPointFieldMapper.LatLonPointWithDocValues.TYPE));

doc = mapper.parse(source("1", b -> {
b.startObject("address");
b.field("home", "43,-72.34");
b.endObject();
}, null, Map.of("address.home", "points")));
fields = doc.rootDoc().getFields("address.home");
assertThat(fields, hasSize(2));
assertThat(fields.get(0).fieldType(), sameInstance(LatLonPoint.TYPE));
assertThat(fields.get(1).fieldType(), sameInstance(LatLonDocValuesField.TYPE));
assertThat(fields, hasSize(1));
assertThat(fields.get(0).fieldType(), sameInstance(GeoPointFieldMapper.LatLonPointWithDocValues.TYPE));
}

public void testDynamicTemplatesNotFound() throws Exception {
Expand Down Expand Up @@ -3003,7 +2993,7 @@ public void testSubobjectsFalseDocsWithInnerObjectMappedAsFieldThatCanParseNativ
assertNotNull(location);
assertNull(parsedDocument.rootDoc().getField("metrics.service.location.lat"));
assertNull(parsedDocument.rootDoc().getField("metrics.service.location.lon"));
assertTrue(location instanceof LatLonPoint);
assertTrue(location instanceof GeoPointFieldMapper.LatLonPointWithDocValues);
Mapper locationMapper = mapper.mappers().getMapper("metrics.service.location");
assertNotNull(locationMapper);
assertTrue(locationMapper instanceof GeoPointFieldMapper);
Expand Down Expand Up @@ -3108,7 +3098,10 @@ public void testSubobjectsFalseDocsWithInnerObjectThatCanBeParsedNatively() thro
"""));
assertNull(parsedDocument.rootDoc().getField("metrics.service.location.lat"));
assertNull(parsedDocument.rootDoc().getField("metrics.service.location.lon"));
assertThat(parsedDocument.rootDoc().getField("metrics.service.location"), instanceOf(LatLonPoint.class));
assertThat(
parsedDocument.rootDoc().getField("metrics.service.location"),
instanceOf(GeoPointFieldMapper.LatLonPointWithDocValues.class)
);
assertThat(mapper.mappers().getMapper("metrics.service.location"), instanceOf(GeoPointFieldMapper.class));
}

Expand Down Expand Up @@ -3222,7 +3215,7 @@ public void testSubobjectsFalseDocsWithGeoPointFromDynamicTemplate() throws Exce
}
"""));

assertThat(parsedDocument.rootDoc().getField("location"), instanceOf(LatLonPoint.class));
assertThat(parsedDocument.rootDoc().getField("location"), instanceOf(GeoPointFieldMapper.LatLonPointWithDocValues.class));
RootObjectMapper root = parsedDocument.dynamicMappingsUpdate().getRoot();
assertEquals(1, root.mappers.size());
assertThat(root.getMapper("location"), instanceOf(GeoPointFieldMapper.class));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ public void testTemplateWithoutMatchPredicates() throws Exception {
XContentMeteringParserDecorator.NOOP
)
);
assertThat(doc.rootDoc().getFields("foo"), hasSize(2));
assertThat(doc.rootDoc().getFields("foo"), hasSize(1));
assertThat(doc.rootDoc().getFields("bar"), hasSize(1));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.cluster.metadata.IndexMetadata;
Expand Down Expand Up @@ -194,16 +193,16 @@ public void testMetricAndMultiValues() throws Exception {
new Object[] { new Double[] { pointA.getX(), pointA.getY() }, new Double[] { pointB.getX(), pointB.getY() } },
new Object[] { pointA.getY() + "," + pointA.getX(), pointB.getY() + "," + pointB.getX() },
new Object[] { GeoJson.toMap(pointA), GeoJson.toMap(pointB) } };
IndexableField expectedPointA = new LatLonPoint("field", pointA.getY(), pointA.getX());
IndexableField expectedPointB = new LatLonPoint("field", pointB.getY(), pointB.getX());
IndexableField expectedPointA = new GeoPointFieldMapper.LatLonPointWithDocValues("field", pointA.getY(), pointA.getX());
IndexableField expectedPointB = new GeoPointFieldMapper.LatLonPointWithDocValues("field", pointB.getY(), pointB.getX());

// Verify that metric and non-metric mappers behave the same on single valued fields
for (Object[] values : data) {
for (DocumentMapper mapper : new DocumentMapper[] { nonMetricMapper, metricMapper }) {
ParsedDocument doc = mapper.parse(source(b -> b.field("field", values[0])));
assertThat(doc.rootDoc().getField("field"), notNullValue());
IndexableField field = doc.rootDoc().getField("field");
assertThat(field, instanceOf(LatLonPoint.class));
assertThat(field, instanceOf(GeoPointFieldMapper.LatLonPointWithDocValues.class));
assertThat(field.toString(), equalTo(expectedPointA.toString()));
}
}
Expand All @@ -214,15 +213,11 @@ public void testMetricAndMultiValues() throws Exception {
{
ParsedDocument doc = nonMetricMapper.parse(source(b -> b.field("field", values)));
assertThat(doc.rootDoc().getField("field"), notNullValue());
Object[] fields = doc.rootDoc()
.getFields()
.stream()
.filter(f -> f.name().equals("field") && f.fieldType().docValuesType() == DocValuesType.NONE)
.toArray();
Object[] fields = doc.rootDoc().getFields().stream().filter(f -> f.name().equals("field")).toArray();
assertThat(fields.length, equalTo(2));
assertThat(fields[0], instanceOf(LatLonPoint.class));
assertThat(fields[0], instanceOf(GeoPointFieldMapper.LatLonPointWithDocValues.class));
assertThat(fields[0].toString(), equalTo(expectedPointA.toString()));
assertThat(fields[1], instanceOf(LatLonPoint.class));
assertThat(fields[1], instanceOf(GeoPointFieldMapper.LatLonPointWithDocValues.class));
assertThat(fields[1].toString(), equalTo(expectedPointB.toString()));
}
// Metric mapper rejects multi-valued data
Expand Down Expand Up @@ -328,7 +323,7 @@ public void testLonLatArrayDynamic() throws Exception {
public void testLonLatArrayStored() throws Exception {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "geo_point").field("store", true)));
ParsedDocument doc = mapper.parse(source(b -> b.startArray("field").value(1.3).value(1.2).endArray()));
assertThat(doc.rootDoc().getFields("field"), hasSize(3));
assertThat(doc.rootDoc().getFields("field"), hasSize(2));
}

public void testLonLatArrayArrayStored() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,9 @@ protected GeoPointFieldScript.Factory multipleValuesScript() {

@Override
protected void assertMultipleValues(List<IndexableField> fields) {
assertEquals(4, fields.size());
assertEquals("LatLonPoint <field:-1.000000024214387,0.9999999403953552>", fields.get(0).toString());
assertEquals("LatLonDocValuesField <field:-1.000000024214387,0.9999999403953552>", fields.get(1).toString());
assertEquals("LatLonPoint <field:-2.000000006519258,1.9999999646097422>", fields.get(2).toString());
assertEquals("LatLonDocValuesField <field:-2.000000006519258,1.9999999646097422>", fields.get(3).toString());
assertEquals(2, fields.size());
assertEquals("LatLonPointWithDocValues <field:-1.000000024214387,0.9999999403953552>", fields.get(0).toString());
assertEquals("LatLonPointWithDocValues <field:-2.000000006519258,1.9999999646097422>", fields.get(1).toString());
}

@Override
Expand Down
Loading

0 comments on commit fb5d364

Please sign in to comment.