Skip to content

Commit

Permalink
Fixed a bug that does not get a space type from index setting when it…
Browse files Browse the repository at this point in the history
… is empty for compatibility.

Signed-off-by: Dooyong Kim <[email protected]>
  • Loading branch information
Dooyong Kim committed Dec 5, 2024
1 parent 9276c77 commit f189275
Show file tree
Hide file tree
Showing 11 changed files with 394 additions and 68 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Introduced a writing layer in native engines where relies on the writing interface to process IO. (#2241)[https://github.com/opensearch-project/k-NN/pull/2241]
### Bug Fixes
* Fixing the bug when a segment has no vector field present for disk based vector search (#2282)[https://github.com/opensearch-project/k-NN/pull/2282]
* Fixing it to retrieve space_type from index setting when both method and top level don't have the value. [#2309](https://github.com/opensearch-project/k-NN/pull/2309)
### Infrastructure
* Updated C++ version in JNI from c++11 to c++17 [#2259](https://github.com/opensearch-project/k-NN/pull/2259)
* Upgrade bytebuddy and objenesis version to match OpenSearch core and, update github ci runner for macos [#2279](https://github.com/opensearch-project/k-NN/pull/2279)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@

import org.apache.logging.log4j.util.Strings;
import org.opensearch.index.mapper.MapperParsingException;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.common.settings.Settings;

import java.util.Locale;

Expand All @@ -25,24 +27,30 @@ public final class SpaceTypeResolver {
private SpaceTypeResolver() {}

/**
* Resolves space type from configuration details. It is guaranteed not to return either null or
* {@link SpaceType#UNDEFINED}
* Resolves space type from configuration details. It is guaranteed not to return null.
* When space is not in either method and top level, UNDEFINED will be returned.
* This can happen when it is defined at index level which is deprecated and no longer allowed in the future.
* In this case, UNDEFINED will be returned.
*
* @param knnMethodContext Method context
* @param vectorDataType Vectordatatype
* @param knnMethodContext Method context
* @param topLevelSpaceTypeString Alternative top-level space type
* @return {@link SpaceType} for the method
*/
public SpaceType resolveSpaceType(
final KNNMethodContext knnMethodContext,
final VectorDataType vectorDataType,
final String topLevelSpaceTypeString
final String topLevelSpaceTypeString,
final Settings indexSettings,
final VectorDataType vectorDataType
) {
SpaceType methodSpaceType = getSpaceTypeFromMethodContext(knnMethodContext);
SpaceType topLevelSpaceType = getSpaceTypeFromString(topLevelSpaceTypeString);

if (isSpaceTypeConfigured(methodSpaceType) == false && isSpaceTypeConfigured(topLevelSpaceType) == false) {
return getSpaceTypeFromVectorDataType(vectorDataType);
final String spaceType = indexSettings.get(KNNSettings.INDEX_KNN_SPACE_TYPE.getKey());
if (spaceType == null) {
return getDefaultSpaceType(vectorDataType);
}
return SpaceType.getSpace(spaceType);
}

if (isSpaceTypeConfigured(methodSpaceType) == false) {
Expand All @@ -67,6 +75,13 @@ public SpaceType resolveSpaceType(
);
}

public static SpaceType getDefaultSpaceType(final VectorDataType vectorDataType) {
if (vectorDataType == VectorDataType.BINARY) {
return SpaceType.DEFAULT_BINARY;
}
return SpaceType.DEFAULT;
}

private SpaceType getSpaceTypeFromMethodContext(final KNNMethodContext knnMethodContext) {
if (knnMethodContext == null) {
return SpaceType.UNDEFINED;
Expand All @@ -75,13 +90,6 @@ private SpaceType getSpaceTypeFromMethodContext(final KNNMethodContext knnMethod
return knnMethodContext.getSpaceType();
}

private SpaceType getSpaceTypeFromVectorDataType(final VectorDataType vectorDataType) {
if (vectorDataType == VectorDataType.BINARY) {
return SpaceType.DEFAULT_BINARY;
}
return SpaceType.DEFAULT;
}

private SpaceType getSpaceTypeFromString(final String spaceType) {
if (Strings.isEmpty(spaceType)) {
return SpaceType.UNDEFINED;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,13 +387,20 @@ public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserCont
// If the original knnMethodContext is not null, resolve its space type and engine from the rest of the
// configuration. This is consistent with the existing behavior for space type in 2.16 where we modify the
// parsed value
SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
final SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
builder.originalParameters.getKnnMethodContext(),
builder.vectorDataType.get(),
builder.topLevelSpaceType.get()
builder.topLevelSpaceType.get(),
parserContext.getSettings(),
builder.vectorDataType.get()
);

// Set space type to the original knnMethodContext. Since the resolved one can be UNDEFINED,
// we must wrap it and pick up the default when it is UNDEFINED.
setSpaceType(builder.originalParameters.getKnnMethodContext(), resolvedSpaceType);
validateSpaceType(builder);

// Resolve method component. For the legacy case where space type can be configured at index level,
// it first tries to use the given one then tries to get it from index setting when the space type is UNDEFINED.
resolveKNNMethodComponents(builder, parserContext, resolvedSpaceType);
validateFromKNNMethod(builder);
}
Expand Down Expand Up @@ -484,11 +491,7 @@ private void validateCompressionAndModeNotSet(KNNVectorFieldMapper.Builder build
}
}

private void resolveKNNMethodComponents(
KNNVectorFieldMapper.Builder builder,
ParserContext parserContext,
SpaceType resolvedSpaceType
) {
private void resolveKNNMethodComponents(Builder builder, ParserContext parserContext, SpaceType resolvedSpaceType) {
// Setup the initial configuration that is used to help resolve parameters.
builder.setKnnMethodConfigContext(
KNNMethodConfigContext.builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ static boolean useLuceneKNNVectorsFormat(final Version indexCreatedVersion) {
return indexCreatedVersion.onOrAfter(Version.V_2_17_0);
}

private static SpaceType getSpaceType(final Settings indexSettings) {
public static SpaceType getSpaceType(final Settings indexSettings) {
String spaceType = indexSettings.get(KNNSettings.INDEX_KNN_SPACE_TYPE.getKey());
if (spaceType == null) {
spaceType = KNNSettings.INDEX_KNN_DEFAULT_SPACE_TYPE;
Expand Down Expand Up @@ -196,15 +196,11 @@ private static int getEfConstruction(Settings indexSettings, Version indexVersio
static KNNMethodContext createKNNMethodContextFromLegacy(
Settings indexSettings,
Version indexCreatedVersion,
SpaceType topLevelSpaceType
SpaceType resolvedSpaceType
) {
// If top level spaceType is set then use that spaceType otherwise default to spaceType from index-settings
final SpaceType finalSpaceToSet = topLevelSpaceType != SpaceType.UNDEFINED
? topLevelSpaceType
: KNNVectorFieldMapperUtil.getSpaceType(indexSettings);
return new KNNMethodContext(
KNNEngine.NMSLIB,
finalSpaceToSet,
resolvedSpaceType,
new MethodComponentContext(
METHOD_HNSW,
Map.of(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ && ensureSpaceTypeNotSet(topLevelSpaceType)) {
vectorDataType = VectorDataType.DEFAULT;
}

if ((knnMethodContext == null || knnMethodContext.getSpaceType() == SpaceType.UNDEFINED)
&& topLevelSpaceType == SpaceType.UNDEFINED) {
topLevelSpaceType = SpaceTypeResolver.getDefaultSpaceType(vectorDataType);
}

ensureIfSetThenEquals(
MODE_PARAMETER,
mode,
Expand All @@ -163,8 +168,9 @@ && ensureSpaceTypeNotSet(topLevelSpaceType)) {
);
SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
knnMethodContext,
vectorDataType,
topLevelSpaceType.getValue()
topLevelSpaceType.getValue(),
null,
vectorDataType
);
setSpaceType(knnMethodContext, resolvedSpaceType);
TrainingModelRequest trainingModelRequest = new TrainingModelRequest(
Expand Down
72 changes: 69 additions & 3 deletions src/test/java/org/opensearch/knn/index/KNNESSettingsTestIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,31 @@

package org.opensearch.knn.index;

import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.plugin.stats.StatNames;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.opensearch.client.Response;
import org.opensearch.client.ResponseException;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.core.rest.RestStatus;
import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.KNNResult;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.plugin.stats.StatNames;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.hamcrest.Matchers.containsString;
import static org.opensearch.knn.TestUtils.KNN_VECTOR;
import static org.opensearch.knn.TestUtils.PROPERTIES;
import static org.opensearch.knn.TestUtils.VECTOR_TYPE;
import static org.opensearch.knn.common.KNNConstants.DIMENSION;

public class KNNESSettingsTestIT extends KNNRestTestCase {

Expand Down Expand Up @@ -149,4 +160,59 @@ public void testCacheRebuiltAfterUpdateIndexSettings() throws Exception {

assertEquals(0, indicesInCache.size());
}

public void testKNNLegacySpaceTypeIndexingTest() throws IOException, ParseException {
// Configure space_type at index level. This is deprecated and will be removed in the future.
final Settings indexSettings = Settings.builder()
.put("index.knn", true)
.put("knn.algo_param.ef_search", 100)
.put("knn.space_type", SpaceType.INNER_PRODUCT.getValue())
.build();

// This mapping does not have method.
final String testField = "knn_field";
final String testIndex = "knn_index";
final String mapping = XContentFactory.jsonBuilder()
.startObject()
.startObject(PROPERTIES)
.startObject(testField)
.field(VECTOR_TYPE, KNN_VECTOR)
.field(DIMENSION, 2)
.endObject()
.endObject()
.endObject()
.toString();

createKnnIndex(testIndex, indexSettings, mapping);

// Ingest data.
float[] queryVector = new float[] { 2, 3 };
final int k = 2;

float[][] vectorData = new float[5][2];
vectorData[0] = new float[] { 11.7f, 2.7f }; // distance=31.5
vectorData[1] = new float[] { 20.9f, 3.9f }; // distance=53.5 <- answer
vectorData[2] = new float[] { 3.77f, 4.22f }; // distance=20.2
vectorData[3] = new float[] { 15, 6 }; // distance=48 <- answer
vectorData[4] = new float[] { 4.7f, 5.9f }; // distance=27.1

bulkAddKnnDocs(testIndex, testField, vectorData, vectorData.length);
flushIndex(testIndex);

// Send a query and verify scores are correct.
Response searchResponse = searchKNNIndex(
testIndex,
KNNQueryBuilder.builder().k(k).fieldName(testField).vector(queryVector).build(),
k
);

List<KNNResult> results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), testField);

assertEquals(k, results.size());
Set<String> docIds = new HashSet<>();
for (KNNResult result : results) {
docIds.add(result.getDocId());
}
assertEquals(new HashSet<>(Arrays.asList("1", "3")), docIds);
}
}
1 change: 1 addition & 0 deletions src/test/java/org/opensearch/knn/index/OpenSearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ public void testVectorMappingValidation_invalidDimension() {
createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT) + 1)
)
);

assertThat(
ex.getMessage(),
containsString(
Expand Down
Loading

0 comments on commit f189275

Please sign in to comment.