Skip to content

Commit

Permalink
Fixed a bug that does not get a space type from index setting when it…
Browse files Browse the repository at this point in the history
… is empty for compatibility.

Signed-off-by: Dooyong Kim <[email protected]>
  • Loading branch information
Dooyong Kim committed Dec 5, 2024
1 parent 9276c77 commit 62c2a36
Show file tree
Hide file tree
Showing 11 changed files with 226 additions and 81 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Introduced a writing layer in native engines where relies on the writing interface to process IO. (#2241)[https://github.com/opensearch-project/k-NN/pull/2241]
### Bug Fixes
* Fixing the bug when a segment has no vector field present for disk based vector search (#2282)[https://github.com/opensearch-project/k-NN/pull/2282]
* Fixing it to retrieve space_type from index setting when both method and top level don't have the value. [#2309](https://github.com/opensearch-project/k-NN/pull/2309)
### Infrastructure
* Updated C++ version in JNI from c++11 to c++17 [#2259](https://github.com/opensearch-project/k-NN/pull/2259)
* Upgrade bytebuddy and objenesis version to match OpenSearch core and, update github ci runner for macos [#2279](https://github.com/opensearch-project/k-NN/pull/2279)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,21 @@ public final class SpaceTypeResolver {
private SpaceTypeResolver() {}

/**
* Resolves space type from configuration details. It is guaranteed not to return either null or
* {@link SpaceType#UNDEFINED}
* Resolves space type from configuration details. It is guaranteed not to return null.
* When space is not in either method and top level, UNDEFINED will be returned.
* This can happen when it is defined at index level which is deprecated and no longer allowed in the future.
* In this case, UNDEFINED will be returned.
*
* @param knnMethodContext Method context
* @param vectorDataType Vectordatatype
* @param knnMethodContext Method context
* @param topLevelSpaceTypeString Alternative top-level space type
* @return {@link SpaceType} for the method
*/
public SpaceType resolveSpaceType(
final KNNMethodContext knnMethodContext,
final VectorDataType vectorDataType,
final String topLevelSpaceTypeString
) {
public SpaceType resolveSpaceType(final KNNMethodContext knnMethodContext, final String topLevelSpaceTypeString) {
SpaceType methodSpaceType = getSpaceTypeFromMethodContext(knnMethodContext);
SpaceType topLevelSpaceType = getSpaceTypeFromString(topLevelSpaceTypeString);

if (isSpaceTypeConfigured(methodSpaceType) == false && isSpaceTypeConfigured(topLevelSpaceType) == false) {
return getSpaceTypeFromVectorDataType(vectorDataType);
return SpaceType.UNDEFINED;
}

if (isSpaceTypeConfigured(methodSpaceType) == false) {
Expand Down Expand Up @@ -75,13 +72,6 @@ private SpaceType getSpaceTypeFromMethodContext(final KNNMethodContext knnMethod
return knnMethodContext.getSpaceType();
}

private SpaceType getSpaceTypeFromVectorDataType(final VectorDataType vectorDataType) {
if (vectorDataType == VectorDataType.BINARY) {
return SpaceType.DEFAULT_BINARY;
}
return SpaceType.DEFAULT;
}

private SpaceType getSpaceTypeFromString(final String spaceType) {
if (Strings.isEmpty(spaceType)) {
return SpaceType.UNDEFINED;
Expand All @@ -93,4 +83,23 @@ private SpaceType getSpaceTypeFromString(final String spaceType) {
private boolean isSpaceTypeConfigured(final SpaceType spaceType) {
return spaceType != null && spaceType != SpaceType.UNDEFINED;
}

/**
* If undefined space type was given, it returns default space type depending on vector data type.
* Binary default space type will be returned for binary vector data type, otherwise it returns default space type.
*
* @param spaceType Vector metric space type.
* @param vectorDataType Vector data type.
* @return Will return default space type when it is undefined depending on the given vector data type.
*/
public SpaceType pickDefaultSpaceTypeWhenUndefined(SpaceType spaceType, VectorDataType vectorDataType) {
if (spaceType != SpaceType.UNDEFINED) {
return spaceType;
} else {
if (vectorDataType == VectorDataType.BINARY) {
return SpaceType.DEFAULT_BINARY;
}
return SpaceType.DEFAULT;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -387,14 +387,22 @@ public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserCont
// If the original knnMethodContext is not null, resolve its space type and engine from the rest of the
// configuration. This is consistent with the existing behavior for space type in 2.16 where we modify the
// parsed value
SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
final SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
builder.originalParameters.getKnnMethodContext(),
builder.vectorDataType.get(),
builder.topLevelSpaceType.get()
);
setSpaceType(builder.originalParameters.getKnnMethodContext(), resolvedSpaceType);

// Set space type to the original knnMethodContext. Since the resolved one can be UNDEFINED,
// we must wrap it and pick up the default when it is UNDEFINED.
setSpaceType(
builder.originalParameters.getKnnMethodContext(),
SpaceTypeResolver.INSTANCE.pickDefaultSpaceTypeWhenUndefined(resolvedSpaceType, builder.vectorDataType.get())
);
validateSpaceType(builder);
resolveKNNMethodComponents(builder, parserContext, resolvedSpaceType);

// Resolve method component. For the legacy case where space type can be configured at index level,
// it first tries to use the given one then tries to get it from index setting when the space type is UNDEFINED.
resolveKNNMethodComponents(builder, parserContext, resolvedSpaceType, builder.vectorDataType.get());
validateFromKNNMethod(builder);
}

Expand Down Expand Up @@ -485,9 +493,10 @@ private void validateCompressionAndModeNotSet(KNNVectorFieldMapper.Builder build
}

private void resolveKNNMethodComponents(
KNNVectorFieldMapper.Builder builder,
Builder builder,
ParserContext parserContext,
SpaceType resolvedSpaceType
SpaceType resolvedSpaceType,
VectorDataType vectorDataType
) {
// Setup the initial configuration that is used to help resolve parameters.
builder.setKnnMethodConfigContext(
Expand All @@ -500,7 +509,7 @@ private void resolveKNNMethodComponents(
.build()
);

if (useKNNMethodContextFromLegacy(builder, parserContext)) {
if (useKNNMethodContextFromLegacy(builder)) {
// Then create KNNMethodContext to be used from the legacy index settings
builder.originalParameters.setResolvedKnnMethodContext(
createKNNMethodContextFromLegacy(parserContext.getSettings(), parserContext.indexVersionCreated(), resolvedSpaceType)
Expand All @@ -520,7 +529,7 @@ private void resolveKNNMethodComponents(
builder.originalParameters.getResolvedKnnMethodContext(),
builder.knnMethodConfigContext,
false,
resolvedSpaceType
SpaceTypeResolver.INSTANCE.pickDefaultSpaceTypeWhenUndefined(resolvedSpaceType, vectorDataType)
);

// The original parameters stores both the resolveMethodContext as well as the original provided by the
Expand Down Expand Up @@ -549,10 +558,8 @@ private void setEngine(final KNNMethodContext knnMethodContext, KNNEngine knnEng
}
}

static boolean useKNNMethodContextFromLegacy(Builder builder, Mapper.TypeParser.ParserContext parserContext) {
// If the original parameters are from legacy, and it is created on or before 2_17_2 since default is changed to
// FAISS starting 2_18, which doesn't support accepting algo params from index settings
return parserContext.indexVersionCreated().onOrBefore(Version.V_2_17_2) && builder.originalParameters.isLegacyMapping();
static boolean useKNNMethodContextFromLegacy(Builder builder) {
return builder.originalParameters.isLegacyMapping();
}

// We store the version of the index with the mapper as different version of Opensearch has different default
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,12 +196,10 @@ private static int getEfConstruction(Settings indexSettings, Version indexVersio
static KNNMethodContext createKNNMethodContextFromLegacy(
Settings indexSettings,
Version indexCreatedVersion,
SpaceType topLevelSpaceType
SpaceType resolvedSpaceType
) {
// If top level spaceType is set then use that spaceType otherwise default to spaceType from index-settings
final SpaceType finalSpaceToSet = topLevelSpaceType != SpaceType.UNDEFINED
? topLevelSpaceType
: KNNVectorFieldMapperUtil.getSpaceType(indexSettings);
// If resolved space type is set then use it otherwise get it from index-settings.
final SpaceType finalSpaceToSet = resolvedSpaceType != SpaceType.UNDEFINED ? resolvedSpaceType : getSpaceType(indexSettings);
return new KNNMethodContext(
KNNEngine.NMSLIB,
finalSpaceToSet,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,8 @@ && ensureSpaceTypeNotSet(topLevelSpaceType)) {
vectorDataType,
VectorDataType.FLOAT.getValue()
);
SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
knnMethodContext,
vectorDataType,
topLevelSpaceType.getValue()
);
SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(knnMethodContext, topLevelSpaceType.getValue());
resolvedSpaceType = SpaceTypeResolver.INSTANCE.pickDefaultSpaceTypeWhenUndefined(resolvedSpaceType, vectorDataType);
setSpaceType(knnMethodContext, resolvedSpaceType);
TrainingModelRequest trainingModelRequest = new TrainingModelRequest(
modelId,
Expand Down
74 changes: 71 additions & 3 deletions src/test/java/org/opensearch/knn/index/KNNESSettingsTestIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,31 @@

package org.opensearch.knn.index;

import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.plugin.stats.StatNames;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.opensearch.client.Response;
import org.opensearch.client.ResponseException;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.core.rest.RestStatus;
import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.KNNResult;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.plugin.stats.StatNames;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.hamcrest.Matchers.containsString;
import static org.opensearch.knn.TestUtils.KNN_VECTOR;
import static org.opensearch.knn.TestUtils.PROPERTIES;
import static org.opensearch.knn.TestUtils.VECTOR_TYPE;
import static org.opensearch.knn.common.KNNConstants.DIMENSION;

public class KNNESSettingsTestIT extends KNNRestTestCase {

Expand Down Expand Up @@ -149,4 +160,61 @@ public void testCacheRebuiltAfterUpdateIndexSettings() throws Exception {

assertEquals(0, indicesInCache.size());
}

public void testKNNLegacySpaceTypeIndexingTest() throws IOException, ParseException {
// Configure space_type at index level. This is deprecated and will be removed in the future.
final Settings indexSettings = Settings.builder()
.put("index.knn", true)
.put("knn.algo_param.ef_search", 100)
.put("knn.space_type", SpaceType.COSINESIMIL.getValue())
.build();

// This mapping does not have method.
final String testField = "knn_field";
final String testIndex = "knn_index";
final String mapping = XContentFactory.jsonBuilder()
.startObject()
.startObject(PROPERTIES)
.startObject(testField)
.field(VECTOR_TYPE, KNN_VECTOR)
.field(DIMENSION, 2)
.endObject()
.endObject()
.endObject()
.toString();

createKnnIndex(testIndex, indexSettings, mapping);

// Ingest data.
float[] queryVector = new float[] { 2, 3 };
final int k = 2;

float[][] vectorData = new float[5][2];
vectorData[0] = new float[] { 11.7f, 2.7f }; // distance=0.7275
vectorData[1] = new float[] { 20.9f, 3.9f }; // distance=0.6979
vectorData[2] = new float[] { 3.77f, 4.22f }; // distance=0.99, answer
vectorData[3] = new float[] { 15, 6 }; // distance=0.824
vectorData[4] = new float[] { 4.7f, 5.9f }; // distance=0.9964, answer

bulkAddKnnDocs(testIndex, testField, vectorData, vectorData.length);
flushIndex(testIndex);

// Send a query and verify scores are correct.
Response searchResponse = searchKNNIndex(
testIndex,
KNNQueryBuilder.builder().k(k).fieldName(testField).vector(queryVector).build(),
k
);

List<KNNResult> results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), testField);

assertEquals(k, results.size());
Set<String> docIds = new HashSet<>();
for (KNNResult result : results) {
assertTrue(result.getScore() >= 0.99);
docIds.add(result.getDocId());
}

assertEquals(new HashSet<>(Arrays.asList("2", "4")), docIds);
}
}
1 change: 1 addition & 0 deletions src/test/java/org/opensearch/knn/index/OpenSearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ public void testVectorMappingValidation_invalidDimension() {
createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT) + 1)
)
);

assertThat(
ex.getMessage(),
containsString(
Expand Down
Loading

0 comments on commit 62c2a36

Please sign in to comment.