Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed a bug that does not get a space type from index setting when it is empty for compatibility. #2309

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Introduced a writing layer in native engines where relies on the writing interface to process IO. (#2241)[https://github.com/opensearch-project/k-NN/pull/2241]
### Bug Fixes
* Fixing the bug when a segment has no vector field present for disk based vector search (#2282)[https://github.com/opensearch-project/k-NN/pull/2282]
* Fixing it to retrieve space_type from index setting when both method and top level don't have the value. [#2309](https://github.com/opensearch-project/k-NN/pull/2309)
### Infrastructure
* Updated C++ version in JNI from c++11 to c++17 [#2259](https://github.com/opensearch-project/k-NN/pull/2259)
* Upgrade bytebuddy and objenesis version to match OpenSearch core and, update github ci runner for macos [#2279](https://github.com/opensearch-project/k-NN/pull/2279)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,14 +154,14 @@ public void testKNNIndexBinaryForceMerge() throws Exception {
}

// Custom Legacy Field Mapping
// space_type : "linf", engine : "nmslib", m : 2, ef_construction : 2
// space_type : "innerproduct", engine : "nmslib", m : 2, ef_construction : 2
public void testKNNIndexCustomLegacyFieldMapping() throws Exception {

// When the cluster is in old version, create a KNN index with custom legacy field mapping settings
// and add documents into that index
if (isRunningAgainstOldCluster()) {
Settings.Builder indexMappingSettings = createKNNIndexCustomLegacyFieldMappingIndexSettingsBuilder(
SpaceType.LINF,
SpaceType.INNER_PRODUCT,
KNN_ALGO_PARAM_M_MIN_VALUE,
KNN_ALGO_PARAM_EF_CONSTRUCTION_MIN_VALUE
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,22 +64,6 @@ public void testKNNL1ScriptScore() throws Exception {
}
}

// KNN script scoring for space_type "linf"
public void testKNNLinfScriptScore() throws Exception {
if (isRunningAgainstOldCluster()) {
createKnnIndex(testIndex, createKNNDefaultScriptScoreSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS));
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
} else {
QUERY_COUNT = NUM_DOCS;
DOC_ID = NUM_DOCS;
validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.LINF);
addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS);
QUERY_COUNT = QUERY_COUNT + NUM_DOCS;
validateKNNScriptScoreSearch(testIndex, TEST_FIELD, DIMENSIONS, QUERY_COUNT, K, SpaceType.LINF);
deleteKNNIndex(testIndex);
}
}

// KNN script scoring for space_type "innerproduct"
public void testKNNInnerProductScriptScore() throws Exception {
if (isRunningAgainstOldCluster()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,14 @@ public void testKNNWarmupDefaultLegacyFieldMapping() throws Exception {
}

// Custom Legacy Field Mapping
// space_type : "linf", engine : "nmslib", m : 2, ef_construction : 2
// space_type : "innerproduct", engine : "nmslib", m : 2, ef_construction : 2
public void testKNNWarmupCustomLegacyFieldMapping() throws Exception {

// When the cluster is in old version, create a KNN index with custom legacy field mapping settings
// and add documents into that index
if (isRunningAgainstOldCluster()) {
Settings.Builder indexMappingSettings = createKNNIndexCustomLegacyFieldMappingIndexSettingsBuilder(
SpaceType.LINF,
SpaceType.INNER_PRODUCT,
KNN_ALGO_PARAM_M_MIN_VALUE,
KNN_ALGO_PARAM_EF_CONSTRUCTION_MIN_VALUE
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@

import org.apache.logging.log4j.util.Strings;
import org.opensearch.index.mapper.MapperParsingException;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.common.settings.Settings;

import java.util.Locale;

Expand All @@ -25,24 +27,30 @@ public final class SpaceTypeResolver {
private SpaceTypeResolver() {}

/**
* Resolves space type from configuration details. It is guaranteed not to return either null or
* {@link SpaceType#UNDEFINED}
* Resolves space type from configuration details. It is guaranteed not to return null.
* When space is not in either method and top level, UNDEFINED will be returned.
* This can happen when it is defined at index level which is deprecated and no longer allowed in the future.
* In this case, UNDEFINED will be returned.
*
* @param knnMethodContext Method context
* @param vectorDataType Vectordatatype
* @param knnMethodContext Method context
* @param topLevelSpaceTypeString Alternative top-level space type
* @return {@link SpaceType} for the method
*/
public SpaceType resolveSpaceType(
final KNNMethodContext knnMethodContext,
final VectorDataType vectorDataType,
final String topLevelSpaceTypeString
final String topLevelSpaceTypeString,
final Settings indexSettings,
final VectorDataType vectorDataType
) {
SpaceType methodSpaceType = getSpaceTypeFromMethodContext(knnMethodContext);
SpaceType topLevelSpaceType = getSpaceTypeFromString(topLevelSpaceTypeString);

if (isSpaceTypeConfigured(methodSpaceType) == false && isSpaceTypeConfigured(topLevelSpaceType) == false) {
return getSpaceTypeFromVectorDataType(vectorDataType);
final String spaceType = indexSettings.get(KNNSettings.INDEX_KNN_SPACE_TYPE.getKey());
if (spaceType == null) {
return getDefaultSpaceType(vectorDataType);
}
return SpaceType.getSpace(spaceType);
}

if (isSpaceTypeConfigured(methodSpaceType) == false) {
Expand All @@ -67,6 +75,13 @@ public SpaceType resolveSpaceType(
);
}

public static SpaceType getDefaultSpaceType(final VectorDataType vectorDataType) {
if (vectorDataType == VectorDataType.BINARY) {
return SpaceType.DEFAULT_BINARY;
}
return SpaceType.DEFAULT;
}

private SpaceType getSpaceTypeFromMethodContext(final KNNMethodContext knnMethodContext) {
if (knnMethodContext == null) {
return SpaceType.UNDEFINED;
Expand All @@ -75,13 +90,6 @@ private SpaceType getSpaceTypeFromMethodContext(final KNNMethodContext knnMethod
return knnMethodContext.getSpaceType();
}

private SpaceType getSpaceTypeFromVectorDataType(final VectorDataType vectorDataType) {
if (vectorDataType == VectorDataType.BINARY) {
return SpaceType.DEFAULT_BINARY;
}
return SpaceType.DEFAULT;
}

private SpaceType getSpaceTypeFromString(final String spaceType) {
if (Strings.isEmpty(spaceType)) {
return SpaceType.UNDEFINED;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,13 +387,20 @@ public Mapper.Builder<?> parse(String name, Map<String, Object> node, ParserCont
// If the original knnMethodContext is not null, resolve its space type and engine from the rest of the
// configuration. This is consistent with the existing behavior for space type in 2.16 where we modify the
// parsed value
SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
final SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
builder.originalParameters.getKnnMethodContext(),
builder.vectorDataType.get(),
builder.topLevelSpaceType.get()
builder.topLevelSpaceType.get(),
parserContext.getSettings(),
builder.vectorDataType.get()
);

// Set space type to the original knnMethodContext. Since the resolved one can be UNDEFINED,
// we must wrap it and pick up the default when it is UNDEFINED.
setSpaceType(builder.originalParameters.getKnnMethodContext(), resolvedSpaceType);
validateSpaceType(builder);

// Resolve method component. For the legacy case where space type can be configured at index level,
// it first tries to use the given one then tries to get it from index setting when the space type is UNDEFINED.
resolveKNNMethodComponents(builder, parserContext, resolvedSpaceType);
validateFromKNNMethod(builder);
}
Expand Down Expand Up @@ -484,11 +491,7 @@ private void validateCompressionAndModeNotSet(KNNVectorFieldMapper.Builder build
}
}

private void resolveKNNMethodComponents(
KNNVectorFieldMapper.Builder builder,
ParserContext parserContext,
SpaceType resolvedSpaceType
) {
private void resolveKNNMethodComponents(Builder builder, ParserContext parserContext, SpaceType resolvedSpaceType) {
// Setup the initial configuration that is used to help resolve parameters.
builder.setKnnMethodConfigContext(
KNNMethodConfigContext.builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ static boolean useLuceneKNNVectorsFormat(final Version indexCreatedVersion) {
return indexCreatedVersion.onOrAfter(Version.V_2_17_0);
}

private static SpaceType getSpaceType(final Settings indexSettings) {
public static SpaceType getSpaceType(final Settings indexSettings) {
String spaceType = indexSettings.get(KNNSettings.INDEX_KNN_SPACE_TYPE.getKey());
if (spaceType == null) {
spaceType = KNNSettings.INDEX_KNN_DEFAULT_SPACE_TYPE;
Expand Down Expand Up @@ -196,15 +196,11 @@ private static int getEfConstruction(Settings indexSettings, Version indexVersio
static KNNMethodContext createKNNMethodContextFromLegacy(
Settings indexSettings,
Version indexCreatedVersion,
SpaceType topLevelSpaceType
SpaceType resolvedSpaceType
) {
// If top level spaceType is set then use that spaceType otherwise default to spaceType from index-settings
final SpaceType finalSpaceToSet = topLevelSpaceType != SpaceType.UNDEFINED
? topLevelSpaceType
: KNNVectorFieldMapperUtil.getSpaceType(indexSettings);
return new KNNMethodContext(
KNNEngine.NMSLIB,
finalSpaceToSet,
resolvedSpaceType,
new MethodComponentContext(
METHOD_HNSW,
Map.of(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ && ensureSpaceTypeNotSet(topLevelSpaceType)) {
vectorDataType = VectorDataType.DEFAULT;
}

if ((knnMethodContext == null || knnMethodContext.getSpaceType() == SpaceType.UNDEFINED)
&& topLevelSpaceType == SpaceType.UNDEFINED) {
topLevelSpaceType = SpaceTypeResolver.getDefaultSpaceType(vectorDataType);
}

ensureIfSetThenEquals(
MODE_PARAMETER,
mode,
Expand All @@ -163,8 +168,9 @@ && ensureSpaceTypeNotSet(topLevelSpaceType)) {
);
SpaceType resolvedSpaceType = SpaceTypeResolver.INSTANCE.resolveSpaceType(
knnMethodContext,
vectorDataType,
topLevelSpaceType.getValue()
topLevelSpaceType.getValue(),
null,
vectorDataType
);
setSpaceType(knnMethodContext, resolvedSpaceType);
TrainingModelRequest trainingModelRequest = new TrainingModelRequest(
Expand Down
72 changes: 69 additions & 3 deletions src/test/java/org/opensearch/knn/index/KNNESSettingsTestIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,31 @@

package org.opensearch.knn.index;

import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.plugin.stats.StatNames;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.opensearch.client.Response;
import org.opensearch.client.ResponseException;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.core.rest.RestStatus;
import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.KNNResult;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.plugin.stats.StatNames;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.hamcrest.Matchers.containsString;
import static org.opensearch.knn.TestUtils.KNN_VECTOR;
import static org.opensearch.knn.TestUtils.PROPERTIES;
import static org.opensearch.knn.TestUtils.VECTOR_TYPE;
import static org.opensearch.knn.common.KNNConstants.DIMENSION;

public class KNNESSettingsTestIT extends KNNRestTestCase {

Expand Down Expand Up @@ -149,4 +160,59 @@ public void testCacheRebuiltAfterUpdateIndexSettings() throws Exception {

assertEquals(0, indicesInCache.size());
}

public void testKNNLegacySpaceTypeIndexingTest() throws IOException, ParseException {
// Configure space_type at index level. This is deprecated and will be removed in the future.
final Settings indexSettings = Settings.builder()
.put("index.knn", true)
.put("knn.algo_param.ef_search", 100)
.put("knn.space_type", SpaceType.INNER_PRODUCT.getValue())
.build();

// This mapping does not have method.
final String testField = "knn_field";
final String testIndex = "knn_index";
final String mapping = XContentFactory.jsonBuilder()
.startObject()
.startObject(PROPERTIES)
.startObject(testField)
.field(VECTOR_TYPE, KNN_VECTOR)
.field(DIMENSION, 2)
.endObject()
.endObject()
.endObject()
.toString();

createKnnIndex(testIndex, indexSettings, mapping);

// Ingest data.
float[] queryVector = new float[] { 2, 3 };
final int k = 2;

float[][] vectorData = new float[5][2];
vectorData[0] = new float[] { 11.7f, 2.7f }; // distance=31.5
vectorData[1] = new float[] { 20.9f, 3.9f }; // distance=53.5 <- answer
vectorData[2] = new float[] { 3.77f, 4.22f }; // distance=20.2
vectorData[3] = new float[] { 15, 6 }; // distance=48 <- answer
vectorData[4] = new float[] { 4.7f, 5.9f }; // distance=27.1

bulkAddKnnDocs(testIndex, testField, vectorData, vectorData.length);
flushIndex(testIndex);

// Send a query and verify scores are correct.
Response searchResponse = searchKNNIndex(
testIndex,
KNNQueryBuilder.builder().k(k).fieldName(testField).vector(queryVector).build(),
k
);

List<KNNResult> results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), testField);

assertEquals(k, results.size());
Set<String> docIds = new HashSet<>();
for (KNNResult result : results) {
docIds.add(result.getDocId());
}
assertEquals(new HashSet<>(Arrays.asList("1", "3")), docIds);
}
}
1 change: 1 addition & 0 deletions src/test/java/org/opensearch/knn/index/OpenSearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ public void testVectorMappingValidation_invalidDimension() {
createKnnIndexMapping(FIELD_NAME, KNNEngine.getMaxDimensionByEngine(KNNEngine.DEFAULT) + 1)
)
);

assertThat(
ex.getMessage(),
containsString(
Expand Down
Loading
Loading