Skip to content

Commit

Permalink
Added the fix for NPE happening during merges when all vector fields …
Browse files Browse the repository at this point in the history
…docs are deleted in the segments getting merged (opensearch-project#2365)

Signed-off-by: Navneet Verma <[email protected]>
  • Loading branch information
navneet1v authored Jan 6, 2025
1 parent 84cfa8e commit 9fb7a5a
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 48 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Use one formula to calculate cosine similarity (#2357)[https://github.com/opensearch-project/k-NN/pull/2357]
### Bug Fixes
* Fixing the bug when a segment has no vector field present for disk based vector search (#2282)[https://github.com/opensearch-project/k-NN/pull/2282]
* Fix for NPE while merging segments after all the vector fields docs are deleted (#2365)[https://github.com/opensearch-project/k-NN/pull/2365]
* Allow validation for non knn index only after 2.17.0 (#2315)[https://github.com/opensearch-project/k-NN/pull/2315]
* Release query vector memory after execution (#2346)[https://github.com/opensearch-project/k-NN/pull/2346]
* Fix shard level rescoring disabled setting flag (#2352)[https://github.com/opensearch-project/k-NN/pull/2352]
Expand Down
15 changes: 0 additions & 15 deletions src/main/java/org/opensearch/knn/common/KNNVectorUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@

import lombok.AccessLevel;
import lombok.NoArgsConstructor;
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;

import java.io.IOException;
import java.util.List;
import java.util.Objects;

Expand Down Expand Up @@ -62,17 +60,4 @@ public static int[] intListToArray(final List<Integer> integerList) {
}
return intArray;
}

/**
* Iterates vector values once if it is not at start of the location,
* Intended to be done to make sure dimension and bytesPerVector are available
* @param vectorValues
* @throws IOException
*/
public static void iterateVectorValuesOnce(final KNNVectorValues<?> vectorValues) throws IOException {
if (vectorValues.docId() == -1) {
vectorValues.nextDoc();
vectorValues.getVector();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.opensearch.knn.common.KNNConstants.MODEL_ID;
import static org.opensearch.knn.common.KNNVectorUtil.intListToArray;
import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce;
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues;
import static org.opensearch.knn.index.codec.transfer.OffHeapVectorTransferFactory.getVectorTransfer;

/**
Expand Down Expand Up @@ -52,7 +52,7 @@ public static DefaultIndexBuildStrategy getInstance() {
public void buildAndWriteIndex(final BuildIndexParams indexInfo) throws IOException {
final KNNVectorValues<?> knnVectorValues = indexInfo.getVectorValues();
// Needed to make sure we don't get 0 dimensions while initializing index
iterateVectorValuesOnce(knnVectorValues);
initializeVectorValues(knnVectorValues);
IndexBuildSetup indexBuildSetup = QuantizationIndexUtils.prepareIndexBuild(knnVectorValues, indexInfo);

try (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.opensearch.knn.common.KNNVectorUtil.intListToArray;
import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce;
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues;
import static org.opensearch.knn.index.codec.transfer.OffHeapVectorTransferFactory.getVectorTransfer;

/**
Expand Down Expand Up @@ -53,7 +53,7 @@ public static MemOptimizedNativeIndexBuildStrategy getInstance() {
public void buildAndWriteIndex(final BuildIndexParams indexInfo) throws IOException {
final KNNVectorValues<?> knnVectorValues = indexInfo.getVectorValues();
// Needed to make sure we don't get 0 dimensions while initializing index
iterateVectorValuesOnce(knnVectorValues);
initializeVectorValues(knnVectorValues);
KNNEngine engine = indexInfo.getKnnEngine();
Map<String, Object> indexParameters = indexInfo.getParameters();
IndexBuildSetup indexBuildSetup = QuantizationIndexUtils.prepareIndexBuild(knnVectorValues, indexInfo);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
import static org.opensearch.knn.common.FieldInfoExtractor.extractVectorDataType;
import static org.opensearch.knn.common.KNNConstants.MODEL_ID;
import static org.opensearch.knn.common.KNNConstants.PARAMETERS;
import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce;
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues;
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.buildEngineFileName;
import static org.opensearch.knn.index.engine.faiss.Faiss.FAISS_BINARY_INDEX_DESCRIPTION_PREFIX;

Expand Down Expand Up @@ -100,7 +100,7 @@ public static NativeIndexWriter getWriter(
* @throws IOException
*/
public void flushIndex(final KNNVectorValues<?> knnVectorValues, int totalLiveDocs) throws IOException {
iterateVectorValuesOnce(knnVectorValues);
initializeVectorValues(knnVectorValues);
buildAndWriteIndex(knnVectorValues, totalLiveDocs);
recordRefreshStats();
}
Expand All @@ -111,7 +111,7 @@ public void flushIndex(final KNNVectorValues<?> knnVectorValues, int totalLiveDo
* @throws IOException
*/
public void mergeIndex(final KNNVectorValues<?> knnVectorValues, int totalLiveDocs) throws IOException {
iterateVectorValuesOnce(knnVectorValues);
initializeVectorValues(knnVectorValues);
if (knnVectorValues.docId() == NO_MORE_DOCS) {
// This is in place so we do not add metrics
log.debug("Skipping mergeIndex, vector values are already iterated for {}", fieldInfo.name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.search.DocIdSetIterator;
import org.opensearch.knn.common.FieldInfoExtractor;
import org.opensearch.knn.common.KNNConstants;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.codec.KNN80Codec.KNN80BinaryDocValues;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;

import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -116,6 +119,31 @@ public static String getNativeEngineFileFromFieldInfo(FieldInfo field, SegmentIn
}
}

/**
* Positions the vectorValuesIterator to the first vector document ID if not already positioned there.
* This initialization is crucial for setting up vector dimensions and other properties in VectorValues.
* <p>
* If the VectorValues contains no vector documents, the iterator will be positioned at
* {@link DocIdSetIterator#NO_MORE_DOCS}
*
* @param vectorValues {@link KNNVectorValues}
* @throws IOException if there is an error while accessing the vector values
*/
public static void initializeVectorValues(final KNNVectorValues<?> vectorValues) throws IOException {
// The docId will be set to -1 if next doc has never been called yet. If it has already been called,
// no need to advance the vector values
if (vectorValues.docId() != -1) {
return;
}
// Ensure that we are not getting the next vector if there are no more docs
vectorValues.nextDoc();
if (vectorValues.docId() == DocIdSetIterator.NO_MORE_DOCS) {
// Ensure that we are not getting the vector if there are no more docs
return;
}
vectorValues.getVector();
}

/**
* Get KNNEngine From FieldInfo
*
Expand Down
26 changes: 0 additions & 26 deletions src/test/java/org/opensearch/knn/common/KNNVectorUtilTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,10 @@

package org.opensearch.knn.common;

import lombok.SneakyThrows;
import org.opensearch.knn.KNNTestCase;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
import org.opensearch.knn.index.vectorvalues.KNNVectorValuesFactory;
import org.opensearch.knn.index.vectorvalues.TestVectorValues;

import java.util.List;

import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce;

public class KNNVectorUtilTests extends KNNTestCase {
public void testByteZeroVector() {
assertTrue(KNNVectorUtil.isZeroVector(new byte[] { 0, 0, 0 }));
Expand All @@ -38,23 +31,4 @@ public void testIntListToArray() {
assertNull(KNNVectorUtil.intListToArray(List.of()));
assertNull(KNNVectorUtil.intListToArray(null));
}

@SneakyThrows
public void testInit() {
// Give
final List<float[]> floatArray = List.of(new float[] { 1, 2 }, new float[] { 2, 3 });
final int dimension = floatArray.get(0).length;
final TestVectorValues.PreDefinedFloatVectorValues randomVectorValues = new TestVectorValues.PreDefinedFloatVectorValues(
floatArray
);
final KNNVectorValues<float[]> knnVectorValues = KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, randomVectorValues);

// When
iterateVectorValuesOnce(knnVectorValues);

// Then
assertNotEquals(-1, knnVectorValues.docId());
assertArrayEquals(floatArray.get(0), knnVectorValues.getVector(), 0.001f);
assertEquals(dimension, knnVectorValues.dimension());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,24 @@
package org.opensearch.knn.index.codec.util;

import junit.framework.TestCase;
import lombok.SneakyThrows;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.search.DocIdSetIterator;
import org.junit.Assert;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
import org.opensearch.knn.index.vectorvalues.KNNVectorValuesFactory;
import org.opensearch.knn.index.vectorvalues.TestVectorValues;

import java.util.Collections;
import java.util.List;
import java.util.Set;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.calculateArraySize;
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues;

public class KNNCodecUtilTests extends TestCase {

Expand Down Expand Up @@ -46,4 +54,38 @@ public void testGetKNNEngines() {
assertEquals(engineFiles.size(), 2);
assertTrue(engineFiles.get(0).equals("_0_2011_target_field.faissc"));
}

@SneakyThrows
public void testInitializeVectorValues_whenValidVectorValues_thenSuccess() {
// Give
final List<float[]> floatArray = List.of(new float[] { 1, 2 }, new float[] { 2, 3 });
final int dimension = floatArray.get(0).length;
final TestVectorValues.PreDefinedFloatVectorValues randomVectorValues = new TestVectorValues.PreDefinedFloatVectorValues(
floatArray
);
final KNNVectorValues<float[]> knnVectorValues = KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, randomVectorValues);

// When
initializeVectorValues(knnVectorValues);

// Then
Assert.assertNotEquals(-1, knnVectorValues.docId());
Assert.assertArrayEquals(floatArray.get(0), knnVectorValues.getVector(), 0.001f);
assertEquals(dimension, knnVectorValues.dimension());
}

@SneakyThrows
public void testInitializeVectorValues_whenNoDocs_thenSuccess() {
// Give
final List<float[]> floatArray = Collections.emptyList();
final TestVectorValues.PreDefinedFloatVectorValues randomVectorValues = new TestVectorValues.PreDefinedFloatVectorValues(
floatArray
);
final KNNVectorValues<float[]> knnVectorValues = KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, randomVectorValues);

// When
initializeVectorValues(knnVectorValues);
// Then
Assert.assertEquals(DocIdSetIterator.NO_MORE_DOCS, knnVectorValues.docId());
}
}

0 comments on commit 9fb7a5a

Please sign in to comment.