From 1c649ef13020a6dae64a7ee9e970cd019ff0c4c6 Mon Sep 17 00:00:00 2001 From: Ali Safaya Date: Tue, 17 Dec 2024 01:06:53 +0300 Subject: [PATCH 1/4] Fix IndexIVFFastScan reconstruct_from_offset method --- faiss/IndexIVFFastScan.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/faiss/IndexIVFFastScan.cpp b/faiss/IndexIVFFastScan.cpp index f95ad354a7..d5406a53c6 100644 --- a/faiss/IndexIVFFastScan.cpp +++ b/faiss/IndexIVFFastScan.cpp @@ -1355,22 +1355,16 @@ void IndexIVFFastScan::reconstruct_from_offset( // unpack codes InvertedLists::ScopedCodes list_codes(invlists, list_no); std::vector<uint8_t> code(code_size, 0); - BitstringWriter bsw(code.data(), code_size); + encode_listno(list_no, code.data()); + BitstringWriter bsw(code.data() + coarse_code_size(), code_size); + for (size_t m = 0; m < M; m++) { uint8_t c = pq4_get_packed_element(list_codes.get(), bbs, M2, offset, m); bsw.write(c, nbits); } - sa_decode(1, code.data(), recons); - // add centroid to it - if (by_residual) { - std::vector<float> centroid(d); - quantizer->reconstruct(list_no, centroid.data()); - for (int i = 0; i < d; ++i) { - recons[i] += centroid[i]; - } - } + sa_decode(1, code.data(), recons); } void IndexIVFFastScan::reconstruct_orig_invlists() { From f6ae3de2c33b9f5e7ecdebc323ba6714eeb3fae6 Mon Sep 17 00:00:00 2001 From: Ali Safaya Date: Tue, 17 Dec 2024 14:59:31 +0300 Subject: [PATCH 2/4] Update IndexIVFFastScan.cpp: fix the size of the buffer vector --- faiss/IndexIVFFastScan.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/faiss/IndexIVFFastScan.cpp b/faiss/IndexIVFFastScan.cpp index d5406a53c6..126eb65085 100644 --- a/faiss/IndexIVFFastScan.cpp +++ b/faiss/IndexIVFFastScan.cpp @@ -1353,10 +1353,11 @@ void IndexIVFFastScan::reconstruct_from_offset( int64_t offset, float* recons) const { // unpack codes - InvertedLists::ScopedCodes list_codes(invlists, list_no); - std::vector<uint8_t> code(code_size, 0); + size_t coarse_size = coarse_code_size(); + 
std::vector<uint8_t> code(coarse_size + code_size, 0); encode_listno(list_no, code.data()); - BitstringWriter bsw(code.data() + coarse_code_size(), code_size); + InvertedLists::ScopedCodes list_codes(invlists, list_no); + BitstringWriter bsw(code.data() + coarse_size, code_size); for (size_t m = 0; m < M; m++) { uint8_t c = From 178db8167425feba7ac389c6c1bae6ac1a3937d2 Mon Sep 17 00:00:00 2001 From: Ali Safaya Date: Tue, 7 Jan 2025 16:46:51 +0300 Subject: [PATCH 3/4] added unit tests --- faiss/IndexIVFFastScan.cpp | 3 ++- faiss/IndexIVFPQFastScan.cpp | 1 + tests/test_fast_scan_ivf.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/faiss/IndexIVFFastScan.cpp b/faiss/IndexIVFFastScan.cpp index 126eb65085..2b6d7abc19 100644 --- a/faiss/IndexIVFFastScan.cpp +++ b/faiss/IndexIVFFastScan.cpp @@ -1372,10 +1372,11 @@ void IndexIVFFastScan::reconstruct_orig_invlists() { FAISS_THROW_IF_NOT(orig_invlists != nullptr); FAISS_THROW_IF_NOT(orig_invlists->list_size(0) == 0); +#pragma omp parallel for if (nlist > 100) for (size_t list_no = 0; list_no < nlist; list_no++) { InvertedLists::ScopedCodes codes(invlists, list_no); InvertedLists::ScopedIds ids(invlists, list_no); - size_t list_size = orig_invlists->list_size(list_no); + size_t list_size = invlists->list_size(list_no); std::vector<uint8_t> code(code_size, 0); for (size_t offset = 0; offset < list_size; offset++) { diff --git a/faiss/IndexIVFPQFastScan.cpp b/faiss/IndexIVFPQFastScan.cpp index 9d1cdfcae3..c1fd206ee2 100644 --- a/faiss/IndexIVFPQFastScan.cpp +++ b/faiss/IndexIVFPQFastScan.cpp @@ -76,6 +76,7 @@ IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs) precomputed_table.nbytes()); } +#pragma omp parallel for if (nlist > 100) for (size_t i = 0; i < nlist; i++) { size_t nb = orig.invlists->list_size(i); size_t nb2 = roundup(nb, bbs); diff --git a/tests/test_fast_scan_ivf.py b/tests/test_fast_scan_ivf.py index 55de784ad6..c7450bd7eb 100644 --- 
a/tests/test_fast_scan_ivf.py +++ b/tests/test_fast_scan_ivf.py @@ -8,6 +8,7 @@ import unittest import tempfile +import faiss.invlists import numpy as np import faiss @@ -543,6 +544,37 @@ def test_by_residual_odd_dim(self): self.do_test(by_residual=True, d=30) +class TestReconstruct(unittest.TestCase): + + def do_test(self, by_residual=False): + d = 32 + metric = faiss.METRIC_L2 + + ds = datasets.SyntheticDataset(d, 2000, 5000, 200) + + index = faiss.IndexIVFPQFastScan(faiss.IndexFlatL2(d), d, 50, d // 2, 4, metric) + index.by_residual = by_residual + index.make_direct_map(True) + index.train(ds.get_train()) + index.add(ds.get_database()) + + # Test reconstruction + index.reconstruct(123) # single id + index.reconstruct_n(123, 10) # 10 consecutive ids starting at 123 + index.reconstruct_batch(np.arange(10)) + + # Test original list reconstruction + index.orig_invlists = faiss.ArrayInvertedLists(index.nlist, index.code_size) + index.reconstruct_orig_invlists() + assert index.orig_invlists.compute_ntotal() == index.ntotal + + def test_no_residual(self): + self.do_test(by_residual=False) + + def test_by_residual(self): + self.do_test(by_residual=True) + + class TestIsTrained(unittest.TestCase): def test_issue_2019(self): From 03e06dc47eeff9d9fe621a787f1a641f47b050e7 Mon Sep 17 00:00:00 2001 From: Ali Safaya Date: Wed, 8 Jan 2025 02:28:19 +0300 Subject: [PATCH 4/4] fixed invalid import --- tests/test_fast_scan_ivf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_fast_scan_ivf.py b/tests/test_fast_scan_ivf.py index c7450bd7eb..63327e14c0 100644 --- a/tests/test_fast_scan_ivf.py +++ b/tests/test_fast_scan_ivf.py @@ -8,7 +8,6 @@ import unittest import tempfile -import faiss.invlists import numpy as np import faiss