From 61cfa302d61b8baef2c2cfeeb79c24653e1603a0 Mon Sep 17 00:00:00 2001
From: Jim Borden <jim.borden@couchbase.com>
Date: Thu, 20 Jun 2024 07:44:49 +0900
Subject: [PATCH 1/7] CBL-5860: New API for enabling extensions (#2073)

Does a sanity check on the extension as well so that it can fail fast
---
 C/Cpp_include/c4Database.hh         |  6 +++--
 C/c4.exp                            |  1 +
 C/c4CAPI.cc                         |  4 +++
 C/c4Database.cc                     |  4 +++
 C/c4_ee.exp                         |  1 +
 C/include/c4Database.h              | 15 ++++++++++--
 LiteCore/Storage/SQLiteDataFile.cc  | 38 +++++++++++++++++++++++------
 LiteCore/Storage/SQLiteDataFile.hh  |  4 ++-
 LiteCore/Support/Extension.cc       |  2 +-
 LiteCore/tests/LazyVectorAPITest.cc |  2 +-
 LiteCore/tests/VectorQueryTest.cc   |  9 +++++++
 LiteCore/tests/VectorQueryTest.hh   |  2 +-
 12 files changed, 73 insertions(+), 15 deletions(-)

diff --git a/C/Cpp_include/c4Database.hh b/C/Cpp_include/c4Database.hh
index 0e06b9833..4534e1eee 100644
--- a/C/Cpp_include/c4Database.hh
+++ b/C/Cpp_include/c4Database.hh
@@ -50,10 +50,12 @@ struct C4Database
 
     using Config = C4DatabaseConfig2;
 
-    /** Registers a directory path to load extension libraries from, such as Vector Search.
-        Must be called before opening a database that will use an extension. */
+    // Deprecated in favor of enableExtension!
     static void setExtensionPath(slice path);
 
+    /** Locates and verifies the named extension in `path`; throws if it is unknown, missing or incompatible. */
+    static void enableExtension(slice name, slice path);
+
     static bool exists(slice name, slice inDirectory);
     static void copyNamed(slice sourcePath, slice destinationName, const Config&);
     static bool deleteNamed(slice name, slice inDirectory);
diff --git a/C/c4.exp b/C/c4.exp
index a60ba7ae7..bd4a002f7 100644
--- a/C/c4.exp
+++ b/C/c4.exp
@@ -402,6 +402,7 @@ _c4_dumpInstances
 _gC4ExpectExceptions
 
 _c4_setExtensionPath
+_c4_enableExtension
 
 _FLDoc_FromJSON
 _FLDoc_Retain
diff --git a/C/c4CAPI.cc b/C/c4CAPI.cc
index adebbc5f2..beeb4698f 100644
--- a/C/c4CAPI.cc
+++ b/C/c4CAPI.cc
@@ -374,6 +374,10 @@ C4SliceResult c4coll_getIndexesInfo(C4Collection* coll, C4Error* C4NULLABLE outE
 
 void c4_setExtensionPath(C4String path) noexcept { C4Database::setExtensionPath(path); }
 
+bool c4_enableExtension(C4String name, C4String extensionPath, C4Error* outError) noexcept {
+    return tryCatch(outError, [=] { C4Database::enableExtension(name, extensionPath); });
+}
+
 bool c4db_exists(C4String name, C4String inDirectory) noexcept { return C4Database::exists(name, inDirectory); }
 
 bool c4key_setPassword(C4EncryptionKey* outKey, C4String password, C4EncryptionAlgorithm alg) noexcept {
diff --git a/C/c4Database.cc b/C/c4Database.cc
index 61c9c76f4..f07294e34 100644
--- a/C/c4Database.cc
+++ b/C/c4Database.cc
@@ -62,6 +62,10 @@ C4EncryptionKey C4EncryptionKeyFromPasswordSHA1(slice password, C4EncryptionAlgo
 
 void C4Database::setExtensionPath(slice path) { SQLiteDataFile::setExtensionPath(string(path)); }
 
+void C4Database::enableExtension(slice name, slice path) {
+    SQLiteDataFile::enableExtension(string(name), string(path));
+}
+
 #pragma mark - STATIC LIFECYCLE METHODS:
 
 static FilePath dbPath(slice name, slice parentDir) {
diff --git a/C/c4_ee.exp b/C/c4_ee.exp
index 87b76ca67..754bf9f28 100644
--- a/C/c4_ee.exp
+++ b/C/c4_ee.exp
@@ -443,6 +443,7 @@ _c4_dumpInstances
 _gC4ExpectExceptions
 
 _c4_setExtensionPath
+_c4_enableExtension
 
 _FLDoc_FromJSON
 _FLDoc_Retain
diff --git a/C/include/c4Database.h b/C/include/c4Database.h
index c9f4718c3..48fa8aa8b 100644
--- a/C/include/c4Database.h
+++ b/C/include/c4Database.h
@@ -45,10 +45,21 @@ NODISCARD CBL_CORE_API bool c4key_setPassword(C4EncryptionKey* encryptionKey, C4
 NODISCARD CBL_CORE_API bool c4key_setPasswordSHA1(C4EncryptionKey* encryptionKey, C4String password,
                                                   C4EncryptionAlgorithm alg) C4API;
 
-/** Registers a directory path to load extension libraries from, such as Vector Search.
-    Must be called before opening a database that will use an extension. */
+// Sets the directory to load extension libraries from. Deprecated in favor of c4_enableExtension
 CBL_CORE_API void c4_setExtensionPath(C4String path) C4API;
 
+/** Asks LiteCore to look for and validate an extension, given the name of the extension
+ * and the path in which it is supposed to reside.  It only attempts to check for problems
+ * that the user has a chance of correcting (e.g. if a bug in the extension keeps it from
+ * loading in a functional state, that won't be caught)
+ * @param name The name of the extension, e.g. "CouchbaseLiteVectorSearch" (corresponds to
+ *             the library's filename without its file-type suffix)
+ * @param extensionPath The path in which the extension should be found
+ * @param outError On failure, will store the error.
+ * @return True on success, false on failure
+ */
+CBL_CORE_API bool c4_enableExtension(C4String name, C4String extensionPath, C4Error* outError) C4API;
+
 /** @} */
 
 //////// DATABASE API:
diff --git a/LiteCore/Storage/SQLiteDataFile.cc b/LiteCore/Storage/SQLiteDataFile.cc
index fe528f53a..98401f1bb 100644
--- a/LiteCore/Storage/SQLiteDataFile.cc
+++ b/LiteCore/Storage/SQLiteDataFile.cc
@@ -80,9 +80,11 @@ namespace litecore {
     // Maximum size WAL journal will be left at after a commit
     static const int64_t kJournalSize = 5 * MB;
 
+    static map<string, int> kValidExtensionVersions = {
 #ifdef COUCHBASE_ENTERPRISE
-    static constexpr int kVectorSearchCompatibleVersion = 1;
+            {"CouchbaseLiteVectorSearch", 1}
 #endif
+    };
 
     // Amount of file to memory-map
 #if TARGET_OS_OSX || TARGET_OS_SIMULATOR
@@ -185,6 +187,34 @@ namespace litecore {
 
     void SQLiteDataFile::setExtensionPath(string path) { sExtensionPath = std::move(path); }
 
+    void SQLiteDataFile::enableExtension(const string& name, string path) {
+        auto extensionEntry = kValidExtensionVersions.find(name);
+        if ( extensionEntry == kValidExtensionVersions.end() ) {
+            error::_throw(error::LiteCoreError::InvalidParameter, "'%s' is not a known extension", name.c_str());
+        }
+
+        // NOTE: This logic will need to be changed later if we have more than one extension
+        // and they reside in different directories
+        if ( !sExtensionPath.empty() && sExtensionPath != path ) {
+            WarnError("Extension path previously set to '%s' but being reset to '%s'.  This is not advisable!",
+                      sExtensionPath.c_str(), path.c_str());
+        }
+
+        sExtensionPath = std::move(path);
+
+#if defined(__ANDROID__)
+        string pluginPath = sExtensionPath + FilePath::kSeparator + "lib" + name;
+#else
+        string pluginPath = sExtensionPath + FilePath::kSeparator + name;
+#endif
+
+        if ( !extension::check_extension_version(pluginPath, extensionEntry->second) ) {
+            error::_throw(error::UnsupportedOperation,
+                          "Extension '%s' is not found or not compatible with this version of Couchbase Lite",
+                          name.c_str());
+        }
+    }
+
     SQLiteDataFile::SQLiteDataFile(const FilePath& path, DataFile::Delegate* delegate, const Options* options)
         : DataFile(path, delegate, options) {
         reopen();
@@ -217,12 +247,6 @@ namespace litecore {
         };
 
         string pluginPath = sExtensionPath + FilePath::kSeparator + extensionName;
-        if ( !litecore::extension::check_extension_version(pluginPath, kVectorSearchCompatibleVersion) ) {
-            // This function logs the reason for the version match failure, no need to log here.
-            error::_throw(error::UnsupportedOperation,
-                          "Extension '%s' is not found or not compatible with this version of Couchbase Lite",
-                          extensionName);
-        }
 
 #    if defined(_WIN32) && defined(_M_X64)
         // Flimsy hack to get around the fact that we need to load this dep from a non-standard
diff --git a/LiteCore/Storage/SQLiteDataFile.hh b/LiteCore/Storage/SQLiteDataFile.hh
index 211938da6..af160e020 100644
--- a/LiteCore/Storage/SQLiteDataFile.hh
+++ b/LiteCore/Storage/SQLiteDataFile.hh
@@ -102,9 +102,11 @@ namespace litecore {
 
         Retained<Query> compileQuery(slice expression, QueryLanguage, KeyStore*) override;
 
-        /// Sets the directory where SQLite extensions can be found (i.e. VectorSearch)
+        // Deprecated in favor of enableExtension!
         static void setExtensionPath(string);
 
+        static void enableExtension(const string& name, string path);
+
         // QueryParser::delegate:
         bool        tableExists(const std::string& tableName) const override;
         string      collectionTableName(const string& collection, DeletionStatus) const override;
diff --git a/LiteCore/Support/Extension.cc b/LiteCore/Support/Extension.cc
index 247bfe38e..6c76b9bd5 100644
--- a/LiteCore/Support/Extension.cc
+++ b/LiteCore/Support/Extension.cc
@@ -125,7 +125,7 @@ bool litecore::extension::check_extension_version(const string& extensionPath, i
     int         majorVersion = version_number_f() / 1000000;
     const char* versionStr   = version_f();
     if ( majorVersion == expectedVersion ) {
-        LogToAt(DBLog, Info, "Loaded extension '%s' version %s", extensionName.c_str(), versionStr);
+        LogToAt(DBLog, Info, "Found extension '%s' version %s", extensionName.c_str(), versionStr);
         return true;
     }
 
diff --git a/LiteCore/tests/LazyVectorAPITest.cc b/LiteCore/tests/LazyVectorAPITest.cc
index 5ae8fe7c0..cb1aa4975 100644
--- a/LiteCore/tests/LazyVectorAPITest.cc
+++ b/LiteCore/tests/LazyVectorAPITest.cc
@@ -104,7 +104,7 @@ class LazyVectorAPITest : public C4Test {
         std::call_once(sOnce, [] {
             if ( const char* path = getenv("LiteCoreExtensionPath") ) {
                 sExtensionPath = path;
-                litecore::SQLiteDataFile::setExtensionPath(sExtensionPath);
+                litecore::SQLiteDataFile::enableExtension("CouchbaseLiteVectorSearch", sExtensionPath);
                 Log("Registered LiteCore extension path %s", path);
             }
         });
diff --git a/LiteCore/tests/VectorQueryTest.cc b/LiteCore/tests/VectorQueryTest.cc
index b2a3e91d7..ad35d5204 100644
--- a/LiteCore/tests/VectorQueryTest.cc
+++ b/LiteCore/tests/VectorQueryTest.cc
@@ -20,6 +20,7 @@
 #include "Base64.hh"
 #include "c4Database.hh"
 #include "c4Collection.hh"
+#include "c4Database.h"
 
 #ifdef COUCHBASE_ENTERPRISE
 
@@ -668,6 +669,14 @@ TEST_CASE_METHOD(SIFTVectorQueryTest, "Index isTrained API", "[Query][.VectorSea
     CHECK(isTrained == expectedTrained);
 }
 
+TEST_CASE_METHOD(SIFTVectorQueryTest, "enableExtension API", "[.VectorSearch]") {
+    ExpectingExceptions e;
+    C4Error             err;
+    auto                result = c4_enableExtension("BadName"_sl, FLStr(sExtensionPath.c_str()), &err);
+    CHECK(!result);
+    CHECK(err.code == kC4ErrorInvalidParameter);
+}
+
 N_WAY_TEST_CASE_METHOD(SIFTVectorQueryTest, "Inspect Vector Index", "[Query][.VectorSearch]") {
     auto allKeyStores = db->allKeyStoreNames();
     readVectorDocs(100);
diff --git a/LiteCore/tests/VectorQueryTest.hh b/LiteCore/tests/VectorQueryTest.hh
index 12b8b7dbe..2f23b1cd5 100644
--- a/LiteCore/tests/VectorQueryTest.hh
+++ b/LiteCore/tests/VectorQueryTest.hh
@@ -20,7 +20,7 @@ class VectorQueryTest : public QueryTest {
         std::call_once(sOnce, [] {
             if ( const char* path = getenv("LiteCoreExtensionPath") ) {
                 sExtensionPath = path;
-                litecore::SQLiteDataFile::setExtensionPath(sExtensionPath);
+                litecore::SQLiteDataFile::enableExtension("CouchbaseLiteVectorSearch", sExtensionPath);
                 Log("Registered LiteCore extension path %s", path);
             }
         });

From 8c6d9e78dabff9d1e0a436f63390be4681587fa9 Mon Sep 17 00:00:00 2001
From: callumbirks <73551885+callumbirks@users.noreply.github.com>
Date: Fri, 21 Jun 2024 17:04:46 +0100
Subject: [PATCH 2/7] Added explanatory note to c4index_beginUpdate doc-comment
 [CBL-5842] (#2070) (#2074)

Co-authored-by: Jens Alfke <jens@couchbase.com>
---
 C/include/c4Index.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/C/include/c4Index.h b/C/include/c4Index.h
index f92014902..76ed20293 100644
--- a/C/include/c4Index.h
+++ b/C/include/c4Index.h
@@ -130,6 +130,12 @@ CBL_CORE_API bool c4coll_isIndexTrained(C4Collection* collection, C4String name,
     2. Call `finish` to apply the updates to the index.
     3. Release the `C4IndexUpdater`, of course.
 
+    @note The updater is not guaranteed to find all of the unindexed documents at once! It may
+        return less than the limit, even if more exist. It _is_ guaranteed to make progress,
+        by returning _some_ unindexed documents if there are any. The intention is that the app
+        will continue updating the index periodically until this call returns NULL, signaling
+        that the index is now up-to-date.
+
     @param index  The index to update; must be a vector index with the lazy attribute.
     @param limit  The maximum number of out-of-date documents to include.
     @param outError  On failure, will be set to the error status.

From c1f302ec5c124aafd7ad42145c116ba48df65ebd Mon Sep 17 00:00:00 2001
From: Jens Alfke <jens@couchbase.com>
Date: Mon, 17 Jun 2024 16:17:25 -0700
Subject: [PATCH 3/7] Use same vector-index options struct as vectorsearch repo
 (#2058)

* Use same vector-index options struct as vectorsearch repo

Replaced IndexSpec::VectorOptions with vectorsearch::IndexSpec.

* Reimplemented SQLiteKeyStore::isIndexTrained()

using a query on the vector index that checks whether any vectors
have a bucket not equal to -1.

As part of this I had to add the `const` attribute to
`KeyStore::getIndex()`.

* c4Collection::getIndexesInfo includes vector options

c4Collection::getIndexesInfo now includes the vector index options,
under the key `vector_options`, encoded in a string in the same
format as the virtual-table args in the CREATE INDEX statement.

* C4Index API update (added getOptions!)

- Added c4Index::getOptions, which returns the complete vector or
  FTS options used to create the index.
- Also added c4Index methods getType, getQueryLanguage, getExpression,
  isTrained.
- Added the equivalent C functions.
- Moved index related functions from c4Collection.h to c4Index.h.
- In c4Index.h, moved the older database-based functions like
  c4db_createIndex to the bottom, bracketed by
  `#ifndef C4_STRICT_COLLECTION_API`.
---
 C/Cpp_include/c4Index.hh                     |  13 +-
 C/c4.exp                                     |   5 +
 C/c4CAPI.cc                                  |  18 +-
 C/c4Index.cc                                 |  93 +++++-
 C/c4_ee.exp                                  |   7 +
 C/include/c4Collection.h                     |  54 +--
 C/include/c4Index.h                          | 218 ++++++------
 C/include/c4IndexTypes.h                     |   4 +-
 C/scripts/c4.txt                             |   5 +
 C/scripts/c4_ee.txt                          |   2 +
 C/tests/c4DatabaseTest.cc                    |   2 +-
 CMakeLists.txt                               |   1 +
 LiteCore/Database/CollectionImpl.hh          |  64 ++--
 LiteCore/Query/IndexSpec.cc                  |   7 +-
 LiteCore/Query/IndexSpec.hh                  |  52 +--
 LiteCore/Query/SQLiteDataFile+Indexes.cc     |  28 --
 LiteCore/Query/SQLiteKeyStore+Indexes.cc     |   2 +-
 LiteCore/Query/SQLiteKeyStore+VectorIndex.cc | 153 +++------
 LiteCore/Storage/BothKeyStore.hh             |   2 +-
 LiteCore/Storage/KeyStore.hh                 |   2 +-
 LiteCore/Storage/SQLiteKeyStore.hh           |   2 +-
 LiteCore/tests/CMakeLists.txt                |   1 +
 LiteCore/tests/LazyVectorQueryTest.cc        |   6 +-
 LiteCore/tests/PredictiveVectorQueryTest.cc  |   4 +-
 LiteCore/tests/VectorQueryTest.cc            |   9 +-
 Replicator/tests/ReplicatorSGTest.cc         |   1 +
 Xcode/LiteCore.xcodeproj/project.pbxproj     |  18 +-
 cmake/platform_base.cmake                    |   1 +
 vendor/vector_search/README.md               |   9 +
 vendor/vector_search/VectorIndexSpec.cc      | 332 +++++++++++++++++++
 vendor/vector_search/VectorIndexSpec.hh      | 190 +++++++++++
 31 files changed, 920 insertions(+), 385 deletions(-)
 create mode 100644 vendor/vector_search/README.md
 create mode 100644 vendor/vector_search/VectorIndexSpec.cc
 create mode 100644 vendor/vector_search/VectorIndexSpec.hh

diff --git a/C/Cpp_include/c4Index.hh b/C/Cpp_include/c4Index.hh
index 278b2fcbb..8091cbe81 100644
--- a/C/Cpp_include/c4Index.hh
+++ b/C/Cpp_include/c4Index.hh
@@ -33,20 +33,31 @@ struct C4Index
 
     slice getName() const noexcept { return _name; }
 
+    C4IndexType     getType() const noexcept;
+    C4QueryLanguage getQueryLanguage() const noexcept;
+    slice           getExpression() const noexcept;
+
+    /// Writes the index options to `opts` and returns true. If there are none, returns false.
+    [[nodiscard]] bool getOptions(C4IndexOptions& opts) const noexcept;
+
 #ifdef COUCHBASE_ENTERPRISE
+    bool isTrained() const;
+
     /// Finds new or updated documents for which vectors need to be recomputed by the application.
     /// If there are none, returns NULL.
     /// @param limit  The maximum number of documents/vectors to return. If this is less than
     ///               the total number, the rest will be returned on the next call to `beginUpdate`.
     /// @warning  Do not call `beginUpdate` again until you're done with the returned updater;
     ///           it's not valid to have more than one update in progress at a time.
-    Retained<struct C4IndexUpdater> beginUpdate(size_t limit);
+    Retained<C4IndexUpdater> beginUpdate(size_t limit);
 #endif
 
   protected:
     friend class litecore::CollectionImpl;
     static Retained<C4Index> getIndex(C4Collection*, slice name);
 
+    C4Index(C4Collection* coll, std::string name) : _collection(coll), _name(std::move(name)) {}
+
     Retained<C4Collection> _collection;
     std::string            _name;
 };
diff --git a/C/c4.exp b/C/c4.exp
index bd4a002f7..23cd233b4 100644
--- a/C/c4.exp
+++ b/C/c4.exp
@@ -404,6 +404,11 @@ _gC4ExpectExceptions
 _c4_setExtensionPath
 _c4_enableExtension
 
+_c4index_getType
+_c4index_getQueryLanguage
+_c4index_getExpression
+_c4index_getOptions
+
 _FLDoc_FromJSON
 _FLDoc_Retain
 _FLDoc_GetAllocedData
diff --git a/C/c4CAPI.cc b/C/c4CAPI.cc
index beeb4698f..782d10164 100644
--- a/C/c4CAPI.cc
+++ b/C/c4CAPI.cc
@@ -557,7 +557,7 @@ bool c4db_createIndex2(C4Database* database, C4Slice name, C4Slice indexSpec, C4
 }
 
 bool c4coll_isIndexTrained(C4Collection* collection, C4Slice name, C4Error* outError) noexcept {
-    memset(outError, 0, sizeof(C4Error));
+    if ( outError ) *outError = kC4NoError;
     return tryCatch(outError, [=] { return collection->isIndexTrained(name); });
 }
 
@@ -905,6 +905,22 @@ C4Document* c4enum_getDocument(C4DocEnumerator* e, C4Error* outError) noexcept {
     });
 }
 
+#pragma mark - INDEXES:
+
+C4IndexType c4index_getType(C4Index* index) C4API { return index->getType(); }
+
+C4QueryLanguage c4index_getQueryLanguage(C4Index* index) C4API { return index->getQueryLanguage(); }
+
+C4String c4index_getExpression(C4Index* index) C4API { return index->getExpression(); }
+
+bool c4index_getOptions(C4Index* index, C4IndexOptions* outOpts) C4API { return index->getOptions(*outOpts); }
+
+#ifdef COUCHBASE_ENTERPRISE
+bool c4index_isTrained(C4Index* index, C4Error* C4NULLABLE outError) C4API {
+    return c4coll_isIndexTrained(index->getCollection(), index->getName(), outError);
+}
+#endif
+
 #pragma mark - OBSERVERS:
 
 C4DatabaseObserver* c4dbobs_createOnCollection(C4Collection* coll, C4CollectionObserverCallback callback,
diff --git a/C/c4Index.cc b/C/c4Index.cc
index cefb08c4f..b2e9dd913 100644
--- a/C/c4Index.cc
+++ b/C/c4Index.cc
@@ -28,9 +28,70 @@ using namespace fleece;
 using namespace litecore;
 
 struct C4IndexImpl final : public C4Index {
-    C4IndexImpl(C4Collection* c, slice name) : _spec(asInternal(c)->keyStore().getIndex(name)) {
-        _collection = c;
-        _name       = name;
+    C4IndexImpl(C4Collection* c, IndexSpec spec) : C4Index(c, spec.name), _spec(std::move(spec)) {}
+
+    C4IndexType getType() const noexcept { return C4IndexType(_spec.type); }
+
+    C4QueryLanguage getQueryLanguage() const noexcept { return C4QueryLanguage(_spec.queryLanguage); }
+
+    slice getExpression() const noexcept { return _spec.expression; }
+
+    bool getOptions(C4IndexOptions& opts) const noexcept {
+        opts = {};
+        if ( auto ftsOpts = _spec.ftsOptions() ) {
+            opts.language         = ftsOpts->language;
+            opts.ignoreDiacritics = ftsOpts->ignoreDiacritics;
+            opts.disableStemming  = ftsOpts->disableStemming;
+            opts.stopWords        = ftsOpts->stopWords;
+            return true;
+
+#ifdef COUCHBASE_ENTERPRISE
+        } else if ( auto vecOpts = _spec.vectorOptions() ) {
+            opts.vector.dimensions      = vecOpts->dimensions;
+            opts.vector.metric          = C4VectorMetricType(int(vecOpts->metric) + 1);
+            opts.vector.clustering.type = C4VectorClusteringType(vecOpts->clusteringType());
+            switch ( vecOpts->clusteringType() ) {
+                case vectorsearch::ClusteringType::Flat:
+                    {
+                        auto flat = std::get<vectorsearch::FlatClustering>(vecOpts->clustering);
+                        opts.vector.clustering.flat_centroids = flat.numCentroids;
+                        break;
+                    }
+                case vectorsearch::ClusteringType::MultiIndex:
+                    {
+                        auto multi = std::get<vectorsearch::MultiIndexClustering>(vecOpts->clustering);
+                        opts.vector.clustering.multi_bits          = multi.bitsPerSub;
+                        opts.vector.clustering.multi_subquantizers = multi.subquantizers;
+                        break;
+                    }
+            }
+            opts.vector.encoding.type = C4VectorEncodingType(vecOpts->encodingType());
+            switch ( vecOpts->encodingType() ) {
+                case vectorsearch::EncodingType::None:
+                    break;
+                case vectorsearch::EncodingType::PQ:
+                    {
+                        auto pq                               = std::get<vectorsearch::PQEncoding>(vecOpts->encoding);
+                        opts.vector.encoding.pq_subquantizers = pq.subquantizers;
+                        opts.vector.encoding.bits             = pq.bitsPerSub;
+                        break;
+                    }
+                case vectorsearch::EncodingType::SQ:
+                    {
+                        auto sq                   = std::get<vectorsearch::SQEncoding>(vecOpts->encoding);
+                        opts.vector.encoding.bits = sq.bitsPerDimension;
+                        break;
+                    }
+            }
+            if ( vecOpts->probeCount ) opts.vector.numProbes = *vecOpts->probeCount;
+            if ( vecOpts->minTrainingCount ) opts.vector.minTrainingSize = unsigned(*vecOpts->minTrainingCount);
+            if ( vecOpts->maxTrainingCount ) opts.vector.maxTrainingSize = unsigned(*vecOpts->maxTrainingCount);
+            opts.vector.lazy = vecOpts->lazyEmbedding;
+            return true;
+#endif
+        } else {
+            return false;
+        }
     }
 
 #ifdef COUCHBASE_ENTERPRISE
@@ -43,21 +104,35 @@ struct C4IndexImpl final : public C4Index {
     }
 #endif
 
-    optional<IndexSpec>           _spec;
+    IndexSpec                     _spec;
     Retained<litecore::LazyIndex> _lazy;
 };
 
-inline C4IndexImpl* asInternal(C4Index* index) { return static_cast<C4IndexImpl*>(index); }
+inline C4IndexImpl* asInternal(C4Index* i) { return static_cast<C4IndexImpl*>(i); }
 
-Retained<C4Index> C4Index::getIndex(C4Collection* c, slice name) {
-    Retained<C4IndexImpl> index = new C4IndexImpl(c, name);
-    if ( !index->_spec ) index = nullptr;
-    return index;
+inline C4IndexImpl const* asInternal(C4Index const* i) { return static_cast<C4IndexImpl const*>(i); }
+
+/*static*/ Retained<C4Index> C4Index::getIndex(C4Collection* c, slice name) {
+    if ( optional<IndexSpec> spec = asInternal(c)->keyStore().getIndex(name) ) {
+        return new C4IndexImpl(c, *std::move(spec));
+    } else {
+        return nullptr;
+    }
 }
 
+C4IndexType C4Index::getType() const noexcept { return asInternal(this)->getType(); }
+
+C4QueryLanguage C4Index::getQueryLanguage() const noexcept { return asInternal(this)->getQueryLanguage(); }
+
+slice C4Index::getExpression() const noexcept { return asInternal(this)->getExpression(); }
+
+bool C4Index::getOptions(C4IndexOptions& opts) const noexcept { return asInternal(this)->getOptions(opts); }
+
 
 #ifdef COUCHBASE_ENTERPRISE
 
+bool C4Index::isTrained() const { return _collection->isIndexTrained(_name); }
+
 Retained<C4IndexUpdater> C4Index::beginUpdate(size_t limit) { return asInternal(this)->beginUpdate(limit); }
 
 C4IndexUpdater::C4IndexUpdater(Retained<litecore::LazyIndexUpdate> u, C4Collection* c)
diff --git a/C/c4_ee.exp b/C/c4_ee.exp
index 754bf9f28..10cf320e9 100644
--- a/C/c4_ee.exp
+++ b/C/c4_ee.exp
@@ -445,6 +445,11 @@ _gC4ExpectExceptions
 _c4_setExtensionPath
 _c4_enableExtension
 
+_c4index_getType
+_c4index_getQueryLanguage
+_c4index_getExpression
+_c4index_getOptions
+
 _FLDoc_FromJSON
 _FLDoc_Retain
 _FLDoc_GetAllocedData
@@ -481,7 +486,9 @@ _c4keypair_privateKeyData
 _c4keypair_publicKeyData
 _c4keypair_publicKeyDigest
 
+_c4index_isTrained
 _c4index_beginUpdate
+
 _c4indexupdater_count
 _c4indexupdater_valueAt
 _c4indexupdater_setVectorAt
diff --git a/C/include/c4Collection.h b/C/include/c4Collection.h
index c10f432b2..1f29364da 100644
--- a/C/include/c4Collection.h
+++ b/C/include/c4Collection.h
@@ -13,7 +13,7 @@
 #pragma once
 #include "c4DatabaseTypes.h"
 #include "c4DocumentTypes.h"
-#include "c4IndexTypes.h"
+#include "fleece/Fleece.h"
 
 C4_ASSUME_NONNULL_BEGIN
 C4API_BEGIN_DECLS
@@ -26,6 +26,11 @@ C4API_BEGIN_DECLS
     Observer-related functions are in c4Observer.h:
     - c4dbobs_createOnCollection
     - c4docobs_createWithCollection
+    Index-related functions are in c4Index.h:
+    - c4coll_createIndex
+    - c4coll_deleteIndex
+    - c4coll_getIndex
+    - c4coll_getIndexesInfo
 */
 
 
@@ -263,53 +268,6 @@ CBL_CORE_API C4Timestamp c4coll_nextDocExpiration(C4Collection*) C4API;
 NODISCARD CBL_CORE_API int64_t c4coll_purgeExpiredDocs(C4Collection*, C4Error* C4NULLABLE) C4API;
 
 
-/** @} */
-/** \name Indexes
-    @{ */
-
-
-/** Creates a collection index, of the values of specific expressions across all documents.
-    The name is used to identify the index for later updating or deletion; if an index with the
-    same name already exists, it will be replaced unless it has the exact same expressions.
-
-    The `indexSpec` argument is an expression, relative to a document, that describes what to index.
-    It can be in either the JSON query schema, or in N1QL syntax. It usually names a property,
-    but may also be a computed value based on properties.
-
-    @param collection  The collection to index.
-    @param name  The name of the index. Any existing index with the same name will be replaced,
-                 unless it has the identical expressions (in which case this is a no-op.)
-    @param indexSpec  The definition of the index in JSON or N1QL form. (See above.)
-    @param queryLanguage  The language of `indexSpec`, either JSON or N1QL.
-    @param indexType  The type of index (value full-text, etc.)
-    @param indexOptions  Options for the index. If NULL, each option will get a default value.
-    @param outError  On failure, will be set to the error status.
-    @return  True on success, false on failure. */
-NODISCARD CBL_CORE_API bool c4coll_createIndex(C4Collection* collection, C4String name, C4String indexSpec,
-                                               C4QueryLanguage queryLanguage, C4IndexType indexType,
-                                               const C4IndexOptions* C4NULLABLE indexOptions,
-                                               C4Error* C4NULLABLE              outError) C4API;
-
-/** Returns an object representing an existing index. */
-CBL_CORE_API C4Index* C4NULLABLE c4coll_getIndex(C4Collection* collection, C4String name,
-                                                 C4Error* C4NULLABLE outError) C4API;
-
-/** Deletes an index that was created by `c4coll_createIndex`.
-    @param collection  The collection to index.
-    @param name The name of the index to delete
-    @param outError  On failure, will be set to the error status.
-    @return  True on success, false on failure. */
-NODISCARD CBL_CORE_API bool c4coll_deleteIndex(C4Collection* collection, C4String name,
-                                               C4Error* C4NULLABLE outError) C4API;
-
-/** Returns information about all indexes in the collection.
-    The result is a Fleece-encoded array of dictionaries, one per index.
-    Each dictionary has keys `"name"`, `"type"` (a `C4IndexType`), and `"expr"` (the source expression).
-    @param collection  The collection to check
-    @param outError  On failure, will be set to the error status.
-    @return  A Fleece-encoded array of dictionaries, or NULL on failure. */
-CBL_CORE_API C4SliceResult c4coll_getIndexesInfo(C4Collection* collection, C4Error* C4NULLABLE outError) C4API;
-
 /** @} */
 /** @} */  // end Collections group
 
diff --git a/C/include/c4Index.h b/C/include/c4Index.h
index 76ed20293..07e54920d 100644
--- a/C/include/c4Index.h
+++ b/C/include/c4Index.h
@@ -16,111 +16,120 @@
 C4_ASSUME_NONNULL_BEGIN
 C4API_BEGIN_DECLS
 
-/** \defgroup Indexing  Database Indexes
+//======== C4Collection Methods:
+
+/** \defgroup Indexing  Indexes
      @{ */
 
+/** Creates a collection index, of the values of specific expressions across all documents.
+    The name is used to identify the index for later updating or deletion; if an index with the
+    same name already exists, it will be replaced unless it has the exact same expressions.
+
+    Currently five types of indexes are supported:
+
+    * Value indexes speed up queries by making it possible to look up property (or expression)
+      values without scanning every document. They're just like regular indexes in SQL or N1QL.
+      Multiple expressions are supported; the first is the primary key, second is secondary.
+      Expressions must evaluate to scalar types (boolean, number, string).
+    * Full-Text Search (FTS) indexes enable fast search of natural-language words or phrases
+      by using the `MATCH` operator in a query. A FTS index is **required** for full-text
+      search: a query with a `MATCH` operator will fail to compile unless there is already a
+      FTS index for the property/expression being matched. Only a single expression is
+      currently allowed, and it must evaluate to a string.
+    * Array indexes optimize UNNEST queries, by materializing an unnested array property
+      (across all documents) as a table in the SQLite database, and creating a SQL index on it.
+    * Predictive indexes optimize queries that use the PREDICTION() function, by materializing
+      the function's results as a table and creating a SQL index on a result property.
+    * Vector indexes store high-dimensional vectors/embeddings and support efficient Approximate
+      Nearest Neighbor (ANN) queries for finding the nearest vectors to a query vector.
+
+    Note: If some documents are missing the values to be indexed,
+    those documents will just be omitted from the index. It's not an error.
+
+    In an array index, the first expression must evaluate to an array to be unnested; it's
+    usually a property path but could be some other expression type. If the array items are
+    nonscalar (dictionaries or arrays), you should add a second expression defining the sub-
+    property (or computed value) to index, relative to the array item.
+
+    In a predictive index, the expression is a PREDICTION() call in JSON query syntax,
+    including the optional 3rd parameter that gives the result property to extract (and index.)
+
+    The `indexSpec` argument is an expression, relative to a document, that describes what to index.
+    It can be in either the JSON query schema, or in N1QL syntax. It usually names a property,
+    but may also be a computed value based on properties.
+
+    @param collection  The collection to index.
+    @param name  The name of the index. Any existing index with the same name will be replaced,
+                 unless it has the identical expressions (in which case this is a no-op.)
+    @param indexSpec  The definition of the index in JSON or N1QL form. (See above.)
+    @param queryLanguage  The language of `indexSpec`, either JSON or N1QL.
+    @param indexType  The type of index (value, full-text, etc.)
+    @param indexOptions  Options for the index. If NULL, each option will get a default value.
+    @param outError  On failure, will be set to the error status.
+    @return  True on success, false on failure. */
+NODISCARD CBL_CORE_API bool c4coll_createIndex(C4Collection* collection, C4String name, C4String indexSpec,
+                                               C4QueryLanguage queryLanguage, C4IndexType indexType,
+                                               const C4IndexOptions* C4NULLABLE indexOptions,
+                                               C4Error* C4NULLABLE              outError) C4API;
+
+/** Returns an object representing an existing index. */
+CBL_CORE_API C4Index* C4NULLABLE c4coll_getIndex(C4Collection* collection, C4String name,
+                                                 C4Error* C4NULLABLE outError) C4API;
+
+/** Deletes an index that was created by `c4coll_createIndex`.
+    @param collection  The collection containing the index.
+    @param name The name of the index to delete
+    @param outError  On failure, will be set to the error status.
+    @return  True on success, false on failure. */
+NODISCARD CBL_CORE_API bool c4coll_deleteIndex(C4Collection* collection, C4String name,
+                                               C4Error* C4NULLABLE outError) C4API;
 
-/** Creates a database index, of the values of specific expressions across all documents.
-        The name is used to identify the index for later updating or deletion; if an index with the
-        same name already exists, it will be replaced unless it has the exact same expressions.
-
-        Currently four types of indexes are supported:
-
-        * Value indexes speed up queries by making it possible to look up property (or expression)
-          values without scanning every document. They're just like regular indexes in SQL or N1QL.
-          Multiple expressions are supported; the first is the primary key, second is secondary.
-          Expressions must evaluate to scalar types (boolean, number, string).
-        * Full-Text Search (FTS) indexes enable fast search of natural-language words or phrases
-          by using the `MATCH` operator in a query. A FTS index is **required** for full-text
-          search: a query with a `MATCH` operator will fail to compile unless there is already a
-          FTS index for the property/expression being matched. Only a single expression is
-          currently allowed, and it must evaluate to a string.
-        * Array indexes optimize UNNEST queries, by materializing an unnested array property
-          (across all documents) as a table in the SQLite database, and creating a SQL index on it.
-        * Predictive indexes optimize queries that use the PREDICTION() function, by materializing
-          the function's results as a table and creating a SQL index on a result property.
-
-        Note: If some documents are missing the values to be indexed,
-        those documents will just be omitted from the index. It's not an error.
-
-        In an array index, the first expression must evaluate to an array to be unnested; it's
-        usually a property path but could be some other expression type. If the array items are
-        nonscalar (dictionaries or arrays), you should add a second expression defining the sub-
-        property (or computed value) to index, relative to the array item.
-
-        In a predictive index, the expression is a PREDICTION() call in JSON query syntax,
-        including the optional 3rd parameter that gives the result property to extract (and index.)
-
-        `indexSpecJSON` specifies the index as a JSON object, with properties:
-        * `WHAT`: An array of expressions in the JSON query syntax. (Note that each
-          expression is already an array, so there are two levels of nesting.)
-        * `WHERE`: An optional expression. Including this creates a _partial index_: documents
-          for which this expression returns `false` or `null` will be skipped.
-
-        For backwards compatibility, `indexSpecJSON` may be an array; this is treated as if it were
-        a dictionary with a `WHAT` key mapping to that array.
-
-        Expressions are defined in JSON, as in a query, and wrapped in a JSON array. For example,
-        `[[".name.first"]]` will index on the first-name property. Note the two levels of brackets,
-        since an expression is already an array.
-
-        @param database  The database to index.
-        @param name  The name of the index. Any existing index with the same name will be replaced,
-                     unless it has the identical expressions (in which case this is a no-op.)
-        @param indexSpecJSON  The definition of the index in JSON form. (See above.)
-        @param indexType  The type of index (value or full-text.)
-        @param indexOptions  Options for the index. If NULL, each option will get a default value.
-        @param outError  On failure, will be set to the error status.
-        @return  True on success, false on failure. */
-NODISCARD CBL_CORE_API bool c4db_createIndex(C4Database* database, C4String name, C4String indexSpecJSON,
-                                             C4IndexType indexType, const C4IndexOptions* C4NULLABLE indexOptions,
-                                             C4Error* C4NULLABLE outError) C4API;
+/** Returns information about all indexes in the collection.
+    The result is a Fleece-encoded array of dictionaries, one per index.
+    Each dictionary has keys `"name"`, `"type"` (a `C4IndexType`), and `"expr"` (the source expression).
+    @param collection  The collection to check
+    @param outError  On failure, will be set to the error status.
+    @return  A Fleece-encoded array of dictionaries, or NULL on failure. */
+CBL_CORE_API C4SliceResult c4coll_getIndexesInfo(C4Collection* collection, C4Error* C4NULLABLE outError) C4API;
 
-/** @param database  The database to index.
-        @param name  The name of the index. Any existing index with the same name will be replaced,
-                     unless it has the identical expressions (in which case this is a no-op.)
-        @param indexSpec  The definition of the index in JSON or N1QL form. (See above.)
-        @param queryLanguage The query language (JSON or N1QL) of `indexSpec` is expressed.
-        @param indexType  The type of index (value or full-text.)
-        @param indexOptions  Options for the index. If NULL, each option will get a default value.
-        @param outError  On failure, will be set to the error status.
-        @return  True on success, false on failure. */
-NODISCARD CBL_CORE_API bool c4db_createIndex2(C4Database* database, C4String name, C4String indexSpec,
-                                              C4QueryLanguage queryLanguage, C4IndexType indexType,
-                                              const C4IndexOptions* C4NULLABLE indexOptions,
-                                              C4Error* C4NULLABLE              outError) C4API;
+/** Returns information about all indexes in the database.
+    The result is a Fleece-encoded array of dictionaries, one per index.
+    Each dictionary has keys `"name"`, `"type"` (a `C4IndexType`), and `"expr"` (the source expression).
+    @param database  The database to check
+    @param outError  On failure, will be set to the error status.
+    @return  A Fleece-encoded array of dictionaries, or NULL on failure. */
+CBL_CORE_API C4SliceResult c4db_getIndexesInfo(C4Database* database, C4Error* C4NULLABLE outError) C4API;
 
+//======== C4Index Methods:
 
-/** Deletes an index that was created by `c4db_createIndex`.
-        @param database  The database to index.
-        @param name The name of the index to delete
-        @param outError  On failure, will be set to the error status.
-        @return  True on success, false on failure. */
-NODISCARD CBL_CORE_API bool c4db_deleteIndex(C4Database* database, C4String name, C4Error* C4NULLABLE outError) C4API;
+/** Returns the index's type. */
+CBL_CORE_API C4IndexType c4index_getType(C4Index*) C4API;
 
+/** Returns the index's query language (JSON or N1QL). */
+CBL_CORE_API C4QueryLanguage c4index_getQueryLanguage(C4Index*) C4API;
 
-/** Returns information about all indexes in the database.
-        The result is a Fleece-encoded array of dictionaries, one per index.
-        Each dictionary has keys `"name"`, `"type"` (a `C4IndexType`), and `"expr"` (the source expression).
-        @param database  The database to check
-        @param outError  On failure, will be set to the error status.
-        @return  A Fleece-encoded array of dictionaries, or NULL on failure. */
-CBL_CORE_API C4SliceResult c4db_getIndexesInfo(C4Database* database, C4Error* C4NULLABLE outError) C4API;
+/** Returns the indexed expression. */
+CBL_CORE_API C4String c4index_getExpression(C4Index*) C4API;
 
-/** Returns whether or not a given vector index is trained
- *      If the index doesn't exist, or is not a vector index, then this method will
- *      return false with an appropriate error set.  Otherwise, in the absence of errors,
- *      this method will zero the error and set the return value.
- *      @param collection The collection to look up the index in
- *      @param name The name of the index to check
- *      @param outError On failure, will be set to the error status
- *      @return true if the index is trained, false if the index was not valid or is not yet trained
- */
-CBL_CORE_API bool c4coll_isIndexTrained(C4Collection* collection, C4String name, C4Error* C4NULLABLE outError) C4API;
+/** Gets the index's FTS/vector options, if any.
+    @param index  The index.
+    @param outOpts  The options will be written here, if they exist.
+    @returns  True if there are options, false if not. */
+CBL_CORE_API bool c4index_getOptions(C4Index* index, C4IndexOptions* outOpts) C4API;
 
 
 #ifdef COUCHBASE_ENTERPRISE
 
+/** Returns whether a vector index has been trained yet or not.
+    If the index doesn't exist, or is not a vector index, then this method will
+    return false with an appropriate error set.  Otherwise, in the absence of errors,
+    this method will zero the error and set the return value. */
+CBL_CORE_API bool c4index_isTrained(C4Index*, C4Error* C4NULLABLE outError) C4API;
+
+
+//======== UPDATING LAZY INDEXES:
+
+
 /** Finds new or updated documents for which vectors need to be recomputed by the application.
     If there are none, returns NULL.
     If it returns a non-NULL `C4IndexUpdater` object pointer, you should:
@@ -140,7 +149,8 @@ CBL_CORE_API bool c4coll_isIndexTrained(C4Collection* collection, C4String name,
     @param limit  The maximum number of out-of-date documents to include.
     @param outError  On failure, will be set to the error status.
     @return  A new `C4IndexUpdater` reference, or NULL if there's nothing to update. */
-CBL_CORE_API C4IndexUpdater* C4NULLABLE c4index_beginUpdate(C4Index* index, size_t limit, C4Error* outError) C4API;
+NODISCARD CBL_CORE_API C4IndexUpdater* C4NULLABLE c4index_beginUpdate(C4Index* index, size_t limit,
+                                                                      C4Error* outError) C4API;
 
 /**
  * Return the name of this index.
@@ -162,7 +172,7 @@ CBL_CORE_API size_t c4indexupdater_count(C4IndexUpdater* updater) C4API;
     @returns  A Fleece value: the value of the index's query expression evaluated on the i'th document.
               Internally this value is part of a query result. It remains valid until the index
               updater is released. If you want to keep it longer, retain it with `FLRetain`. */
-CBL_CORE_API FLValue c4indexupdater_valueAt(C4IndexUpdater* updater, size_t i) C4API;
+NODISCARD CBL_CORE_API FLValue c4indexupdater_valueAt(C4IndexUpdater* updater, size_t i) C4API;
 
 /** Sets the vector for the i'th value. If you don't call this, it's assumed there is no
     vector, and any existing vector will be removed upon `finish`.
@@ -172,8 +182,9 @@ CBL_CORE_API FLValue c4indexupdater_valueAt(C4IndexUpdater* updater, size_t i) C
     @param dimension  The dimension of `vector`; must be equal to the index's declared dimension.
     @param outError  On failure, will be set to the error status.
     @return  True on success, false on failure. */
-CBL_CORE_API bool c4indexupdater_setVectorAt(C4IndexUpdater* updater, size_t i, const float vector[C4NULLABLE],
-                                             size_t dimension, C4Error* outError) C4API;
+NODISCARD CBL_CORE_API bool c4indexupdater_setVectorAt(C4IndexUpdater* updater, size_t i,
+                                                       const float vector[C4NULLABLE], size_t dimension,
+                                                       C4Error* outError) C4API;
 
 /** Explicitly skips updating the i'th vector. No index entry will be created or deleted.
     The vector still needs to be recomputed, and will be included in the next update request.
@@ -199,6 +210,21 @@ CBL_CORE_API bool c4indexupdater_finish(C4IndexUpdater* updater, C4Error* outErr
 
 #endif
 
+
+#ifndef C4_STRICT_COLLECTION_API
+//======== SEMI-DEPRECATED DATABASE METHODS:
+NODISCARD CBL_CORE_API bool c4db_createIndex(C4Database* database, C4String name, C4String indexSpecJSON,
+                                             C4IndexType indexType, const C4IndexOptions* C4NULLABLE indexOptions,
+                                             C4Error* C4NULLABLE outError) C4API;
+NODISCARD CBL_CORE_API bool c4db_createIndex2(C4Database* database, C4String name, C4String indexSpec,
+                                              C4QueryLanguage queryLanguage, C4IndexType indexType,
+                                              const C4IndexOptions* C4NULLABLE indexOptions,
+                                              C4Error* C4NULLABLE              outError) C4API;
+NODISCARD CBL_CORE_API bool c4db_deleteIndex(C4Database* database, C4String name, C4Error* C4NULLABLE outError) C4API;
+#endif
+
+CBL_CORE_API bool c4coll_isIndexTrained(C4Collection* collection, C4String name, C4Error* C4NULLABLE outError) C4API;
+
 /** @} */
 
 C4API_END_DECLS
diff --git a/C/include/c4IndexTypes.h b/C/include/c4IndexTypes.h
index 43f1fb604..e971f7a7b 100644
--- a/C/include/c4IndexTypes.h
+++ b/C/include/c4IndexTypes.h
@@ -28,7 +28,7 @@ typedef C4_ENUM(uint32_t, C4IndexType){
         kC4ArrayIndex,       ///< Index of array values, for use with UNNEST
         kC4PredictiveIndex,  ///< Index of prediction() results (Enterprise Edition only)
         kC4VectorIndex,      ///< Index of ML vector similarity (Enterprise Edition only)
-};
+};                           // Values must match litecore::IndexSpec::Type!
 
 #ifdef COUCHBASE_ENTERPRISE
 
@@ -37,7 +37,7 @@ typedef C4_ENUM(uint32_t, C4VectorMetricType){
         kC4VectorMetricDefault,    ///< Use default metric, Euclidean
         kC4VectorMetricEuclidean,  ///< Euclidean distance (squared)
         kC4VectorMetricCosine,     ///< Cosine distance (1.0 - cosine similarity)
-};                                 // Values must match IndexSpec::VectorOptions::MetricType
+};                                 // Values DO NOT match IndexSpec::VectorOptions::MetricType!
 
 /** Types of clustering in vector indexes. There is no default type because you must fill in
     the C4VectorClustering struct with a number of centroids or subquantizers+bits. */
diff --git a/C/scripts/c4.txt b/C/scripts/c4.txt
index 95ae9dadb..1e7587c21 100644
--- a/C/scripts/c4.txt
+++ b/C/scripts/c4.txt
@@ -411,6 +411,11 @@ gC4ExpectExceptions
 
 c4_setExtensionPath
 
+c4index_getType
+c4index_getQueryLanguage
+c4index_getExpression
+c4index_getOptions
+
 FLDoc_FromJSON
 FLDoc_Retain
 FLDoc_GetAllocedData
diff --git a/C/scripts/c4_ee.txt b/C/scripts/c4_ee.txt
index 4f468eb96..7540894d1 100644
--- a/C/scripts/c4_ee.txt
+++ b/C/scripts/c4_ee.txt
@@ -57,7 +57,9 @@ c4keypair_privateKeyData
 c4keypair_publicKeyData
 c4keypair_publicKeyDigest
 
+c4index_isTrained
 c4index_beginUpdate
+
 c4indexupdater_count
 c4indexupdater_valueAt
 c4indexupdater_setVectorAt
diff --git a/C/tests/c4DatabaseTest.cc b/C/tests/c4DatabaseTest.cc
index 350e7b04f..d2248bd3a 100644
--- a/C/tests/c4DatabaseTest.cc
+++ b/C/tests/c4DatabaseTest.cc
@@ -16,7 +16,7 @@
 #include "c4Test.hh"  // IWYU pragma: keep
 #include "c4DocEnumerator.h"
 #include "c4BlobStore.h"
-#include "c4IndexTypes.h"
+#include "c4Index.h"
 #include "c4Query.h"
 #include "c4Collection.h"
 #include "Error.hh"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b17ca1c3f..9b03a01be 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -250,6 +250,7 @@ set(
     vendor/mbedtls/include
     vendor/mbedtls/crypto/include
     vendor/sockpp/include
+    vendor/vector_search
 )
 
 target_include_directories(
diff --git a/LiteCore/Database/CollectionImpl.hh b/LiteCore/Database/CollectionImpl.hh
index b6cd7e51d..0a61db54c 100644
--- a/LiteCore/Database/CollectionImpl.hh
+++ b/LiteCore/Database/CollectionImpl.hh
@@ -416,24 +416,48 @@ namespace litecore {
                     break;
                 case kC4VectorIndex:
                     if ( indexOptions ) {
-                        auto& c4Opt   = indexOptions->vector;
-                        auto& vecOpt  = options.emplace<IndexSpec::VectorOptions>(c4Opt.dimensions);
-                        vecOpt.metric = IndexSpec::VectorOptions::MetricType(c4Opt.metric);
-
-                        vecOpt.clustering.type = IndexSpec::VectorOptions::ClusteringType(c4Opt.clustering.type);
-                        vecOpt.clustering.flat_centroids      = c4Opt.clustering.flat_centroids;
-                        vecOpt.clustering.multi_subquantizers = c4Opt.clustering.multi_subquantizers;
-                        vecOpt.clustering.multi_bits          = c4Opt.clustering.multi_bits;
-
-                        vecOpt.encoding.type             = IndexSpec::VectorOptions::EncodingType(c4Opt.encoding.type);
-                        vecOpt.encoding.pq_subquantizers = c4Opt.encoding.pq_subquantizers;
-                        vecOpt.encoding.bits             = c4Opt.encoding.bits;
-
-                        vecOpt.minTrainingSize = c4Opt.minTrainingSize;
-                        vecOpt.maxTrainingSize = c4Opt.maxTrainingSize;
-                        vecOpt.numProbes       = c4Opt.numProbes;
-
-                        vecOpt.lazy = c4Opt.lazy;
+                        auto& c4Opt       = indexOptions->vector;
+                        auto& vecOpt      = options.emplace<IndexSpec::VectorOptions>();
+                        vecOpt.dimensions = c4Opt.dimensions;
+                        // C4 metric values don't match vectorsearch::Metric; map each case explicitly.
+                        switch ( c4Opt.metric ) {
+                            case kC4VectorMetricEuclidean:
+                                vecOpt.metric = vectorsearch::Metric::Euclidean2;
+                                break;
+                            case kC4VectorMetricCosine:
+                                vecOpt.metric = vectorsearch::Metric::Cosine;
+                                break;
+                            case kC4VectorMetricDefault:
+                                break;  // leave vecOpt.metric as constructed
+                        }
+                        switch ( c4Opt.clustering.type ) {
+                            case kC4VectorClusteringFlat:
+                                vecOpt.clustering = vectorsearch::FlatClustering{c4Opt.clustering.flat_centroids};
+                                break;
+                            case kC4VectorClusteringMulti:
+                                vecOpt.clustering = vectorsearch::MultiIndexClustering{
+                                        c4Opt.clustering.multi_subquantizers, c4Opt.clustering.multi_bits};
+                                break;
+                        }
+                        switch ( c4Opt.encoding.type ) {
+                            case kC4VectorEncodingNone:
+                                vecOpt.encoding = vectorsearch::NoEncoding{};
+                                break;
+                            case kC4VectorEncodingPQ:
+                                vecOpt.encoding =
+                                        vectorsearch::PQEncoding{c4Opt.encoding.pq_subquantizers, c4Opt.encoding.bits};
+                                break;
+                            case kC4VectorEncodingSQ:
+                                vecOpt.encoding = vectorsearch::SQEncoding{c4Opt.encoding.bits};
+                                break;
+                            case kC4VectorEncodingDefault:
+                                break;
+                        }
+                        vecOpt.minTrainingCount = c4Opt.minTrainingSize;
+                        vecOpt.maxTrainingCount = c4Opt.maxTrainingSize;
+                        vecOpt.probeCount       = c4Opt.numProbes;
+                        vecOpt.lazy             = c4Opt.lazy;
+                        vecOpt.validate();
                     } else {
                         error::_throw(error::InvalidParameter, "Vector index requires options");
                     }
@@ -456,7 +480,7 @@ namespace litecore {
             FLEncoder_BeginArray(enc, 2);
             for ( const auto& spec : keyStore().getIndexes() ) {
                 if ( fullInfo ) {
-                    FLEncoder_BeginDict(enc, 3);
+                    FLEncoder_BeginDict(enc, 5);
                     FLEncoder_WriteKey(enc, slice("name"));
                     FLEncoder_WriteString(enc, slice(spec.name));
                     FLEncoder_WriteKey(enc, slice("type"));
@@ -472,6 +496,10 @@ namespace litecore {
                             FLEncoder_WriteString(enc, slice("n1ql"));
                             break;
                     }
+                    if ( auto vecOpts = spec.vectorOptions() ) {
+                        FLEncoder_WriteKey(enc, "vector_options"_sl);
+                        FLEncoder_WriteString(enc, slice(vecOpts->createArgs()));
+                    }
                     FLEncoder_EndDict(enc);
                 } else {
                     FLEncoder_WriteString(enc, slice(spec.name));
diff --git a/LiteCore/Query/IndexSpec.cc b/LiteCore/Query/IndexSpec.cc
index 416e811c1..8b5a63487 100644
--- a/LiteCore/Query/IndexSpec.cc
+++ b/LiteCore/Query/IndexSpec.cc
@@ -31,10 +31,9 @@ namespace litecore {
         , expression(std::move(expression_))
         , queryLanguage(queryLanguage_)
         , options(std::move(opt)) {
-        if ( auto whichOpts = options.index() ) {
-            if ( (type == kFullText && whichOpts != 1) || (type == kVector && whichOpts != 2) )
-                error::_throw(error::LiteCoreError::InvalidParameter, "Invalid options type for index");
-        }
+        auto whichOpts = options.index();
+        if ( (type == kFullText && whichOpts != 1 && whichOpts != 0) || (type == kVector && whichOpts != 2) )
+            error::_throw(error::LiteCoreError::InvalidParameter, "Invalid options type for index");
     }
 
     IndexSpec::IndexSpec(IndexSpec&&) = default;
diff --git a/LiteCore/Query/IndexSpec.hh b/LiteCore/Query/IndexSpec.hh
index c05bf7eb8..c536ac776 100644
--- a/LiteCore/Query/IndexSpec.hh
+++ b/LiteCore/Query/IndexSpec.hh
@@ -12,6 +12,7 @@
 
 #pragma once
 #include "Base.hh"
+#include "VectorIndexSpec.hh"
 #include <optional>
 #include <string>
 #include <variant>
@@ -29,7 +30,7 @@ namespace litecore {
     };
 
     struct IndexSpec {
-        /// The types of indexes.
+        /// The types of indexes. (Values MUST match C4IndexType)
         enum Type {
             kValue,       ///< Regular index of property value
             kFullText,    ///< Full-text index, for MATCH queries. Uses IndexSpec::FTSOptions.
@@ -47,52 +48,9 @@ namespace litecore {
         };
 
         /// Options for a vector index.
-        struct VectorOptions {
-            enum MetricType {
-                DefaultMetric,  ///< Use default metric, Euclidean
-                Euclidean,      ///< Euclidean distance (squared)
-                Cosine,         ///< Cosine distance (1.0 - cosine similarity)
-            };                  // Note: values must match C4VectorMetricType in c4IndexTypes.h
-
-            enum ClusteringType {
-                Flat,
-                Multi,
-            };  // Note: values must match C4VectorClusteringType in c4IndexTypes.h
-
-            enum EncodingType {
-                DefaultEncoding,  ///< Use default encoding, which is currently SQ8Bit
-                NoEncoding,       ///< No encoding; 4 bytes per dimension, no data loss
-                PQ,               ///< Product Quantizer
-                SQ,               ///< Scalar Quantizer
-            };                    // Note: values must match C4VectorEncodingType in c4IndexTypes.h
-
-            struct Clustering {
-                ClusteringType type;
-                unsigned       flat_centroids;
-                unsigned       multi_subquantizers;  ///< Number of pieces to split vectors into (for multi)
-                unsigned       multi_bits;           ///< log2 of # of centroids per subquantizer (for multi)
-            };
-
-            struct Encoding {
-                EncodingType type;              ///< Encoding type: none, PQ, SQ
-                unsigned     pq_subquantizers;  ///< Number of subquantizers (for PQ)
-                unsigned     bits;              ///< Number of bits (for PQ and SQ)
-            };
-
-            unsigned   dimensions;                 ///< Number of dimensions
-            MetricType metric{DefaultMetric};      ///< Distance metric
-            Clustering clustering{Flat};           ///< Clustering type & parameters
-            Encoding   encoding{DefaultEncoding};  ///< Vector compression type & parameters
-
-            unsigned minTrainingSize{0};  ///< Minimum # of vectors to train index (>= 25*numCentroids)
-            unsigned maxTrainingSize{0};  ///< Maximum # of vectors to train index on (<= 256*numCentroids)
-            unsigned numProbes{0};        ///< Default # of probes when querying
-
-            bool lazy{false};
-
-            /// Constructor. Number of dimensions is a required parameter.
-            explicit VectorOptions(unsigned d) : dimensions(d) {}
-        };
+        using VectorOptions = vectorsearch::IndexSpec;
+
+        static constexpr vectorsearch::SQEncoding DefaultEncoding{8};
 
         /// Index options. If not empty (the first state), must match the index type.
         using Options = std::variant<std::monostate, FTSOptions, VectorOptions>;
diff --git a/LiteCore/Query/SQLiteDataFile+Indexes.cc b/LiteCore/Query/SQLiteDataFile+Indexes.cc
index d6b81e725..6bc2ec9d2 100644
--- a/LiteCore/Query/SQLiteDataFile+Indexes.cc
+++ b/LiteCore/Query/SQLiteDataFile+Indexes.cc
@@ -372,32 +372,4 @@ namespace litecore {
         }
     }
 
-    bool SQLiteKeyStore::isIndexTrained(fleece::slice name) const {
-        auto specs = getIndexes();
-        for ( const auto& spec : specs ) {
-            if ( name == spec.name ) {
-                if ( spec.type != IndexSpec::kVector ) {
-                    error::_throw(error::InvalidParameter, "Index '%.*s' is not a vector index", SPLAT(name));
-                }
-
-                // IMPORTANT: These are implementation details that will break this functionality if changed
-                // in the mobile-vector-search repo!
-                static const char* vectorTableNameSuffix = "_vectorsearchImpl";
-                static const char* vectorDataTableName   = "vectorSearchIndexData";
-                // END
-
-                string sql;
-                if ( !db().getSchema(vectorDataTableName, "table", vectorDataTableName, sql) ) { return false; }
-                auto vectorTableName = db().auxiliaryTableName(tableName(), KeyStore::kVectorSeparator, (string)name)
-                                       + vectorTableNameSuffix;
-                auto rawResult = db().rawQuery(format("SELECT tableName FROM %s WHERE tableName = '%s'",
-                                                      vectorDataTableName, vectorTableName.c_str()));
-                auto result    = Value::fromTrustedData(rawResult)->asArray();
-                return result->count() == 1;
-            }
-        }
-
-        error::_throw(error::NoSuchIndex);
-    }
-
 }  // namespace litecore
diff --git a/LiteCore/Query/SQLiteKeyStore+Indexes.cc b/LiteCore/Query/SQLiteKeyStore+Indexes.cc
index 66f888968..d9b706c2f 100644
--- a/LiteCore/Query/SQLiteKeyStore+Indexes.cc
+++ b/LiteCore/Query/SQLiteKeyStore+Indexes.cc
@@ -132,7 +132,7 @@ namespace litecore {
         _createFlagsIndex("blobs", DocumentFlags::kHasAttachments, _createdBlobsIndex);
     }
 
-    optional<IndexSpec> SQLiteKeyStore::getIndex(slice indexName) {
+    optional<IndexSpec> SQLiteKeyStore::getIndex(slice indexName) const {
         optional<SQLiteIndexSpec> spec = db().getIndex(indexName);
         if ( spec && spec->keyStoreName != name() ) spec = nullopt;
         return spec;
diff --git a/LiteCore/Query/SQLiteKeyStore+VectorIndex.cc b/LiteCore/Query/SQLiteKeyStore+VectorIndex.cc
index 6b9fba45a..264b77473 100644
--- a/LiteCore/Query/SQLiteKeyStore+VectorIndex.cc
+++ b/LiteCore/Query/SQLiteKeyStore+VectorIndex.cc
@@ -10,20 +10,19 @@
 // the file licenses/APL2.txt.
 //
 
-#ifdef COUCHBASE_ENTERPRISE
-
-#    include <cstdio>
-
-#    include "SQLiteKeyStore.hh"
-#    include "SQLiteDataFile.hh"
-#    include "QueryParser.hh"
-#    include "SQLUtil.hh"
-#    include "SQLite_Internal.hh"
-#    include "StringUtil.hh"
-#    include "Array.hh"
-#    include "Error.hh"
-#    include "SQLiteCpp/Exception.h"
-#    include <sstream>
+#include <cstdio>
+
+#include "SQLiteKeyStore.hh"
+#include "SQLiteDataFile.hh"
+#include "QueryParser.hh"
+#include "SQLUtil.hh"
+#include "SQLite_Internal.hh"
+#include "StringUtil.hh"
+#include "Array.hh"
+#include "Error.hh"
+#include "SQLiteCpp/Statement.h"
+#include "SQLiteCpp/Exception.h"
+#include <sstream>
 
 using namespace std;
 using namespace fleece;
@@ -31,73 +30,31 @@ using namespace fleece::impl;
 
 namespace litecore {
 
+#ifdef COUCHBASE_ENTERPRISE
+
     // Vector search index for ML / predictive query, using the vectorsearch extension.
     // https://github.com/couchbaselabs/mobile-vector-search/blob/main/README_Extension.md
 
-    static constexpr const char* kMetricNames[] = {nullptr, "euclidean2", "cosine"};
-
-    /// Returns the SQL expression to create a vectorsearch virtual table.
-    static string createVectorSearchTableSQL(string_view vectorTableName, const IndexSpec& spec) {
-        stringstream stmt;
-        stmt << "CREATE VIRTUAL TABLE " << sqlIdentifier(vectorTableName) << " USING vectorsearch(";
-        Assert(spec.vectorOptions() != nullptr);
-        IndexSpec::VectorOptions const& options = *spec.vectorOptions();
-        stmt << "dimensions=" << options.dimensions << ',';
-        if ( options.metric != IndexSpec::VectorOptions::DefaultMetric ) {
-            stmt << "metric=" << kMetricNames[options.metric] << ',';
-        }
-        switch ( options.clustering.type ) {
-            case IndexSpec::VectorOptions::Flat:
-                stmt << "clustering=flat" << options.clustering.flat_centroids << ',';
-                break;
-            case IndexSpec::VectorOptions::Multi:
-                stmt << "clustering=multi" << options.clustering.multi_subquantizers << 'x'
-                     << options.clustering.multi_bits << ',';
-                break;
-            default:
-                error::_throw(error::InvalidParameter, "invalid vector clustering type");
-        }
-        switch ( options.encoding.type ) {
-            case IndexSpec::VectorOptions::DefaultEncoding:
-                break;
-            case IndexSpec::VectorOptions::NoEncoding:
-                stmt << "encoding=none,";
-                break;
-            case IndexSpec::VectorOptions::PQ:
-                stmt << "encoding=PQ" << options.encoding.pq_subquantizers << 'x' << options.encoding.bits << ',';
-                break;
-            case IndexSpec::VectorOptions::SQ:
-                stmt << "encoding=SQ" << options.encoding.bits << ',';
-                break;
-            default:
-                error::_throw(error::InvalidParameter, "invalid vector encoding type");
-        }
-        if ( options.numProbes > 0 ) stmt << "probes=" << options.numProbes << ',';
-        if ( options.maxTrainingSize > 0 ) stmt << "maxToTrain=" << options.maxTrainingSize << ',';
-        stmt << "minToTrain=" << options.minTrainingSize;
-        if ( QueryLog.effectiveLevel() <= LogLevel::Verbose )
-            stmt << ",verbose";  // Enable vectorsearch verbose logging (via printf, for now)
-        stmt << ")";
-        return stmt.str();
-    }
-
     // Creates a vector-similarity index.
     bool SQLiteKeyStore::createVectorIndex(const IndexSpec& spec) {
         auto vectorTableName = db().auxiliaryTableName(tableName(), KeyStore::kVectorSeparator, spec.name);
+        auto vectorOptions   = spec.vectorOptions();
+        Assert(vectorOptions);
 
         // Generate a SQL expression to get the vector:
         QueryParser qp(db(), collectionName(), tableName());
         qp.setBodyColumnName("new.body");
         string vectorExpr;
         if ( auto what = spec.what(); what && what->count() == 1 )
-            vectorExpr = qp.vectorToIndexExpressionSQL(what->get(0), spec.vectorOptions()->dimensions);
+            vectorExpr = qp.vectorToIndexExpressionSQL(what->get(0), vectorOptions->dimensions);
         else
             error::_throw(error::Unimplemented, "Vector index doesn't support multiple properties");
 
         // Create the virtual table:
         try {
-            if ( !db().createIndex(spec, this, vectorTableName, createVectorSearchTableSQL(vectorTableName, spec)) )
-                return false;
+            string sql = CONCAT("CREATE VIRTUAL TABLE " << sqlIdentifier(vectorTableName) << " USING vectorsearch("
+                                                        << *vectorOptions << ")");
+            if ( !db().createIndex(spec, this, vectorTableName, sql) ) return false;
         } catch ( SQLite::Exception const& x ) {
             string_view what(x.what());
             if ( hasPrefix(what, "no such module") ) {
@@ -123,7 +80,7 @@ namespace litecore {
         createTrigger(vectorTableName, "preupdate", "BEFORE UPDATE OF body", whereOldSQL, deleteOldSQL);
         createTrigger(vectorTableName, "del", "AFTER DELETE", whereOldSQL, deleteOldSQL);
 
-        bool lazy = spec.vectorOptions()->lazy;
+        bool lazy = vectorOptions->lazyEmbedding;
         if ( lazy ) {
             // Lazy index: Mark as lazy by initializing lastSeq. Vectors will not be computed
             // automatically; app updates them via the LazyIndex class.
@@ -165,61 +122,35 @@ namespace litecore {
         return "";  // no index found
     }
 
-    static inline unsigned asUInt(string_view sv) {
-        string str(sv);
-        return unsigned(strtoul(str.c_str(), nullptr, 10));
-    }
-
     // The opposite of createVectorSearchTableSQL
     optional<IndexSpec::VectorOptions> SQLiteKeyStore::parseVectorSearchTableSQL(string_view sql) {
-        optional<IndexSpec::VectorOptions> opts;
         // Find the virtual-table arguments in the CREATE TABLE statement:
         auto start = sql.find("vectorsearch(");
-        if ( start == string::npos ) return opts;
+        if ( start == string::npos ) return nullopt;
         start += strlen("vectorsearch(");
         auto end = sql.find(')', start);
-        if ( end == string::npos ) return opts;
+        if ( end == string::npos ) return nullopt;
 
         // Parse each comma-delimited key-value pair:
-        string_view args(&sql[start], end - start);
-        opts.emplace(0);
-        split(args, ",", [&](string_view key) {
-            string_view value;
-            if ( auto eq = key.find('='); eq != string::npos ) {
-                value = key.substr(eq + 1);
-                key   = key.substr(0, eq);
-                if ( value.empty() || key.empty() ) return;
-            }
-            if ( key == "dimensions" ) {
-                opts->dimensions = asUInt(value);
-            } else if ( key == "metric" ) {
-                if ( value == "euclidean2" ) opts->metric = IndexSpec::VectorOptions::Euclidean;
-                else if ( value == "cosine" )
-                    opts->metric = IndexSpec::VectorOptions::Cosine;
-            } else if ( key == "minToTrain" ) {
-                opts->minTrainingSize = asUInt(value);
-            } else if ( key == "maxToTrain" ) {
-                opts->maxTrainingSize = asUInt(value);
-            } else if ( key == "probes" ) {
-                opts->numProbes = asUInt(value);
-            } else if ( key == "lazyindex" ) {
-                opts->lazy = (value != "false" && value != "0");
-            } else if ( key == "clustering" ) {
-                if ( hasPrefix(value, "multi") ) opts->clustering = {IndexSpec::VectorOptions::Multi};
-                //TODO: Parse centroid count & other params; see vectorsearch::IndexSpec::setParam()
-            } else if ( key == "encoding" ) {
-                if ( value == "none" ) opts->encoding = {IndexSpec::VectorOptions::NoEncoding};
-                else if ( hasPrefix(value, "PQ") ) {
-                    opts->encoding = {IndexSpec::VectorOptions::PQ};
-                } else if ( hasPrefix(value, "SQ") ) {
-                    opts->encoding = {IndexSpec::VectorOptions::SQ};
-                }
-                //TODO: Parse encoding params; see vectorsearch::IndexSpec::setParam()
-            }
-        });
+        string_view              args(&sql[start], end - start);
+        IndexSpec::VectorOptions opts;
+        split(args, ",", [&](string_view arg) { (void)opts.readArg(arg); });
         return opts;
     }
 
-}  // namespace litecore
+#endif  // COUCHBASE_ENTERPRISE
+
+    bool SQLiteKeyStore::isIndexTrained(fleece::slice name) const {
+        if ( auto spec = db().getIndex(name); spec && spec->keyStoreName == this->name() ) {
+            if ( spec->type != IndexSpec::kVector ) {
+                error::_throw(error::InvalidParameter, "Index '%.*s' is not a vector index", SPLAT(name));
+            }
+            auto q = db().compile(
+                    ("SELECT 1 FROM \""s + spec->indexTableName + "\" WHERE bucket != -1 LIMIT 1").c_str());
+            return q->executeStep();
+        }
 
-#endif
+        error::_throw(error::NoSuchIndex);
+    }
+
+}  // namespace litecore
diff --git a/LiteCore/Storage/BothKeyStore.hh b/LiteCore/Storage/BothKeyStore.hh
index fcccefb0e..481d44789 100644
--- a/LiteCore/Storage/BothKeyStore.hh
+++ b/LiteCore/Storage/BothKeyStore.hh
@@ -98,7 +98,7 @@ namespace litecore {
 
         void deleteIndex(slice name) override { _liveStore->deleteIndex(name); }
 
-        std::optional<IndexSpec> getIndex(slice name) override { return _liveStore->getIndex(name); }
+        std::optional<IndexSpec> getIndex(slice name) const override { return _liveStore->getIndex(name); }
 
         [[nodiscard]] std::vector<IndexSpec> getIndexes() const override { return _liveStore->getIndexes(); }
 
diff --git a/LiteCore/Storage/KeyStore.hh b/LiteCore/Storage/KeyStore.hh
index 7117ac865..c5c32b9d5 100644
--- a/LiteCore/Storage/KeyStore.hh
+++ b/LiteCore/Storage/KeyStore.hh
@@ -201,7 +201,7 @@ namespace litecore {
 
         virtual void                                   deleteIndex(slice name)          = 0;
         [[nodiscard]] virtual std::vector<IndexSpec>   getIndexes() const               = 0;
-        [[nodiscard]] virtual std::optional<IndexSpec> getIndex(slice name)             = 0;
+        [[nodiscard]] virtual std::optional<IndexSpec> getIndex(slice name) const       = 0;
         [[nodiscard]] virtual bool                     isIndexTrained(slice name) const = 0;
 
         // public for complicated reasons; clients should never call it
diff --git a/LiteCore/Storage/SQLiteKeyStore.hh b/LiteCore/Storage/SQLiteKeyStore.hh
index 7d1d54422..698987c44 100644
--- a/LiteCore/Storage/SQLiteKeyStore.hh
+++ b/LiteCore/Storage/SQLiteKeyStore.hh
@@ -81,7 +81,7 @@ namespace litecore {
 
         void                     deleteIndex(slice name) override;
         std::vector<IndexSpec>   getIndexes() const override;
-        std::optional<IndexSpec> getIndex(slice name) override;
+        std::optional<IndexSpec> getIndex(slice name) const override;
         bool                     isIndexTrained(slice name) const override;
 
         std::vector<alloc_slice> withDocBodies(const std::vector<slice>& docIDs, WithDocBodyCallback callback) override;
diff --git a/LiteCore/tests/CMakeLists.txt b/LiteCore/tests/CMakeLists.txt
index b22e0fe02..c37d58c00 100644
--- a/LiteCore/tests/CMakeLists.txt
+++ b/LiteCore/tests/CMakeLists.txt
@@ -151,6 +151,7 @@ target_include_directories(
     ${TOP}REST/tests
     ${TOP}vendor/sockpp/include
     ${TOP}vendor/fleece/vendor/date/include
+    ${TOP}vendor/vector_search
 )
 
 target_link_libraries(
diff --git a/LiteCore/tests/LazyVectorQueryTest.cc b/LiteCore/tests/LazyVectorQueryTest.cc
index 1eae8d211..7d6f9e29d 100644
--- a/LiteCore/tests/LazyVectorQueryTest.cc
+++ b/LiteCore/tests/LazyVectorQueryTest.cc
@@ -80,10 +80,8 @@ class LazyVectorQueryTest : public VectorQueryTest {
     }
 
     void createVectorIndex() {
-        IndexSpec::VectorOptions options(kDimension);
-        options.clustering.type           = IndexSpec::VectorOptions::Flat;
-        options.clustering.flat_centroids = 16;
-        options.lazy                      = true;
+        IndexSpec::VectorOptions options(kDimension, vectorsearch::FlatClustering{16}, IndexSpec::DefaultEncoding);
+        options.lazyEmbedding = true;
         VectorQueryTest::createVectorIndex("factorsindex", "[  ['.num'] ]", options);
 
         _lazyIndex = make_retained<LazyIndex>(*store, "factorsindex");
diff --git a/LiteCore/tests/PredictiveVectorQueryTest.cc b/LiteCore/tests/PredictiveVectorQueryTest.cc
index a094fbfd4..75c30c318 100644
--- a/LiteCore/tests/PredictiveVectorQueryTest.cc
+++ b/LiteCore/tests/PredictiveVectorQueryTest.cc
@@ -82,9 +82,7 @@ class PredictiveVectorQueryTest : public VectorQueryTest {
     }
 
     void createVectorIndex(QueryLanguage lang) {
-        IndexSpec::VectorOptions options(5);
-        options.clustering.type           = IndexSpec::VectorOptions::Flat;
-        options.clustering.flat_centroids = 16;
+        IndexSpec::VectorOptions options(5, vectorsearch::FlatClustering{16}, IndexSpec::DefaultEncoding);
         if ( lang == QueryLanguage::kJSON ) {
             VectorQueryTest::createVectorIndex(
                     "factorsindex", "[ ['PREDICTION()', 'factors', {number: ['.num']}, '.vec'] ]", options, lang);
diff --git a/LiteCore/tests/VectorQueryTest.cc b/LiteCore/tests/VectorQueryTest.cc
index ad35d5204..50a5ce915 100644
--- a/LiteCore/tests/VectorQueryTest.cc
+++ b/LiteCore/tests/VectorQueryTest.cc
@@ -31,10 +31,7 @@ class SIFTVectorQueryTest : public VectorQueryTest {
     SIFTVectorQueryTest() : VectorQueryTest(0) {}
 
     IndexSpec::VectorOptions vectorIndexOptions() const {
-        IndexSpec::VectorOptions options(128);
-        options.clustering.type           = IndexSpec::VectorOptions::Flat;
-        options.clustering.flat_centroids = 256;
-        return options;
+        return IndexSpec::VectorOptions(128, vectorsearch::FlatClustering{256}, IndexSpec::DefaultEncoding);
     }
 
     void createVectorIndex() {
@@ -147,8 +144,8 @@ N_WAY_TEST_CASE_METHOD(SIFTVectorQueryTest, "Create/Delete Vector Index", "[Quer
     REQUIRE(vecOptions);
     auto trueOptions = vectorIndexOptions();
     CHECK(vecOptions->dimensions == trueOptions.dimensions);
-    CHECK(vecOptions->clustering.type == trueOptions.clustering.type);
-    CHECK(vecOptions->encoding.type == trueOptions.encoding.type);
+    CHECK(vecOptions->clusteringType() == trueOptions.clusteringType());
+    CHECK(vecOptions->encodingType() == trueOptions.encodingType());
 
     CHECK(db->allKeyStoreNames() == allKeyStores);  // CBL-3824, CBL-5369
     // Delete a doc too:
diff --git a/Replicator/tests/ReplicatorSGTest.cc b/Replicator/tests/ReplicatorSGTest.cc
index 375a253a5..dac25ff6a 100644
--- a/Replicator/tests/ReplicatorSGTest.cc
+++ b/Replicator/tests/ReplicatorSGTest.cc
@@ -16,6 +16,7 @@
 #include "c4Collection.h"
 #include "c4Document+Fleece.h"
 #include "c4DocEnumerator.h"
+#include "c4Index.h"
 #include "Stopwatch.hh"
 #include "StringUtil.hh"
 #include "SecureRandomize.hh"
diff --git a/Xcode/LiteCore.xcodeproj/project.pbxproj b/Xcode/LiteCore.xcodeproj/project.pbxproj
index eb2afdc60..b25b0d261 100644
--- a/Xcode/LiteCore.xcodeproj/project.pbxproj
+++ b/Xcode/LiteCore.xcodeproj/project.pbxproj
@@ -66,6 +66,7 @@
 		270C6B8C1EBA2CD600E73415 /* LogEncoder.cc in Sources */ = {isa = PBXBuildFile; fileRef = 270C6B891EBA2CD600E73415 /* LogEncoder.cc */; };
 		270C6B981EBA3AD200E73415 /* LogEncoderTest.cc in Sources */ = {isa = PBXBuildFile; fileRef = 270C6B901EBA2D5600E73415 /* LogEncoderTest.cc */; };
 		270C7D522022916D00FF86D3 /* CoreFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 270515581D907F6200D62D05 /* CoreFoundation.framework */; };
+		270D5B8A2C110ED800AA91E7 /* VectorIndexSpec.cc in Sources */ = {isa = PBXBuildFile; fileRef = 270D5B892C110ED800AA91E7 /* VectorIndexSpec.cc */; };
 		270F2BD52301E8AE00D8DB21 /* TCPSocket.hh in Headers */ = {isa = PBXBuildFile; fileRef = 270F2BD32301E8AE00D8DB21 /* TCPSocket.hh */; };
 		2712F5AF25D5A9AB0082D526 /* c4Error.cc in Sources */ = {isa = PBXBuildFile; fileRef = 2712F5AE25D5A9AB0082D526 /* c4Error.cc */; };
 		27139B3118F8E9750021A9A3 /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 275072AB18E4A68E00A80C5A /* XCTest.framework */; };
@@ -901,6 +902,9 @@
 		270C6B891EBA2CD600E73415 /* LogEncoder.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LogEncoder.cc; sourceTree = "<group>"; };
 		270C6B8A1EBA2CD600E73415 /* LogEncoder.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = LogEncoder.hh; sourceTree = "<group>"; };
 		270C6B901EBA2D5600E73415 /* LogEncoderTest.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LogEncoderTest.cc; sourceTree = "<group>"; };
+		270D5B852C110ED800AA91E7 /* VectorIndexSpec.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = VectorIndexSpec.hh; sourceTree = "<group>"; };
+		270D5B892C110ED800AA91E7 /* VectorIndexSpec.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = VectorIndexSpec.cc; sourceTree = "<group>"; };
+		270D5B8C2C122B9500AA91E7 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
 		270F2BD32301E8AE00D8DB21 /* TCPSocket.hh */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = TCPSocket.hh; sourceTree = "<group>"; };
 		270F2BD42301E8AE00D8DB21 /* TCPSocket.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = TCPSocket.cc; sourceTree = "<group>"; };
 		2712F5AE25D5A9AB0082D526 /* c4Error.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = c4Error.cc; sourceTree = "<group>"; };
@@ -1887,6 +1891,16 @@
 			path = EE;
 			sourceTree = "<group>";
 		};
+		270D5B8B2C122B5200AA91E7 /* vector_search */ = {
+			isa = PBXGroup;
+			children = (
+				270D5B892C110ED800AA91E7 /* VectorIndexSpec.cc */,
+				270D5B852C110ED800AA91E7 /* VectorIndexSpec.hh */,
+				270D5B8C2C122B9500AA91E7 /* README.md */,
+			);
+			path = vector_search;
+			sourceTree = "<group>";
+		};
 		271057C21D3997230018247B /* C++ Tests */ = {
 			isa = PBXGroup;
 			children = (
@@ -2971,6 +2985,7 @@
 				27AFF38F23036A7100B4D6C4 /* socketpp */,
 				27EF7FFA1914296D00A327B9 /* sqlite3-unicodesn */,
 				27D74A731D4D3F0700D806E0 /* SQLiteCpp */,
+				270D5B8B2C122B5200AA91E7 /* vector_search */,
 				2744B3602418566F005A194D /* zlib */,
 			);
 			name = vendor;
@@ -4077,7 +4092,6 @@
 				27431BC7258A8AB0009E3EC5 /* QuietReporter.hh in Sources */,
 				270C6B981EBA3AD200E73415 /* LogEncoderTest.cc in Sources */,
 				274D18ED2617DFE40018D39C /* c4DocumentTest_Internal.cc in Sources */,
-				27D62A3F2B72D92B004C0787 /* LazyVectorQueryTest.cc in Sources */,
 				274D17C22615445B0018D39C /* DBAccessTestWrapper.cc in Sources */,
 				27FA09A01D6FA380005888AA /* DataFileTest.cc in Sources */,
 				274D165D261250220018D39C /* c4CollectionTest.cc in Sources */,
@@ -4093,6 +4107,7 @@
 				27098AAA216C2ED6002751DA /* PredictiveQueryTest.cc in Sources */,
 				27BEEE792A783A17005AD4BF /* VectorQueryTest.cc in Sources */,
 				27F602FE2A968503006FA1D0 /* PredictiveVectorQueryTest.cc in Sources */,
+				27D62A3F2B72D92B004C0787 /* LazyVectorQueryTest.cc in Sources */,
 				27A83D54269E3E69002B7EBA /* PropertyEncryptionTests.cc in Sources */,
 				272850B51E9BE361009CA22F /* UpgraderTest.cc in Sources */,
 				2761F3F71EEA00C3006D4BB8 /* CookieStoreTest.cc in Sources */,
@@ -4396,6 +4411,7 @@
 				272850AB1E9AF53B009CA22F /* Upgrader.cc in Sources */,
 				27469D08233D719800A1EE1A /* PublicKey+Apple.mm in Sources */,
 				274B36D225B271F7001FC28D /* Version.cc in Sources */,
+				270D5B8A2C110ED800AA91E7 /* VectorIndexSpec.cc in Sources */,
 				2744B351241854F2005A194D /* WebSocketImpl.cc in Sources */,
 				2769438C1DCD502A00DB2555 /* c4Observer.cc in Sources */,
 				2744B354241854F2005A194D /* Actor.cc in Sources */,
diff --git a/cmake/platform_base.cmake b/cmake/platform_base.cmake
index 86eae019e..00835617c 100644
--- a/cmake/platform_base.cmake
+++ b/cmake/platform_base.cmake
@@ -92,6 +92,7 @@ function(set_litecore_source_base)
         vendor/SQLiteCpp/src/Transaction.cpp
         vendor/SQLiteCpp/sqlite3/ext/carray.cc
         vendor/SQLiteCpp/sqlite3/ext/carray_bind.cc
+        vendor/vector_search/VectorIndexSpec.cc
         Replicator/c4Replicator.cc
         Replicator/c4Replicator_CAPI.cc
         Replicator/c4Socket.cc
diff --git a/vendor/vector_search/README.md b/vendor/vector_search/README.md
new file mode 100644
index 000000000..120352ff5
--- /dev/null
+++ b/vendor/vector_search/README.md
@@ -0,0 +1,9 @@
+#  `vector_search` files
+
+These source files are copied from the private [vectorsearch][VECTORSEARCH] repo.
+
+Using the same source code ensures that vector index parameters stay compatible, are validated consistently, and are communicated correctly from CBL to the `vectorsearch` virtual table.
+
+Any changes made in either repo should be copied to the other!
+
+[VECTORSEARCH]: https://github.com/couchbaselabs/mobile-vector-search
diff --git a/vendor/vector_search/VectorIndexSpec.cc b/vendor/vector_search/VectorIndexSpec.cc
new file mode 100644
index 000000000..a8f90053e
--- /dev/null
+++ b/vendor/vector_search/VectorIndexSpec.cc
@@ -0,0 +1,332 @@
+//
+// VectorIndexSpec.cc
+//
+// 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// NOTE: This file appears in both the vectorsearch and couchbase-lite-core repos.
+// Any changes made in one should be copied to the other!
+
+#include "VectorIndexSpec.hh"
+#include <cinttypes>
+#include <iostream>
+#include <sstream>
+
+#ifdef SQLITECPP_BUILDING_EXTENSION
+#include <sqlite3ext.h>
+SQLITE_EXTENSION_INIT3
+#else
+#include <sqlite3.h>  // for sqlite3_log
+#endif
+
+namespace vectorsearch {
+    using namespace std;
+
+
+#pragma mark - VALIDATION:
+
+
+    static void check(bool condition, const char* what, const char* problem) {  // Throws unless `condition` holds.
+        if (!condition) {
+            string message = "invalid vector index spec: ";  // message = this prefix + what + problem
+            message += what;
+            message += problem;
+            throw std::invalid_argument(message);
+        }
+    }
+
+    template <typename T>
+    static void check(T value, T minVal, T maxVal, const char* what) {  // Range check: minVal <= value <= maxVal.
+        check(value >= minVal, what, " parameter is too small");
+        check(value <= maxVal, what, " parameter is too large");
+    }
+
+
+    void IndexSpec::validate() const {
+        check(dimensions, kMinDimensions, kMaxDimensions, "dimension");
+        switch (clusteringType()) {
+            case ClusteringType::Flat: {
+                auto &c = std::get<FlatClustering>(clustering);
+                check(c.numCentroids,
+                      kMinFlatClustering.numCentroids,
+                      kMaxFlatClustering.numCentroids, 
+                      "centroids");
+                break;
+            }
+            case ClusteringType::MultiIndex: {
+                auto& c = std::get<MultiIndexClustering>(clustering);
+                check(c.subquantizers,
+                      kMinMultiIndexClustering.subquantizers,
+                      kMaxMultiIndexClustering.subquantizers,
+                      "clustering subquantizers");
+                check(c.bitsPerSub,
+                      kMinMultiIndexClustering.bitsPerSub,
+                      kMaxMultiIndexClustering.bitsPerSub,
+                      "clustering bits");
+                check(dimensions % c.subquantizers == 0,
+                      "clustering subquantizers",
+                      " must evenly divide the number of dimensions");
+                break;
+            }
+        }
+
+        if (probeCount)
+            check(*probeCount, 1u, numCentroids(), "probe count");
+
+        switch (encodingType()) {
+            case EncodingType::None:
+                break;
+            case EncodingType::PQ: {
+                auto& e = std::get<PQEncoding>(encoding);
+                check(e.subquantizers,
+                      kMinPQEncoding.subquantizers,
+                      kMaxPQEncoding.subquantizers,
+                      "encoding subquantizers");
+                check(e.bitsPerSub,
+                      kMinPQEncoding.bitsPerSub,
+                      kMaxPQEncoding.bitsPerSub,
+                      "encoding bits");
+                check(dimensions % e.subquantizers == 0,
+                      "encoding subquantizers",
+                      " must evenly divide the number of dimensions");
+                break;
+            }
+            case EncodingType::SQ: {
+                auto& e = std::get<SQEncoding>(encoding);
+                check(e.bitsPerDimension == 4 || e.bitsPerDimension == 6 || e.bitsPerDimension == 8,
+                      "encoding bits", " must be 4, 6 or 8");
+                break;
+            }
+        }
+    }
+
+
+    void IndexSpec::resolveTrainingCounts() {
+        // If maxTrainingCount wasn't given or is zero, set it to a reasonable value:
+        unsigned nCent = numCentroidsToTrain();
+        if (!maxTrainingCount || *maxTrainingCount == 0)
+            maxTrainingCount = kRecommendedMaxTrainingVectorsPerCentroid * nCent;
+
+        if (!minTrainingCount || *minTrainingCount == 0) {
+            // If minTrainingCount wasn't given, set a default value.
+            // (kRecommendedMinTrainingVectorsPerCentroid would be better,
+            // but would break compatibility.)
+            minTrainingCount = kMinTrainingVectorsPerCentroid * nCent;
+        } else if (int64_t m = kMinTrainingVectorsPerCentroid * nCent; *minTrainingCount < m) {
+            sqlite3_log(SQLITE_WARNING,
+                        "vectorsearch: minTrainingSize of %" PRIi64 " is too small;"
+                        " raising it to %" PRIi64 ", based on %u centroids.",
+                        *minTrainingCount, m, nCent);
+            minTrainingCount = m;
+        }
+    }
+
+
+#pragma mark - PARSING:
+
+
+    static bool popPrefix(string_view &str, string_view prefix) {  // Strips `prefix` from the front of `str`.
+        auto prefixLen = prefix.size();
+        if (prefixLen > str.size() || prefix != str.substr(0, prefixLen))
+            return false;  // not a prefix: `str` is left unchanged
+        str = str.substr(prefixLen);  // advance the view past the prefix
+        return true;
+    }
+
+    static unsigned asUInt(string_view str, string_view forKey) {  // `forKey` only labels the error message.
+        try {
+            return unsigned(std::stoul(string(str)));  // NOTE: stoul stops at the first non-digit ("16abc" -> 16)
+        } catch (...) {  // any stoul failure is reported as invalid_argument
+            throw invalid_argument("invalid numeric value '"s + string(str) + "' for " + string(forKey));
+        }
+    }
+
+    static bool asBool(string_view str) {  // Everything except "false" and "0" is true, including "".
+        return str != "false" && str != "0";
+    }
+
+
+    static pair<unsigned,unsigned> readPQ(string_view value, string_view forKey) {
+        if (auto x = value.find('x'); x != string::npos)
+            return { asUInt(value.substr(0, x), forKey), asUInt(value.substr(x + 1), forKey) };
+        else
+            throw invalid_argument("value of '"s + string(forKey) +
+                                   " must be of form <subquantizers> x <bits>, e.g. 32x8");
+    }
+
+
+    bool IndexSpec::readArg(std::string_view key, std::string_view value) {
+        if (key == "dimensions") {
+            dimensions = asUInt(value, "dimensions");
+        } else if (key == "metric") {
+            if (value == "euclidean2")
+                metric = Metric::Euclidean2;
+            else if (value == "cosine")
+                metric = Metric::Cosine;
+            else
+                throw std::invalid_argument("unknown metric");
+        } else if (key == "clustering") {
+            if (popPrefix(value, "flat")) {
+                clustering = FlatClustering{asUInt(value, key)};
+            } else if (popPrefix(value, "multi")) {
+                auto [sub, bits] = readPQ(value, key);
+                clustering = MultiIndexClustering{sub, bits};
+            } else {
+                throw std::invalid_argument("unknown clustering");
+            }
+        } else if (key == "centroids") {
+            clustering = FlatClustering{asUInt(value, "centroid count")};
+        } else if (key == "encoding") {
+            if (value == "none")
+                encoding = NoEncoding{};
+            else if (popPrefix(value, "PQ")) {
+                auto [sub, bits] = readPQ(value, "PQ encoding");
+                encoding = PQEncoding(sub, bits);
+            } else if (popPrefix(value, "SQ")) {
+                unsigned v = 8;
+                if (!value.empty())
+                    v = asUInt(value, "SQ encoding");
+                if (v == 4 || v == 6 || v == 8)
+                    encoding = SQEncoding{v};
+                else
+                    throw std::invalid_argument("invalid bits for SQ encoding");
+            } else {
+                throw std::invalid_argument("unknown encoding");
+            }
+        } else if (key == "minToTrain") {
+            if (value == "never")
+                minTrainingCount = kNeverTrain;
+            else
+                minTrainingCount = asUInt(value, "min training size");
+        } else if (key == "maxToTrain") {
+            maxTrainingCount = asUInt(value, "max training size");
+        } else if (key == "probes") {
+            probeCount = asUInt(value, "probe count");
+        } else if (key == "lazyindex") {
+            lazyEncoding = asBool(value);
+        } else if (key == "lazyembedding") {
+            lazyEmbedding = asBool(value);
+        } else {
+            return false; // unknown key
+        }
+        return true; // fall through = success
+    }
+
+
+    // Parses one "key=value" or bare "key" argument and forwards it to readArg(key, value).
+    bool IndexSpec::readArg(string_view arg) {
+        if (arg.empty()) return true;    // no-op
+        string_view value;
+        if (auto eq = arg.find('='); eq != string::npos) {
+            if (eq == 0 || eq + 1 == arg.size())  // empty key, or '=' is the last char (empty value)
+                throw std::invalid_argument("invalid virtual-table argument " + string(arg));
+            value = arg.substr(eq + 1);
+            arg = arg.substr(0, eq);
+        }
+        return readArg(arg, value);
+    }
+
+
+#pragma mark - GENERATING TEXT:
+
+
+    static constexpr const char* kMetricNames[] = {"euclidean2", "cosine"};
+
+    std::ostream& IndexSpec::writeArgs(std::ostream& out) const {
+        out << "dimensions=" << dimensions;
+        if (metric != Metric::Default)
+            out << ",metric=" << kMetricNames[int(metric)];
+        switch (clusteringType()) {
+            case ClusteringType::Flat: {
+                auto& c = std::get<FlatClustering>(clustering);
+                out << ",clustering=flat" << c.numCentroids;
+                break;
+            }
+            case ClusteringType::MultiIndex: {
+                auto& c = std::get<MultiIndexClustering>(clustering);
+                out << ",clustering=multi" << c.subquantizers << 'x' << c.bitsPerSub;
+                break;
+            }
+        }
+        switch (encodingType()) {
+            case EncodingType::None:
+                out << ",encoding=none";
+                break;
+            case EncodingType::PQ: {
+                auto& e = std::get<PQEncoding>(encoding);
+                out << ",encoding=PQ" << e.subquantizers << 'x' << e.bitsPerSub;
+                break;
+            }
+            case EncodingType::SQ: {
+                auto& e = std::get<SQEncoding>(encoding);
+                out << ",encoding=SQ" << e.bitsPerDimension;
+                break;
+            }
+        }
+        if (minTrainingCount)
+            out << ",minToTrain=" << *minTrainingCount;
+        if ( maxTrainingCount )
+            out << ",maxToTrain=" << *maxTrainingCount;
+        if ( probeCount )
+            out << ",probes=" << *probeCount;
+        if (lazyEncoding)
+            out << ",lazyindex=true";
+        if (lazyEmbedding)
+            out << ",lazyembedding=true";
+        return out;
+    }
+
+
+    string IndexSpec::createArgs() const {  // Returns the text that writeArgs() emits, as a string.
+        stringstream stmt;
+        writeArgs(stmt);
+        return stmt.str();
+    }
+
+
+    std::string IndexSpec::description() const {  // Human-readable summary: "<clustering>, <encoding>".
+        stringstream out;
+        switch (clusteringType()) {
+            case ClusteringType::Flat:
+                out << get<FlatClustering>(clustering).numCentroids << " centroids, ";
+                break;
+            case ClusteringType::MultiIndex: {
+                auto& miq = get<MultiIndexClustering>(clustering);
+                out << "multi-index quantizer (" << miq.subquantizers << " subquantizers × "
+                << miq.bitsPerSub << " bits), ";
+                break;
+            }
+        }
+        switch(encodingType()) {
+            case EncodingType::None:
+                out << "no encoding";  // no leading space: the clustering text above already ends with ", "
+                break;
+            case EncodingType::PQ: {
+                auto& pq = get<PQEncoding>(encoding);
+                out << "PQ encoding (" << pq.subquantizers << " subquantizers × "
+                << pq.bitsPerSub << " bits)";
+                break;
+            }
+            case EncodingType::SQ: {
+                auto& sq = get<SQEncoding>(encoding);
+                out << "SQ encoding (" << sq.bitsPerDimension << " bits)";
+                break;
+            }
+        }
+        return out.str();
+    }
+
+
+}
diff --git a/vendor/vector_search/VectorIndexSpec.hh b/vendor/vector_search/VectorIndexSpec.hh
new file mode 100644
index 000000000..ea5ec77fc
--- /dev/null
+++ b/vendor/vector_search/VectorIndexSpec.hh
@@ -0,0 +1,190 @@
+//
+// VectorIndexSpec.hh
+//
+// Copyright 2024-Present Couchbase, Inc.
+//
+// Use of this software is governed by the Business Source License included
+// in the file licenses/BSL-Couchbase.txt.  As of the Change Date specified
+// in that file, in accordance with the Business Source License, use of this
+// software will be governed by the Apache License, Version 2.0, included in
+// the file licenses/APL2.txt.
+//
+
+// NOTE: This file appears in both the vectorsearch and couchbase-lite-core repos.
+// Any changes made in one should be copied to the other!
+
+#pragma once
+#include <iosfwd>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <variant>
+
+namespace vectorsearch {
+
+    /// Distance metric; defines the distance between vectors.
+    enum class Metric {
+        Euclidean2,                 ///< Euclidean distance, squared
+        Cosine,                     ///< Cosine similarity subtracted from 1, so smaller is closer
+        Default = Euclidean2
+    };
+
+    struct FlatClustering {
+        unsigned numCentroids;      ///< Number of buckets to assign the vectors to
+    };
+
+    struct MultiIndexClustering {
+        unsigned subquantizers = 2; ///< Number of pieces each vector is split into
+        unsigned bitsPerSub    = 8; ///< Number of bits of centroid count per piece
+    };
+
+    enum class ClusteringType {Flat, MultiIndex};   ///< Just identifies type of clustering
+
+    using Clustering = std::variant<FlatClustering, MultiIndexClustering>;
+
+
+    struct NoEncoding { };
+
+    struct PQEncoding {
+        unsigned subquantizers;     ///< Number of pieces each vector is split into
+        unsigned bitsPerSub = 8;    ///< Bits for encoding each piece
+
+        explicit constexpr PQEncoding(unsigned sub, unsigned bits =8)
+        :subquantizers(sub), bitsPerSub(bits) { }
+    };
+
+    struct SQEncoding {
+        unsigned bitsPerDimension = 8;          ///< Bits/dimension; must be 4, 6 or 8
+    };
+
+    enum class EncodingType {None, PQ, SQ};     ///< Just identifies type of encoding
+
+    using Encoding = std::variant<NoEncoding, PQEncoding, SQEncoding>;
+
+
+    /** The parameters of a VectorDB. */
+    struct IndexSpec {
+
+        //---- PROPERTIES:
+
+        unsigned                dimensions = 0;                     ///< Vector dimensions
+        Metric                  metric = Metric::Default;           ///< Distance metric
+        Clustering              clustering = MultiIndexClustering{};///< Clustering type
+        Encoding                encoding = SQEncoding{};            ///< Encoding type
+
+        std::optional<int64_t>  minTrainingCount;       ///< Min vectors needed to train
+        std::optional<int64_t>  maxTrainingCount;       ///< Max vectors to train with
+        std::optional<unsigned> probeCount;             ///< Number of buckets to probe
+
+        /// If true, inserted vectors are not encoded or mapped to centroids until the next query.
+        /// @warning  This is not the same meaning of "lazy" as in CBL! See \ref lazyEmbedding.
+        bool                    lazyEncoding = false;
+
+        /// If true, app will use the CBL IndexUpdater API to compute/request vectors for docs.
+        /// @note  This flag is ignored by vectorsearch! It's for the use of LiteCore.
+        bool                    lazyEmbedding = false;
+
+        /// Set `minTrainingCount` to this value (or greater) to disable automatic training.
+        static constexpr int64_t kNeverTrain = 999'999'999;
+
+        //---- CONSTRUCTION:
+
+        IndexSpec() = default;
+
+        IndexSpec(unsigned dim, Clustering q, Encoding e = NoEncoding{})
+            :dimensions(dim), clustering(q), encoding(e) { }
+
+        /// Sets an attribute of an IndexSpec from a key/value pair; useful for CLI.
+        /// See Extension.md for documentation of the supported keys and values.
+        /// @returns true if it applied the param, false if it didn't recognize the key.
+        /// @throws std::invalid_argument if the value is invalid.
+        [[nodiscard]] bool readArg(std::string_view key, std::string_view value);
+
+        /// Same as the other `readArg` but takes a single string of the form `key=value` or `key`.
+        [[nodiscard]] bool readArg(std::string_view arg);
+
+        //---- VALIDATION:
+
+        /// Throws a std::invalid_argument exception if the parameters are invalid.
+        /// Being const, it leaves the spec unmodified; see `resolveTrainingCounts` for defaulting counts.
+        void validate() const;
+
+        /// Ensures `minTrainingCount` and `maxTrainingCount` are set to reasonable values:
+        /// - If either is `nullopt` or 0, it's set to its default (based on the # of centroids.)
+        /// - If min is too small, it's raised to the default, and a warning is logged.
+        void resolveTrainingCounts();
+
+        //---- ACCESSORS:
+
+        ClusteringType clusteringType() const {return ClusteringType(clustering.index());}
+        EncodingType encodingType() const     {return EncodingType(encoding.index());}
+
+        /// The number of centroid points that need to be identified during training.
+        /// This depends on both the clustering type and the encoding, because both PQ and SQ
+        /// encoders have their own internal sets of centroids.
+        /// @warning  FAISS is likely to throw an exception if training is performed with fewer
+        ///           vectors than this number.
+        unsigned numCentroidsToTrain() const {
+            unsigned nCent;
+            if (auto q = std::get_if<MultiIndexClustering>(&clustering))
+                nCent = 1 << q->bitsPerSub;
+            else
+                nCent = std::get<FlatClustering>(clustering).numCentroids;
+            if (auto pq = std::get_if<PQEncoding>(&encoding)) {
+                // PQ encoding has its own centroids that need to be trained:
+                nCent = std::max(nCent, 1u << pq->bitsPerSub);
+            }
+            return nCent;
+        }
+
+        /// The number of buckets to which vectors will be assigned when indexed (capped at max `unsigned`).
+        /// @note This is not the same as `numCentroidsToTrain`, because
+        ///     (a) with multi-index clustering the 'centroids' used as buckets are actually tuples,
+        ///         with one centroid per subquantizer;
+        ///     (b) it only refers to the main IVF index, not centroids used by encoders.
+        unsigned numCentroids() const {
+            auto q = std::get_if<MultiIndexClustering>(&clustering);
+            if (!q) return std::get<FlatClustering>(clustering).numCentroids;
+            const auto bits = 1ull * q->bitsPerSub * q->subquantizers;  // widened so the product cannot wrap
+            return bits < sizeof(unsigned) * 8 ? 1u << bits : ~0u;      // saturate: an oversized shift is UB
+        }
+
+        //---- ENCODING:
+
+        /// Writes a series of comma-separated "key=value" pairs describing this spec.
+        std::ostream& writeArgs(std::ostream&) const;
+
+        /// Returns a string of comma-separated key=value pairs describing this spec.
+        std::string createArgs() const;
+
+        friend std::ostream& operator<<(std::ostream& out, IndexSpec const& spec) {
+            return spec.writeArgs(out);
+        }
+
+        /// Returns a human-readable string describing this spec.
+        std::string description() const;
+
+        //---- LIMITS:
+
+        static constexpr unsigned               kMinDimensions = 2;
+        static constexpr unsigned               kMaxDimensions = 4096;
+        static constexpr FlatClustering         kMinFlatClustering {1};
+        static constexpr FlatClustering         kMaxFlatClustering {64'000};
+        static constexpr MultiIndexClustering   kMinMultiIndexClustering {   2,  4};
+        static constexpr MultiIndexClustering   kMaxMultiIndexClustering {1024, 12};
+        static constexpr PQEncoding             kMinPQEncoding {   2,  4};
+        static constexpr PQEncoding             kMaxPQEncoding {1024, 12};
+        static constexpr SQEncoding             kMinSQEncoding {4};
+        static constexpr SQEncoding             kMaxSQEncoding {8};
+
+        /// Absolute minimum number of training vectors needed per centroid.
+        /// The `train` method will return false instead of training if given fewer.
+        static constexpr int64_t kMinTrainingVectorsPerCentroid = 25;
+
+        /// Minimum recommended (by FAISS) number of training vectors per centroid for good results.
+        static constexpr int64_t kRecommendedMinTrainingVectorsPerCentroid = 39;
+        static constexpr int64_t kRecommendedMaxTrainingVectorsPerCentroid = 100;
+
+    };
+
+}

From d2fc40879769bc7cbce5119f051919a18831ba1b Mon Sep 17 00:00:00 2001
From: Callum Birks <callumbirks@protonmail.com>
Date: Thu, 20 Jun 2024 10:36:22 +0100
Subject: [PATCH 4/7] Correct set vecOpt.lazy to lazyEmbedding.

---
 LiteCore/Database/CollectionImpl.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LiteCore/Database/CollectionImpl.hh b/LiteCore/Database/CollectionImpl.hh
index 0a61db54c..f97e7a704 100644
--- a/LiteCore/Database/CollectionImpl.hh
+++ b/LiteCore/Database/CollectionImpl.hh
@@ -454,7 +454,7 @@ namespace litecore {
                         vecOpt.minTrainingCount = c4Opt.minTrainingSize;
                         vecOpt.maxTrainingCount = c4Opt.maxTrainingSize;
                         vecOpt.probeCount       = c4Opt.numProbes;
-                        vecOpt.lazy             = c4Opt.lazy;
+                        vecOpt.lazyEmbedding    = c4Opt.lazy;
                         vecOpt.validate();
                     } else {
                         error::_throw(error::InvalidParameter, "Vector index requires options");

From 768120f61d622eb4509304e272507e600dc254b5 Mon Sep 17 00:00:00 2001
From: Callum Birks <callumbirks@protonmail.com>
Date: Thu, 20 Jun 2024 14:16:07 +0100
Subject: [PATCH 5/7] Add kC4NoError

---
 C/include/c4Error.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/C/include/c4Error.h b/C/include/c4Error.h
index 27aaf1a6d..daf843d9c 100644
--- a/C/include/c4Error.h
+++ b/C/include/c4Error.h
@@ -186,6 +186,12 @@ typedef struct C4Error {
 #endif
 } C4Error;
 
+#ifdef __cplusplus
+static constexpr C4Error kC4NoError = {};  // value-initialized: every field is zero
+#else
+#    define kC4NoError ((C4Error){0})  // `{0}` rather than `{}`: empty braces are not valid C before C23
+#endif
+
 // C4Error C API:
 
 

From dc7bf71dec29ece3c1c2d75c3911f1859efd657a Mon Sep 17 00:00:00 2001
From: Callum Birks <callumbirks@protonmail.com>
Date: Thu, 20 Jun 2024 17:17:21 +0100
Subject: [PATCH 6/7] LazyVectorAPITest fixes for VectorOptions changes.

---
 LiteCore/tests/LazyVectorAPITest.cc | 95 ++++++++++++++++-------------
 1 file changed, 54 insertions(+), 41 deletions(-)

diff --git a/LiteCore/tests/LazyVectorAPITest.cc b/LiteCore/tests/LazyVectorAPITest.cc
index cb1aa4975..df58d8bee 100644
--- a/LiteCore/tests/LazyVectorAPITest.cc
+++ b/LiteCore/tests/LazyVectorAPITest.cc
@@ -2,6 +2,7 @@
 // Created by Callum Birks on 28/05/2024.
 //
 
+#include "VectorIndexSpec.hh"
 #include "c4Base.hh"
 #include "DatabaseImpl.hh"
 #include "LazyIndex.hh"
@@ -9,6 +10,7 @@
 #include "c4Collection.hh"
 #include "c4Index.h"
 #include "c4Index.hh"
+#include "c4IndexTypes.h"
 #include "c4Query.h"
 #include "c4Test.hh"  // IWYU pragma: keep
 #include "LiteCoreTest.hh"
@@ -181,7 +183,7 @@ class LazyVectorAPITest : public C4Test {
     bool createVectorIndex(bool lazy, slice expression = R"(['.word'])"_sl, slice name = "words_index"_sl,
                            IndexSpec::VectorOptions options = vectorOptions(300, 8),
                            C4Error*                 err     = ERROR_INFO()) const {
-        options.lazy = lazy;
+        options.lazyEmbedding = lazy;
         return createIndex(name, json5(expression), kC4VectorIndex, indexOptions(options), err);
     }
 
@@ -267,60 +269,71 @@ class LazyVectorAPITest : public C4Test {
     static C4VectorIndexOptions c4VectorOptions(const IndexSpec::VectorOptions& options) {
         C4VectorMetricType metric{};
         switch ( options.metric ) {
-            case IndexSpec::VectorOptions::DefaultMetric:
-                metric = kC4VectorMetricDefault;
-                break;
-            case IndexSpec::VectorOptions::Euclidean:
+            case vectorsearch::Metric::Euclidean2:
                 metric = kC4VectorMetricEuclidean;
                 break;
-            case IndexSpec::VectorOptions::Cosine:
+            case vectorsearch::Metric::Cosine:
                 metric = kC4VectorMetricCosine;
                 break;
         }
 
-        C4VectorClusteringType clusteringType{};
-        switch ( options.clustering.type ) {
-            case IndexSpec::VectorOptions::Flat:
-                clusteringType = kC4VectorClusteringFlat;
-                break;
-            case IndexSpec::VectorOptions::Multi:
-                clusteringType = kC4VectorClusteringMulti;
-                break;
+        C4VectorClustering clustering{};
+        switch ( options.clustering.index() ) {
+            case 0:
+                {
+                    clustering.type           = kC4VectorClusteringFlat;
+                    auto _clustering          = std::get<vectorsearch::FlatClustering>(options.clustering);
+                    clustering.flat_centroids = _clustering.numCentroids;
+                    break;
+                }
+            case 1:
+                {
+                    clustering.type                = kC4VectorClusteringMulti;
+                    auto _clustering               = std::get<vectorsearch::MultiIndexClustering>(options.clustering);
+                    clustering.multi_bits          = _clustering.bitsPerSub;
+                    clustering.multi_subquantizers = _clustering.subquantizers;
+                    break;
+                }
         }
 
-        C4VectorEncodingType encodingType{};
-        switch ( options.encoding.type ) {
-            case IndexSpec::VectorOptions::DefaultEncoding:
-                encodingType = kC4VectorEncodingDefault;
-                break;
-            case IndexSpec::VectorOptions::NoEncoding:
-                encodingType = kC4VectorEncodingNone;
-                break;
-            case IndexSpec::VectorOptions::PQ:
-                encodingType = kC4VectorEncodingPQ;
-                break;
-            case IndexSpec::VectorOptions::SQ:
-                encodingType = kC4VectorEncodingSQ;
-                break;
+        C4VectorEncoding encoding{};
+        switch ( options.encoding.index() ) {
+            case 0:
+                {
+                    encoding.type = kC4VectorEncodingNone;
+                    break;
+                }
+            case 1:
+                {
+                    encoding.type             = kC4VectorEncodingPQ;
+                    auto _encoding            = std::get<vectorsearch::PQEncoding>(options.encoding);
+                    encoding.bits             = _encoding.bitsPerSub;
+                    encoding.pq_subquantizers = _encoding.subquantizers;
+                    break;
+                }
+            case 2:
+                {
+                    encoding.type  = kC4VectorEncodingSQ;
+                    auto _encoding = std::get<vectorsearch::SQEncoding>(options.encoding);
+                    encoding.bits  = _encoding.bitsPerDimension;
+                    break;
+                }
         }
 
         return C4VectorIndexOptions{
                 options.dimensions,
                 metric,
-                C4VectorClustering{clusteringType, options.clustering.flat_centroids,
-                                   options.clustering.multi_subquantizers, options.clustering.multi_bits},
-                C4VectorEncoding{encodingType, options.encoding.pq_subquantizers, options.encoding.bits},
-                options.minTrainingSize,
-                options.maxTrainingSize,
-                options.numProbes,
-                options.lazy,
+                clustering,
+                encoding,
+                static_cast<unsigned int>(options.minTrainingCount.value_or(0)),
+                static_cast<unsigned int>(options.maxTrainingCount.value_or(0)),
+                options.probeCount.value_or(0),
+                options.lazyEmbedding,
         };
     }
 
     static IndexSpec::VectorOptions vectorOptions(unsigned dimensions, unsigned centroids) {
-        IndexSpec::VectorOptions options(dimensions);
-        options.clustering.type           = IndexSpec::VectorOptions::Flat;
-        options.clustering.flat_centroids = centroids;
+        IndexSpec::VectorOptions options(dimensions, vectorsearch::FlatClustering { centroids });
         return options;
     }
 
@@ -337,7 +350,7 @@ class LazyVectorAPITest : public C4Test {
 // 1, 2
 TEST_CASE_METHOD(LazyVectorAPITest, "Lazy Vector isLazy Default False", "[API][.VectorSearch]") {
     auto vectorOpt = vectorOptions(300, 20);
-    CHECK(vectorOpt.lazy == false);
+    CHECK(vectorOpt.lazyEmbedding == false);
 }
 
 // 3
@@ -393,7 +406,7 @@ TEST_CASE_METHOD(LazyVectorAPITest, "BeginUpdate on Non-Vector", "[API][.VectorS
     auto index = REQUIRED(getIndex("value_index"_sl, ERROR_INFO()));
 
     C4Error err{};
-    c4index_beginUpdate(index, 10, &err);
+    auto _ = c4index_beginUpdate(index, 10, &err);
     CHECK(err.code == kC4ErrorUnsupported);
 
     c4index_release(index);
@@ -406,7 +419,7 @@ TEST_CASE_METHOD(LazyVectorAPITest, "BeginUpdate on Non-Lazy Vector", "[API][.Ve
     auto index = REQUIRED(getIndex("nonlazyindex"_sl));
 
     C4Error err{};
-    c4index_beginUpdate(index, 10, &err);
+    auto _ = c4index_beginUpdate(index, 10, &err);
     CHECK(err.code == kC4ErrorUnsupported);
 
     c4index_release(index);

From a4effd9120b98f32937bc62eaee22a476beab650 Mon Sep 17 00:00:00 2001
From: Callum Birks <callumbirks@protonmail.com>
Date: Fri, 21 Jun 2024 14:23:15 +0100
Subject: [PATCH 7/7] Formatting.

---
 LiteCore/tests/LazyVectorAPITest.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/LiteCore/tests/LazyVectorAPITest.cc b/LiteCore/tests/LazyVectorAPITest.cc
index df58d8bee..07c9bf89c 100644
--- a/LiteCore/tests/LazyVectorAPITest.cc
+++ b/LiteCore/tests/LazyVectorAPITest.cc
@@ -333,7 +333,7 @@ class LazyVectorAPITest : public C4Test {
     }
 
     static IndexSpec::VectorOptions vectorOptions(unsigned dimensions, unsigned centroids) {
-        IndexSpec::VectorOptions options(dimensions, vectorsearch::FlatClustering { centroids });
+        IndexSpec::VectorOptions options(dimensions, vectorsearch::FlatClustering{centroids});
         return options;
     }
 
@@ -406,7 +406,7 @@ TEST_CASE_METHOD(LazyVectorAPITest, "BeginUpdate on Non-Vector", "[API][.VectorS
     auto index = REQUIRED(getIndex("value_index"_sl, ERROR_INFO()));
 
     C4Error err{};
-    auto _ = c4index_beginUpdate(index, 10, &err);
+    auto    _ = c4index_beginUpdate(index, 10, &err);
     CHECK(err.code == kC4ErrorUnsupported);
 
     c4index_release(index);
@@ -419,7 +419,7 @@ TEST_CASE_METHOD(LazyVectorAPITest, "BeginUpdate on Non-Lazy Vector", "[API][.Ve
     auto index = REQUIRED(getIndex("nonlazyindex"_sl));
 
     C4Error err{};
-    auto _ = c4index_beginUpdate(index, 10, &err);
+    auto    _ = c4index_beginUpdate(index, 10, &err);
     CHECK(err.code == kC4ErrorUnsupported);
 
     c4index_release(index);