From 674e9144d7808f91d1d3e9ec9f579c5ac3216ff2 Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Tue, 2 Jan 2024 19:03:41 -0700 Subject: [PATCH 01/25] Refactor cpp structure --- .gitmodules | 3 + CONTRIBUTING.md | 18 ++ cpp/.clang-format | 7 + cpp/CMakeLists.txt | 8 + cpp/include/CMakeLists.txt | 3 + cpp/include/Catch2 | 1 + cpp/src/CMakeLists.txt | 6 + cpp/{ => src}/E4M3.h | 0 cpp/{ => src}/Enums.h | 0 cpp/{ => src}/Index.h | 0 cpp/src/Makefile | 140 ++++++++++++++ cpp/{ => src}/Metadata.h | 105 +++++++++-- cpp/{ => src}/Spaces/Euclidean.h | 0 cpp/{ => src}/Spaces/InnerProduct.h | 0 cpp/{ => src}/Spaces/Space.h | 0 cpp/{ => src}/StreamUtils.h | 0 cpp/{ => src}/TypedIndex.h | 0 cpp/{ => src}/array_utils.h | 0 cpp/src/cmake_install.cmake | 39 ++++ cpp/{ => src}/hnswalg.h | 0 cpp/{ => src}/hnswlib.h | 0 cpp/{ => src}/std_utils.h | 0 cpp/{ => src}/visited_list_pool.h | 0 cpp/test/CMakeLists.txt | 12 ++ cpp/test/test_main.cpp | 272 ++++++++++++++++++++++++++++ 25 files changed, 594 insertions(+), 20 deletions(-) create mode 100644 .gitmodules create mode 100644 cpp/.clang-format create mode 100644 cpp/CMakeLists.txt create mode 100644 cpp/include/CMakeLists.txt create mode 160000 cpp/include/Catch2 create mode 100644 cpp/src/CMakeLists.txt rename cpp/{ => src}/E4M3.h (100%) rename cpp/{ => src}/Enums.h (100%) rename cpp/{ => src}/Index.h (100%) create mode 100644 cpp/src/Makefile rename cpp/{ => src}/Metadata.h (52%) rename cpp/{ => src}/Spaces/Euclidean.h (100%) rename cpp/{ => src}/Spaces/InnerProduct.h (100%) rename cpp/{ => src}/Spaces/Space.h (100%) rename cpp/{ => src}/StreamUtils.h (100%) rename cpp/{ => src}/TypedIndex.h (100%) rename cpp/{ => src}/array_utils.h (100%) create mode 100644 cpp/src/cmake_install.cmake rename cpp/{ => src}/hnswalg.h (100%) rename cpp/{ => src}/hnswlib.h (100%) rename cpp/{ => src}/std_utils.h (100%) rename cpp/{ => src}/visited_list_pool.h (100%) create mode 100644 cpp/test/CMakeLists.txt create mode 100644 cpp/test/test_main.cpp diff 
--git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..7b78b421 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "cpp/include/Catch2"] + path = cpp/include/Catch2 + url = git@github.com:catchorg/Catch2.git diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c4c28b8b..a639d9eb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -83,9 +83,12 @@ We follow the [GitHub Flow Workflow](https://guides.github.com/introduction/flow ## Testing +### Python Tests + We use `tox` for testing - running tests from end-to-end should be as simple as: ``` +cd python pip3 install tox tox ``` @@ -110,6 +113,16 @@ asv continuous --sort name --no-only-changed HEAD main Please note that `airspeed-velocity` can only run benchmarks against a git commit, so if you have uncommited code that you want to run benchmarks for you need to commit it first. +### Java Tests + +We provide java test execution as a maven test step. Thus you can run the tests with: + +```shell +cd java +mvn verify +```` + + ## Style Use [`clang-format`](https://clang.llvm.org/docs/ClangFormat.html) for C++ code, and `black` with defaults for Python code. 
@@ -123,6 +136,11 @@ tox -e check-formatting tox -e format ``` +For C++ code, you can use the following command to check formatting: +```bash +cd cpp + + ## Issues When creating an issue please try to ahere to the following format: diff --git a/cpp/.clang-format b/cpp/.clang-format new file mode 100644 index 00000000..3cb6857e --- /dev/null +++ b/cpp/.clang-format @@ -0,0 +1,7 @@ +--- +BasedOnStyle: LLVM +IndentWidth: 2 +--- +Language: Cpp +# Use 120 columns since we have big screens now +ColumnLimit: 120 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt new file mode 100644 index 00000000..70565d3b --- /dev/null +++ b/cpp/CMakeLists.txt @@ -0,0 +1,8 @@ +cmake_minimum_required(VERSION 3.20) +project(Voyager) + +set(CMAKE_CXX_STANDARD 17) + +add_subdirectory(include) +add_subdirectory(src) +add_subdirectory(test) diff --git a/cpp/include/CMakeLists.txt b/cpp/include/CMakeLists.txt new file mode 100644 index 00000000..2d465b52 --- /dev/null +++ b/cpp/include/CMakeLists.txt @@ -0,0 +1,3 @@ + +add_subdirectory(Catch2) +include(Catch) diff --git a/cpp/include/Catch2 b/cpp/include/Catch2 new file mode 160000 index 00000000..863c662c --- /dev/null +++ b/cpp/include/Catch2 @@ -0,0 +1 @@ +Subproject commit 863c662c0eff026300f4d729a7054e90d6d12cdd diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt new file mode 100644 index 00000000..28724010 --- /dev/null +++ b/cpp/src/CMakeLists.txt @@ -0,0 +1,6 @@ +add_library(VoyagerLib INTERFACE) +target_include_directories(VoyagerLib + INTERFACE + . 
+ Spaces +) diff --git a/cpp/E4M3.h b/cpp/src/E4M3.h similarity index 100% rename from cpp/E4M3.h rename to cpp/src/E4M3.h diff --git a/cpp/Enums.h b/cpp/src/Enums.h similarity index 100% rename from cpp/Enums.h rename to cpp/src/Enums.h diff --git a/cpp/Index.h b/cpp/src/Index.h similarity index 100% rename from cpp/Index.h rename to cpp/src/Index.h diff --git a/cpp/src/Makefile b/cpp/src/Makefile new file mode 100644 index 00000000..c3253bc6 --- /dev/null +++ b/cpp/src/Makefile @@ -0,0 +1,140 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.28 + +# Default target executed when no arguments are given to make. +default_target: all +.PHONY : default_target + +# Allow only one "make -f Makefile2" at a time, but pass parallelism. +.NOTPARALLEL: + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + +# Disable VCS-based implicit rules. +% : %,v + +# Disable VCS-based implicit rules. +% : RCS/% + +# Disable VCS-based implicit rules. +% : RCS/%,v + +# Disable VCS-based implicit rules. +% : SCCS/s.% + +# Disable VCS-based implicit rules. +% : s.% + +.SUFFIXES: .hpux_make_needs_suffix_list + +# Command-line flag to silence nested $(MAKE). +$(VERBOSE)MAKESILENT = -s + +#Suppress display of executed commands. +$(VERBOSE).SILENT: + +# A target that is always out of date. +cmake_force: +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /opt/homebrew/Cellar/cmake/3.28.1/bin/cmake + +# The command to remove a file. +RM = /opt/homebrew/Cellar/cmake/3.28.1/bin/cmake -E rm -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. 
+CMAKE_SOURCE_DIR = /Users/markkoh/spotify/voyager/cpp + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /Users/markkoh/spotify/voyager/cpp + +#============================================================================= +# Targets provided globally by CMake. + +# Special rule for the target edit_cache +edit_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake cache editor..." + /opt/homebrew/Cellar/cmake/3.28.1/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : edit_cache + +# Special rule for the target edit_cache +edit_cache/fast: edit_cache +.PHONY : edit_cache/fast + +# Special rule for the target rebuild_cache +rebuild_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake to regenerate build system..." + /opt/homebrew/Cellar/cmake/3.28.1/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : rebuild_cache + +# Special rule for the target rebuild_cache +rebuild_cache/fast: rebuild_cache +.PHONY : rebuild_cache/fast + +# The main all target +all: cmake_check_build_system + cd /Users/markkoh/spotify/voyager/cpp && $(CMAKE_COMMAND) -E cmake_progress_start /Users/markkoh/spotify/voyager/cpp/CMakeFiles /Users/markkoh/spotify/voyager/cpp/src//CMakeFiles/progress.marks + cd /Users/markkoh/spotify/voyager/cpp && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 src/all + $(CMAKE_COMMAND) -E cmake_progress_start /Users/markkoh/spotify/voyager/cpp/CMakeFiles 0 +.PHONY : all + +# The main clean target +clean: + cd /Users/markkoh/spotify/voyager/cpp && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 src/clean +.PHONY : clean + +# The main clean target +clean/fast: clean +.PHONY : clean/fast + +# Prepare targets for installation. +preinstall: all + cd /Users/markkoh/spotify/voyager/cpp && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 src/preinstall +.PHONY : preinstall + +# Prepare targets for installation. 
+preinstall/fast: + cd /Users/markkoh/spotify/voyager/cpp && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 src/preinstall +.PHONY : preinstall/fast + +# clear depends +depend: + cd /Users/markkoh/spotify/voyager/cpp && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 +.PHONY : depend + +# Help Target +help: + @echo "The following are some of the valid targets for this Makefile:" + @echo "... all (the default if no target is provided)" + @echo "... clean" + @echo "... depend" + @echo "... edit_cache" + @echo "... rebuild_cache" +.PHONY : help + + + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. +cmake_check_build_system: + cd /Users/markkoh/spotify/voyager/cpp && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/cpp/Metadata.h b/cpp/src/Metadata.h similarity index 52% rename from cpp/Metadata.h rename to cpp/src/Metadata.h index 0032346b..fcdbf5c4 100644 --- a/cpp/Metadata.h +++ b/cpp/src/Metadata.h @@ -21,6 +21,8 @@ * limitations under the License. * -/-/- */ +#include +#include #include "Enums.h" #include "StreamUtils.h" @@ -28,15 +30,14 @@ namespace voyager { namespace Metadata { /** - * @brief A basic metadata class that stores the number of dimensions, - * the SpaceType, StorageDataType, and number of dimensions. + * @brief A basic metadata class that stores the SpaceType, StorageDataType, and + * number of dimensions. 
*/ class V1 { public: - V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, - float maxNorm, bool useOrderPreservingTransform) - : numDimensions(numDimensions), spaceType(spaceType), - storageDataType(storageDataType), maxNorm(maxNorm), + V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, float maxNorm, + bool useOrderPreservingTransform) + : numDimensions(numDimensions), spaceType(spaceType), storageDataType(storageDataType), maxNorm(maxNorm), useOrderPreservingTransform(useOrderPreservingTransform) {} V1() {} @@ -52,20 +53,12 @@ class V1 { float getMaxNorm() { return maxNorm; } - bool getUseOrderPreservingTransform() const { - return useOrderPreservingTransform; - } - void setUseOrderPreservingTransform(bool newValue) { - useOrderPreservingTransform = newValue; - } + bool getUseOrderPreservingTransform() const { return useOrderPreservingTransform; } + void setUseOrderPreservingTransform(bool newValue) { useOrderPreservingTransform = newValue; } - void setNumDimensions(int newNumDimensions) { - numDimensions = newNumDimensions; - } + void setNumDimensions(int newNumDimensions) { numDimensions = newNumDimensions; } - void setStorageDataType(StorageDataType newStorageDataType) { - storageDataType = newStorageDataType; - } + void setStorageDataType(StorageDataType newStorageDataType) { storageDataType = newStorageDataType; } void setSpaceType(SpaceType newSpaceType) { spaceType = newSpaceType; } @@ -98,8 +91,75 @@ class V1 { bool useOrderPreservingTransform; }; -static std::unique_ptr -loadFromStream(std::shared_ptr inputStream) { +/** + * @brief A basic metadata class that stores the number of dimensions, + * the SpaceType, StorageDataType, and number of dimensions. 
+ */ +class V2 { +public: + V2(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, float maxNorm, + bool useOrderPreservingTransform, vector labels) + : numDimensions(numDimensions), spaceType(spaceType), storageDataType(storageDataType), maxNorm(maxNorm), + useOrderPreservingTransform(useOrderPreservingTransform), labels(labels) {} + + V2() {} + virtual ~V2() {} + + int version() const { return 2; } + + int getNumDimensions() { return numDimensions; } + + StorageDataType getStorageDataType() { return storageDataType; } + + SpaceType getSpaceType() { return spaceType; } + + float getMaxNorm() { return maxNorm; } + + bool getUseOrderPreservingTransform() const { return useOrderPreservingTransform; } + + std::string *getLabels() { return labels; } + + void setUseOrderPreservingTransform(bool newValue) { useOrderPreservingTransform = newValue; } + + void setNumDimensions(int newNumDimensions) { numDimensions = newNumDimensions; } + + void setStorageDataType(StorageDataType newStorageDataType) { storageDataType = newStorageDataType; } + + void setSpaceType(SpaceType newSpaceType) { spaceType = newSpaceType; } + + void setMaxNorm(float newMaxNorm) { maxNorm = newMaxNorm; } + + virtual void serializeToStream(std::shared_ptr stream) { + stream->write("VOYA", 4); + writeBinaryPOD(stream, version()); + writeBinaryPOD(stream, numDimensions); + writeBinaryPOD(stream, spaceType); + writeBinaryPOD(stream, storageDataType); + writeBinaryPOD(stream, maxNorm); + writeBinaryPOD(stream, useOrderPreservingTransform); + writeBinaryPOD(stream, labels); + }; + + virtual void loadFromStream(std::shared_ptr stream) { + // Version has already been loaded before we get here! 
+ readBinaryPOD(stream, numDimensions); + readBinaryPOD(stream, spaceType); + readBinaryPOD(stream, storageDataType); + readBinaryPOD(stream, maxNorm); + readBinaryPOD(stream, useOrderPreservingTransform); + readBinaryPOD(stream, labels); + }; + +private: + int numDimensions; + SpaceType spaceType; + StorageDataType storageDataType; + float maxNorm; + bool useOrderPreservingTransform; + vector labels[]; +}; + +static std::unique_ptr loadFromStream(std::shared_ptr inputStream) { uint32_t header = inputStream->peek(); if (header != 'AYOV') { return nullptr; @@ -117,6 +177,11 @@ loadFromStream(std::shared_ptr inputStream) { metadata->loadFromStream(inputStream); return metadata; } + case 2: { + std::unique_ptr metadata = std::make_unique(); + metadata->loadFromStream(inputStream); + return metadata; + } default: { std::stringstream stream; stream << std::hex << version; diff --git a/cpp/Spaces/Euclidean.h b/cpp/src/Spaces/Euclidean.h similarity index 100% rename from cpp/Spaces/Euclidean.h rename to cpp/src/Spaces/Euclidean.h diff --git a/cpp/Spaces/InnerProduct.h b/cpp/src/Spaces/InnerProduct.h similarity index 100% rename from cpp/Spaces/InnerProduct.h rename to cpp/src/Spaces/InnerProduct.h diff --git a/cpp/Spaces/Space.h b/cpp/src/Spaces/Space.h similarity index 100% rename from cpp/Spaces/Space.h rename to cpp/src/Spaces/Space.h diff --git a/cpp/StreamUtils.h b/cpp/src/StreamUtils.h similarity index 100% rename from cpp/StreamUtils.h rename to cpp/src/StreamUtils.h diff --git a/cpp/TypedIndex.h b/cpp/src/TypedIndex.h similarity index 100% rename from cpp/TypedIndex.h rename to cpp/src/TypedIndex.h diff --git a/cpp/array_utils.h b/cpp/src/array_utils.h similarity index 100% rename from cpp/array_utils.h rename to cpp/src/array_utils.h diff --git a/cpp/src/cmake_install.cmake b/cpp/src/cmake_install.cmake new file mode 100644 index 00000000..66d3bbea --- /dev/null +++ b/cpp/src/cmake_install.cmake @@ -0,0 +1,39 @@ +# Install script for directory: 
/Users/markkoh/spotify/voyager/cpp/src + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "/usr/local") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Is this installation the result of a crosscompile? +if(NOT DEFINED CMAKE_CROSSCOMPILING) + set(CMAKE_CROSSCOMPILING "FALSE") +endif() + +# Set default install directory permissions. 
+if(NOT DEFINED CMAKE_OBJDUMP) + set(CMAKE_OBJDUMP "/Library/Developer/CommandLineTools/usr/bin/objdump") +endif() + diff --git a/cpp/hnswalg.h b/cpp/src/hnswalg.h similarity index 100% rename from cpp/hnswalg.h rename to cpp/src/hnswalg.h diff --git a/cpp/hnswlib.h b/cpp/src/hnswlib.h similarity index 100% rename from cpp/hnswlib.h rename to cpp/src/hnswlib.h diff --git a/cpp/std_utils.h b/cpp/src/std_utils.h similarity index 100% rename from cpp/std_utils.h rename to cpp/src/std_utils.h diff --git a/cpp/visited_list_pool.h b/cpp/src/visited_list_pool.h similarity index 100% rename from cpp/visited_list_pool.h rename to cpp/src/visited_list_pool.h diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt new file mode 100644 index 00000000..b07bcb7b --- /dev/null +++ b/cpp/test/CMakeLists.txt @@ -0,0 +1,12 @@ +add_executable(test main.cpp) +target_link_libraries(test + PUBLIC + VoyagerLib + PRIVATE + Catch2::Catch2WithMain +) + +target_compile_options(test PRIVATE -O2 -g) + +include(CTest) +catch_discover_tests(test) diff --git a/cpp/test/test_main.cpp b/cpp/test/test_main.cpp new file mode 100644 index 00000000..7a2f9884 --- /dev/null +++ b/cpp/test/test_main.cpp @@ -0,0 +1,272 @@ +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +// TODO: Extract data generation as a function or as a Catch2 Generator + +template struct AllCloseMatcher : Catch::Matchers::MatcherGenericBase { + AllCloseMatcher(const std::vector &a, const float rtol = 1e-7, const float atol = 0) + : a_(a), rtol_(rtol), atol_(atol) {} + + bool match(const std::vector &b) const { + // Could use std::reduce, but early return is most likely faster + if (a_.size() != b.size()) { + return false; + } + // TODO: Replace with Ranges https://en.cppreference.com/w/cpp/ranges + for (int i = 0; i < a_.size(); ++i) { + if (!(std::fabs(a_[i] - b[i]) <= (atol_ + rtol_ * std::fabs(a_[i])))) { + return false; + } + } + return 
true; + } + + std::string describe() const override { return "IsClose"; } + +private: + const std::vector &a_; + const float atol_; + const float rtol_; +}; + +template +auto AllClose(const std::vector a, const float rtol = 1e-7, const float atol = 0) + -> AllCloseMatcher { + return AllCloseMatcher{a, rtol, atol}; +} + +template std::vector flattenNDArray(NDArray &arr) { + std::vector res(arr.shape[0]); + for (auto i = 0; i < arr.shape[0]; ++i) { + res[i] = arr[i][0]; + } + return res; +}; + +// dist_t, data_t, scalefactor, tolerance + +TEMPLATE_TEST_CASE("create_and_query", + "[index_creation]", + (std::tuple>), + (std::tuple>), + (std::tuple>)) { + auto num_dimensions = GENERATE(4, 16, 128, 128, 256); + auto num_elements = GENERATE(100, 1000); + auto space = GENERATE(SpaceType::Euclidean, SpaceType::Cosine); + + // It's a struggle to include these as std::ratio in the TEMPLATE test case so + // we'll set distance tolerance here. + float distance_tolerance = 0.0; + if (std::is_same::type, struct E4M3>::value) { + distance_tolerance = 0.20; + } else if (std::is_same::type, char>::value) { + distance_tolerance = 0.20; + } else if (std::is_same::type, float>::value) { + distance_tolerance = 2e-6; + } + + SECTION("(num_dimensions, num_elements, space): (" + std::to_string(num_dimensions) + "," + + std::to_string(num_elements) + "," + std::to_string(space) + ")") { + + // Generate a 2D Matrix of test data + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(0.0, 1.0); + auto input_data = std::vector(num_elements * num_dimensions); + std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { + float val = 2 * dis(gen) - 1; + if (std::is_same::type, char>::value) { + val = std::round(val * 127.0f) / 127.0f; + } + return val; + }); + + auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + + // Create Index + auto index = TypedIndex::type, + typename std::tuple_element<1, TestType>::type, + typename 
std::tuple_element<2, TestType>::type>( + space, num_dimensions, 20, num_elements); + + index.setEF(num_elements); + index.addItems(input_array); + SECTION("Multiple query interface") { + auto [labels, distances] = index.query(input_array); + + if (!std::is_same::type, float>::value) { + auto matches = 0; + // Could be std::reduce or std::accumulate + for (auto row = 0; row < num_elements; ++row) { + matches += labels[row][0] == row; + } + REQUIRE((double)matches / (double)num_elements > 0.5); + } else { + // Could be std::reduce or std::accumulate + std::vector expected(num_elements); + std::iota(expected.begin(), expected.end(), 0); + REQUIRE_THAT(flattenNDArray(labels), AllClose(expected)); + } + + REQUIRE_THAT(flattenNDArray(distances), + AllClose(std::vector(num_elements, 0.0), 1e-7, distance_tolerance)); + } + + SECTION("Single query interface") { + for (auto row = 0; row < num_elements; ++row) { + auto [labels, distances] = + index.query({input_array[row], input_array[row] + num_dimensions}); + if (std::is_same::type, float>::value) { + REQUIRE(labels[0] == row); + } + if(distances[0] >= distance_tolerance) { + float a = 0; + } + REQUIRE(distances[0] < distance_tolerance); + } + } + + SECTION("Saving an index") { + auto output_file = std::tmpfile(); + index.saveIndex(std::make_shared(output_file)); + auto file_byte_count = std::ftell(output_file); + REQUIRE(file_byte_count > 0); + auto memory_output_stream = std::make_shared(); + index.saveIndex(memory_output_stream); + auto index_bytes = memory_output_stream->getValue().size(); + REQUIRE(index_bytes > 0); + REQUIRE(file_byte_count == index_bytes); + } + } +} + +TEST_CASE("Spaces") { + auto [space, expected_distances] = + GENERATE(std::make_tuple>(SpaceType::Euclidean, + {0.0, 1.0, 2.0, 2.0, 2.0}), + std::make_tuple>(SpaceType::InnerProduct, + {-2.0, -1.0, 0.0, 0.0, 0.0}), + std::make_tuple>( + SpaceType::Cosine, {0, 1.835e-1, 4.23e-1, 4.23e-1, 4.23e-1})); + auto right_dimension = 
GENERATE(Catch::Generators::range(1, 128, 3)); + auto left_dimension = GENERATE(Catch::Generators::range(1, 32, 5)); + + auto num_dimensions = 3; + auto data = NDArray({1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1}, {5, num_dimensions}); + + auto input_data = std::vector(); + for (int i = 0; i < data.shape[0]; ++i) { + std::vector to_insert(left_dimension, 0); + std::vector right(right_dimension, 0); + to_insert.insert(to_insert.end(), data[0], data[0] + data.shape[1]); + to_insert.insert(to_insert.end(), right.begin(), right.end()); + input_data.insert(input_data.end(), to_insert.begin(), to_insert.end()); + } + + num_dimensions = right_dimension + left_dimension + data.shape[1]; + + auto data_2 = NDArray(input_data, {data.shape[0], num_dimensions}); + auto index = TypedIndex(space, num_dimensions, 16, 100); + index.setEF(10); + index.addItems(data_2); + + auto [labels, distances] = index.query( + std::vector(data_2[data_2.shape[0] - 1], data_2[data_2.shape[0] - 1] + num_dimensions), 5); + REQUIRE_THAT(distances, AllClose(expected_distances, 1e-7, 1e-3)); +} + +TEST_CASE("Get Vectors") { + auto num_dimensions = GENERATE(4, 16, 128, 256); + auto num_elements = GENERATE(100, 1000); + auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); + + // Generate a 2D Matrix of test data + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(0.0, 1.0); + auto input_data = std::vector(num_elements * num_dimensions); + std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); + auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + + auto index = TypedIndex(space, num_dimensions); + auto labels = std::vector(num_elements); + std::iota(labels.begin(), labels.end(), 0); + + REQUIRE_THROWS(index.getVector(labels[0])); + index.addItems(input_array); + + SECTION("Test single vector retrieval") { + for (auto i = 0; i < labels.size(); ++i) { + 
REQUIRE_THAT(index.getVector(labels[i]), + AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); + } + } + + SECTION("Test all vectors retrieval") { + auto vectors = index.getVectors(labels); + for (auto i = 0; i < labels.size(); ++i) { + REQUIRE_THAT(std::vector(vectors[i], vectors[i] + num_dimensions), + AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); + } + } +} + +TEST_CASE("Query EF") { + auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); + auto [query_ef, rank_tolerance] = + GENERATE(std::make_tuple(1, 100), std::make_tuple(2, 75), std::make_tuple(100, 1)); + auto num_dimensions = 32; + auto num_elements = 1000; + + // Generate a 2D Matrix of test data + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(0.0, 1.0); + auto input_data = std::vector(num_elements * num_dimensions); + std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); + auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + + auto index = TypedIndex(space, num_dimensions, 20, num_elements); + index.setEF(num_elements); + index.addItems(input_array); + + auto [closest_labels_per_vector, _] = index.query(input_array, num_elements, -1, num_elements); + SECTION("Multi query interface") { + auto [labels, _] = index.query(input_array, 1, -1, query_ef); + for (auto i = 0; i < labels.shape[0]; ++i) { + auto returned_label = labels[0][0]; + // Consider doing this in a loop with an early break. 
+ auto label_iter = std::find(closest_labels_per_vector[i], + closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], + returned_label); + auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); + REQUIRE(actual_rank < rank_tolerance); + } + } + + SECTION("Single query interface") { + for (auto i = 0; i < input_array.shape[0]; ++i) { + auto [returned_labels, _] = + index.query({input_data[i], input_data[i] + num_dimensions}, 1, query_ef); + auto returned_label = returned_labels[0]; + auto label_iter = std::find(closest_labels_per_vector[i], + closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], + returned_label); + auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); + REQUIRE(actual_rank < rank_tolerance); + } + } +} \ No newline at end of file From 9a68237e5f8a28950bc1cc73a1c8a16b5f033746 Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Mon, 18 Mar 2024 09:07:33 -0600 Subject: [PATCH 02/25] Start to add tests --- cpp/CMakeLists.txt | 2 + cpp/src/Makefile | 140 --------- cpp/src/Metadata.h | 105 ++----- cpp/src/Metadata_new.txt | 207 +++++++++++++ cpp/test/CMakeLists.txt | 2 +- cpp/test/test_main.cpp | 426 ++++++++++++++------------- java/com_spotify_voyager_jni_Index.h | 140 ++++----- 7 files changed, 513 insertions(+), 509 deletions(-) delete mode 100644 cpp/src/Makefile create mode 100644 cpp/src/Metadata_new.txt diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 70565d3b..003d7602 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.20) project(Voyager) set(CMAKE_CXX_STANDARD 17) +set(LLVM_CXX_STD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) add_subdirectory(include) add_subdirectory(src) diff --git a/cpp/src/Makefile b/cpp/src/Makefile deleted file mode 100644 index c3253bc6..00000000 --- a/cpp/src/Makefile +++ /dev/null @@ -1,140 +0,0 @@ -# CMAKE generated file: DO NOT EDIT! 
-# Generated by "Unix Makefiles" Generator, CMake Version 3.28 - -# Default target executed when no arguments are given to make. -default_target: all -.PHONY : default_target - -# Allow only one "make -f Makefile2" at a time, but pass parallelism. -.NOTPARALLEL: - -#============================================================================= -# Special targets provided by cmake. - -# Disable implicit rules so canonical targets will work. -.SUFFIXES: - -# Disable VCS-based implicit rules. -% : %,v - -# Disable VCS-based implicit rules. -% : RCS/% - -# Disable VCS-based implicit rules. -% : RCS/%,v - -# Disable VCS-based implicit rules. -% : SCCS/s.% - -# Disable VCS-based implicit rules. -% : s.% - -.SUFFIXES: .hpux_make_needs_suffix_list - -# Command-line flag to silence nested $(MAKE). -$(VERBOSE)MAKESILENT = -s - -#Suppress display of executed commands. -$(VERBOSE).SILENT: - -# A target that is always out of date. -cmake_force: -.PHONY : cmake_force - -#============================================================================= -# Set environment variables for the build. - -# The shell in which to execute make rules. -SHELL = /bin/sh - -# The CMake executable. -CMAKE_COMMAND = /opt/homebrew/Cellar/cmake/3.28.1/bin/cmake - -# The command to remove a file. -RM = /opt/homebrew/Cellar/cmake/3.28.1/bin/cmake -E rm -f - -# Escaping for special characters. -EQUALS = = - -# The top-level source directory on which CMake was run. -CMAKE_SOURCE_DIR = /Users/markkoh/spotify/voyager/cpp - -# The top-level build directory on which CMake was run. -CMAKE_BINARY_DIR = /Users/markkoh/spotify/voyager/cpp - -#============================================================================= -# Targets provided globally by CMake. - -# Special rule for the target edit_cache -edit_cache: - @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake cache editor..." 
- /opt/homebrew/Cellar/cmake/3.28.1/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) -.PHONY : edit_cache - -# Special rule for the target edit_cache -edit_cache/fast: edit_cache -.PHONY : edit_cache/fast - -# Special rule for the target rebuild_cache -rebuild_cache: - @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake to regenerate build system..." - /opt/homebrew/Cellar/cmake/3.28.1/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) -.PHONY : rebuild_cache - -# Special rule for the target rebuild_cache -rebuild_cache/fast: rebuild_cache -.PHONY : rebuild_cache/fast - -# The main all target -all: cmake_check_build_system - cd /Users/markkoh/spotify/voyager/cpp && $(CMAKE_COMMAND) -E cmake_progress_start /Users/markkoh/spotify/voyager/cpp/CMakeFiles /Users/markkoh/spotify/voyager/cpp/src//CMakeFiles/progress.marks - cd /Users/markkoh/spotify/voyager/cpp && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 src/all - $(CMAKE_COMMAND) -E cmake_progress_start /Users/markkoh/spotify/voyager/cpp/CMakeFiles 0 -.PHONY : all - -# The main clean target -clean: - cd /Users/markkoh/spotify/voyager/cpp && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 src/clean -.PHONY : clean - -# The main clean target -clean/fast: clean -.PHONY : clean/fast - -# Prepare targets for installation. -preinstall: all - cd /Users/markkoh/spotify/voyager/cpp && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 src/preinstall -.PHONY : preinstall - -# Prepare targets for installation. -preinstall/fast: - cd /Users/markkoh/spotify/voyager/cpp && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 src/preinstall -.PHONY : preinstall/fast - -# clear depends -depend: - cd /Users/markkoh/spotify/voyager/cpp && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 -.PHONY : depend - -# Help Target -help: - @echo "The following are some of the valid targets for this Makefile:" - @echo "... 
all (the default if no target is provided)" - @echo "... clean" - @echo "... depend" - @echo "... edit_cache" - @echo "... rebuild_cache" -.PHONY : help - - - -#============================================================================= -# Special targets to cleanup operation of make. - -# Special rule to run CMake to check the build system integrity. -# No rule that depends on this can have commands that come from listfiles -# because they might be regenerated. -cmake_check_build_system: - cd /Users/markkoh/spotify/voyager/cpp && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 -.PHONY : cmake_check_build_system - diff --git a/cpp/src/Metadata.h b/cpp/src/Metadata.h index fcdbf5c4..0032346b 100644 --- a/cpp/src/Metadata.h +++ b/cpp/src/Metadata.h @@ -21,8 +21,6 @@ * limitations under the License. * -/-/- */ -#include -#include #include "Enums.h" #include "StreamUtils.h" @@ -30,14 +28,15 @@ namespace voyager { namespace Metadata { /** - * @brief A basic metadata class that stores the SpaceType, StorageDataType, and - * number of dimensions. + * @brief A basic metadata class that stores the number of dimensions, + * the SpaceType, StorageDataType, and number of dimensions. 
*/ class V1 { public: - V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, float maxNorm, - bool useOrderPreservingTransform) - : numDimensions(numDimensions), spaceType(spaceType), storageDataType(storageDataType), maxNorm(maxNorm), + V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, + float maxNorm, bool useOrderPreservingTransform) + : numDimensions(numDimensions), spaceType(spaceType), + storageDataType(storageDataType), maxNorm(maxNorm), useOrderPreservingTransform(useOrderPreservingTransform) {} V1() {} @@ -53,77 +52,20 @@ class V1 { float getMaxNorm() { return maxNorm; } - bool getUseOrderPreservingTransform() const { return useOrderPreservingTransform; } - void setUseOrderPreservingTransform(bool newValue) { useOrderPreservingTransform = newValue; } - - void setNumDimensions(int newNumDimensions) { numDimensions = newNumDimensions; } - - void setStorageDataType(StorageDataType newStorageDataType) { storageDataType = newStorageDataType; } - - void setSpaceType(SpaceType newSpaceType) { spaceType = newSpaceType; } - - void setMaxNorm(float newMaxNorm) { maxNorm = newMaxNorm; } - - virtual void serializeToStream(std::shared_ptr stream) { - stream->write("VOYA", 4); - writeBinaryPOD(stream, version()); - writeBinaryPOD(stream, numDimensions); - writeBinaryPOD(stream, spaceType); - writeBinaryPOD(stream, storageDataType); - writeBinaryPOD(stream, maxNorm); - writeBinaryPOD(stream, useOrderPreservingTransform); - }; - - virtual void loadFromStream(std::shared_ptr stream) { - // Version has already been loaded before we get here! 
- readBinaryPOD(stream, numDimensions); - readBinaryPOD(stream, spaceType); - readBinaryPOD(stream, storageDataType); - readBinaryPOD(stream, maxNorm); - readBinaryPOD(stream, useOrderPreservingTransform); - }; - -private: - int numDimensions; - SpaceType spaceType; - StorageDataType storageDataType; - float maxNorm; - bool useOrderPreservingTransform; -}; - -/** - * @brief A basic metadata class that stores the number of dimensions, - * the SpaceType, StorageDataType, and number of dimensions. - */ -class V2 { -public: - V2(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, float maxNorm, - bool useOrderPreservingTransform, vector labels) - : numDimensions(numDimensions), spaceType(spaceType), storageDataType(storageDataType), maxNorm(maxNorm), - useOrderPreservingTransform(useOrderPreservingTransform), labels(labels) {} - - V2() {} - virtual ~V2() {} - - int version() const { return 2; } - - int getNumDimensions() { return numDimensions; } - - StorageDataType getStorageDataType() { return storageDataType; } - - SpaceType getSpaceType() { return spaceType; } - - float getMaxNorm() { return maxNorm; } - - bool getUseOrderPreservingTransform() const { return useOrderPreservingTransform; } - - std::string *getLabels() { return labels; } - - void setUseOrderPreservingTransform(bool newValue) { useOrderPreservingTransform = newValue; } + bool getUseOrderPreservingTransform() const { + return useOrderPreservingTransform; + } + void setUseOrderPreservingTransform(bool newValue) { + useOrderPreservingTransform = newValue; + } - void setNumDimensions(int newNumDimensions) { numDimensions = newNumDimensions; } + void setNumDimensions(int newNumDimensions) { + numDimensions = newNumDimensions; + } - void setStorageDataType(StorageDataType newStorageDataType) { storageDataType = newStorageDataType; } + void setStorageDataType(StorageDataType newStorageDataType) { + storageDataType = newStorageDataType; + } void setSpaceType(SpaceType newSpaceType) { 
spaceType = newSpaceType; } @@ -137,7 +79,6 @@ class V2 { writeBinaryPOD(stream, storageDataType); writeBinaryPOD(stream, maxNorm); writeBinaryPOD(stream, useOrderPreservingTransform); - writeBinaryPOD(stream, labels); }; virtual void loadFromStream(std::shared_ptr stream) { @@ -147,7 +88,6 @@ class V2 { readBinaryPOD(stream, storageDataType); readBinaryPOD(stream, maxNorm); readBinaryPOD(stream, useOrderPreservingTransform); - readBinaryPOD(stream, labels); }; private: @@ -156,10 +96,10 @@ class V2 { StorageDataType storageDataType; float maxNorm; bool useOrderPreservingTransform; - vector labels[]; }; -static std::unique_ptr loadFromStream(std::shared_ptr inputStream) { +static std::unique_ptr +loadFromStream(std::shared_ptr inputStream) { uint32_t header = inputStream->peek(); if (header != 'AYOV') { return nullptr; @@ -177,11 +117,6 @@ static std::unique_ptr loadFromStream(std::shared_ptr metadata->loadFromStream(inputStream); return metadata; } - case 2: { - std::unique_ptr metadata = std::make_unique(); - metadata->loadFromStream(inputStream); - return metadata; - } default: { std::stringstream stream; stream << std::hex << version; diff --git a/cpp/src/Metadata_new.txt b/cpp/src/Metadata_new.txt new file mode 100644 index 00000000..fcdbf5c4 --- /dev/null +++ b/cpp/src/Metadata_new.txt @@ -0,0 +1,207 @@ +#pragma once +/*- + * -\-\- + * voyager + * -- + * Copyright (C) 2016 - 2023 Spotify AB + * + * This file is heavily based on hnswlib (https://github.com/nmslib/hnswlib, + * Apache 2.0-licensed, no copyright author listed) + * -- + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * -/-/- + */ +#include +#include + +#include "Enums.h" +#include "StreamUtils.h" + +namespace voyager { +namespace Metadata { +/** + * @brief A basic metadata class that stores the SpaceType, StorageDataType, and + * number of dimensions. + */ +class V1 { +public: + V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, float maxNorm, + bool useOrderPreservingTransform) + : numDimensions(numDimensions), spaceType(spaceType), storageDataType(storageDataType), maxNorm(maxNorm), + useOrderPreservingTransform(useOrderPreservingTransform) {} + + V1() {} + virtual ~V1() {} + + int version() const { return 1; } + + int getNumDimensions() { return numDimensions; } + + StorageDataType getStorageDataType() { return storageDataType; } + + SpaceType getSpaceType() { return spaceType; } + + float getMaxNorm() { return maxNorm; } + + bool getUseOrderPreservingTransform() const { return useOrderPreservingTransform; } + void setUseOrderPreservingTransform(bool newValue) { useOrderPreservingTransform = newValue; } + + void setNumDimensions(int newNumDimensions) { numDimensions = newNumDimensions; } + + void setStorageDataType(StorageDataType newStorageDataType) { storageDataType = newStorageDataType; } + + void setSpaceType(SpaceType newSpaceType) { spaceType = newSpaceType; } + + void setMaxNorm(float newMaxNorm) { maxNorm = newMaxNorm; } + + virtual void serializeToStream(std::shared_ptr stream) { + stream->write("VOYA", 4); + writeBinaryPOD(stream, version()); + writeBinaryPOD(stream, numDimensions); + writeBinaryPOD(stream, spaceType); + 
writeBinaryPOD(stream, storageDataType); + writeBinaryPOD(stream, maxNorm); + writeBinaryPOD(stream, useOrderPreservingTransform); + }; + + virtual void loadFromStream(std::shared_ptr stream) { + // Version has already been loaded before we get here! + readBinaryPOD(stream, numDimensions); + readBinaryPOD(stream, spaceType); + readBinaryPOD(stream, storageDataType); + readBinaryPOD(stream, maxNorm); + readBinaryPOD(stream, useOrderPreservingTransform); + }; + +private: + int numDimensions; + SpaceType spaceType; + StorageDataType storageDataType; + float maxNorm; + bool useOrderPreservingTransform; +}; + +/** + * @brief A basic metadata class that stores the number of dimensions, + * the SpaceType, StorageDataType, and number of dimensions. + */ +class V2 { +public: + V2(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, float maxNorm, + bool useOrderPreservingTransform, vector labels) + : numDimensions(numDimensions), spaceType(spaceType), storageDataType(storageDataType), maxNorm(maxNorm), + useOrderPreservingTransform(useOrderPreservingTransform), labels(labels) {} + + V2() {} + virtual ~V2() {} + + int version() const { return 2; } + + int getNumDimensions() { return numDimensions; } + + StorageDataType getStorageDataType() { return storageDataType; } + + SpaceType getSpaceType() { return spaceType; } + + float getMaxNorm() { return maxNorm; } + + bool getUseOrderPreservingTransform() const { return useOrderPreservingTransform; } + + std::string *getLabels() { return labels; } + + void setUseOrderPreservingTransform(bool newValue) { useOrderPreservingTransform = newValue; } + + void setNumDimensions(int newNumDimensions) { numDimensions = newNumDimensions; } + + void setStorageDataType(StorageDataType newStorageDataType) { storageDataType = newStorageDataType; } + + void setSpaceType(SpaceType newSpaceType) { spaceType = newSpaceType; } + + void setMaxNorm(float newMaxNorm) { maxNorm = newMaxNorm; } + + virtual void 
serializeToStream(std::shared_ptr stream) { + stream->write("VOYA", 4); + writeBinaryPOD(stream, version()); + writeBinaryPOD(stream, numDimensions); + writeBinaryPOD(stream, spaceType); + writeBinaryPOD(stream, storageDataType); + writeBinaryPOD(stream, maxNorm); + writeBinaryPOD(stream, useOrderPreservingTransform); + writeBinaryPOD(stream, labels); + }; + + virtual void loadFromStream(std::shared_ptr stream) { + // Version has already been loaded before we get here! + readBinaryPOD(stream, numDimensions); + readBinaryPOD(stream, spaceType); + readBinaryPOD(stream, storageDataType); + readBinaryPOD(stream, maxNorm); + readBinaryPOD(stream, useOrderPreservingTransform); + readBinaryPOD(stream, labels); + }; + +private: + int numDimensions; + SpaceType spaceType; + StorageDataType storageDataType; + float maxNorm; + bool useOrderPreservingTransform; + vector labels[]; +}; + +static std::unique_ptr loadFromStream(std::shared_ptr inputStream) { + uint32_t header = inputStream->peek(); + if (header != 'AYOV') { + return nullptr; + } + + // Actually read instead of just peeking: + inputStream->read((char *)&header, sizeof(header)); + + int version; + readBinaryPOD(inputStream, version); + + switch (version) { + case 1: { + std::unique_ptr metadata = std::make_unique(); + metadata->loadFromStream(inputStream); + return metadata; + } + case 2: { + std::unique_ptr metadata = std::make_unique(); + metadata->loadFromStream(inputStream); + return metadata; + } + default: { + std::stringstream stream; + stream << std::hex << version; + std::string resultAsHex(stream.str()); + + std::string error = "Unable to parse version of Voyager index file; found " + "unsupported version \"0x" + + resultAsHex + "\"."; + + if (version < 20) { + error += " A newer version of the Voyager library may be able to read " + "this index."; + } else { + error += " This index may be corrupted (or not a Voyager index)."; + } + + throw std::domain_error(error); + } + } +}; + +} // namespace Metadata 
+}; // namespace voyager \ No newline at end of file diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index b07bcb7b..a153e455 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(test main.cpp) +add_executable(test test_main.cpp) target_link_libraries(test PUBLIC VoyagerLib diff --git a/cpp/test/test_main.cpp b/cpp/test/test_main.cpp index 7a2f9884..1ad0f2e2 100644 --- a/cpp/test/test_main.cpp +++ b/cpp/test/test_main.cpp @@ -56,217 +56,231 @@ template std::vector flattenNDArray(NDArray &arr) { return res; }; -// dist_t, data_t, scalefactor, tolerance - -TEMPLATE_TEST_CASE("create_and_query", - "[index_creation]", - (std::tuple>), - (std::tuple>), - (std::tuple>)) { - auto num_dimensions = GENERATE(4, 16, 128, 128, 256); +TEST_CASE("Basic init") { + auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); + auto num_dimensions = GENERATE(4, 16, 128, 256); auto num_elements = GENERATE(100, 1000); - auto space = GENERATE(SpaceType::Euclidean, SpaceType::Cosine); - - // It's a struggle to include these as std::ratio in the TEMPLATE test case so - // we'll set distance tolerance here. 
- float distance_tolerance = 0.0; - if (std::is_same::type, struct E4M3>::value) { - distance_tolerance = 0.20; - } else if (std::is_same::type, char>::value) { - distance_tolerance = 0.20; - } else if (std::is_same::type, float>::value) { - distance_tolerance = 2e-6; - } SECTION("(num_dimensions, num_elements, space): (" + std::to_string(num_dimensions) + "," + std::to_string(num_elements) + "," + std::to_string(space) + ")") { - - // Generate a 2D Matrix of test data - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(0.0, 1.0); - auto input_data = std::vector(num_elements * num_dimensions); - std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { - float val = 2 * dis(gen) - 1; - if (std::is_same::type, char>::value) { - val = std::round(val * 127.0f) / 127.0f; - } - return val; - }); - - auto input_array = NDArray(input_data, {num_elements, num_dimensions}); - - // Create Index - auto index = TypedIndex::type, - typename std::tuple_element<1, TestType>::type, - typename std::tuple_element<2, TestType>::type>( - space, num_dimensions, 20, num_elements); - - index.setEF(num_elements); - index.addItems(input_array); - SECTION("Multiple query interface") { - auto [labels, distances] = index.query(input_array); - - if (!std::is_same::type, float>::value) { - auto matches = 0; - // Could be std::reduce or std::accumulate - for (auto row = 0; row < num_elements; ++row) { - matches += labels[row][0] == row; - } - REQUIRE((double)matches / (double)num_elements > 0.5); - } else { - // Could be std::reduce or std::accumulate - std::vector expected(num_elements); - std::iota(expected.begin(), expected.end(), 0); - REQUIRE_THAT(flattenNDArray(labels), AllClose(expected)); - } - - REQUIRE_THAT(flattenNDArray(distances), - AllClose(std::vector(num_elements, 0.0), 1e-7, distance_tolerance)); - } - - SECTION("Single query interface") { - for (auto row = 0; row < num_elements; ++row) { - auto [labels, distances] = - 
index.query({input_array[row], input_array[row] + num_dimensions}); - if (std::is_same::type, float>::value) { - REQUIRE(labels[0] == row); - } - if(distances[0] >= distance_tolerance) { - float a = 0; - } - REQUIRE(distances[0] < distance_tolerance); - } - } - - SECTION("Saving an index") { - auto output_file = std::tmpfile(); - index.saveIndex(std::make_shared(output_file)); - auto file_byte_count = std::ftell(output_file); - REQUIRE(file_byte_count > 0); - auto memory_output_stream = std::make_shared(); - index.saveIndex(memory_output_stream); - auto index_bytes = memory_output_stream->getValue().size(); - REQUIRE(index_bytes > 0); - REQUIRE(file_byte_count == index_bytes); - } - } -} - -TEST_CASE("Spaces") { - auto [space, expected_distances] = - GENERATE(std::make_tuple>(SpaceType::Euclidean, - {0.0, 1.0, 2.0, 2.0, 2.0}), - std::make_tuple>(SpaceType::InnerProduct, - {-2.0, -1.0, 0.0, 0.0, 0.0}), - std::make_tuple>( - SpaceType::Cosine, {0, 1.835e-1, 4.23e-1, 4.23e-1, 4.23e-1})); - auto right_dimension = GENERATE(Catch::Generators::range(1, 128, 3)); - auto left_dimension = GENERATE(Catch::Generators::range(1, 32, 5)); - - auto num_dimensions = 3; - auto data = NDArray({1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1}, {5, num_dimensions}); - - auto input_data = std::vector(); - for (int i = 0; i < data.shape[0]; ++i) { - std::vector to_insert(left_dimension, 0); - std::vector right(right_dimension, 0); - to_insert.insert(to_insert.end(), data[0], data[0] + data.shape[1]); - to_insert.insert(to_insert.end(), right.begin(), right.end()); - input_data.insert(input_data.end(), to_insert.begin(), to_insert.end()); - } - - num_dimensions = right_dimension + left_dimension + data.shape[1]; - - auto data_2 = NDArray(input_data, {data.shape[0], num_dimensions}); - auto index = TypedIndex(space, num_dimensions, 16, 100); - index.setEF(10); - index.addItems(data_2); - - auto [labels, distances] = index.query( - std::vector(data_2[data_2.shape[0] - 1], data_2[data_2.shape[0] 
- 1] + num_dimensions), 5); - REQUIRE_THAT(distances, AllClose(expected_distances, 1e-7, 1e-3)); -} - -TEST_CASE("Get Vectors") { - auto num_dimensions = GENERATE(4, 16, 128, 256); - auto num_elements = GENERATE(100, 1000); - auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); - - // Generate a 2D Matrix of test data - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(0.0, 1.0); - auto input_data = std::vector(num_elements * num_dimensions); - std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); - auto input_array = NDArray(input_data, {num_elements, num_dimensions}); - - auto index = TypedIndex(space, num_dimensions); - auto labels = std::vector(num_elements); - std::iota(labels.begin(), labels.end(), 0); - - REQUIRE_THROWS(index.getVector(labels[0])); - index.addItems(input_array); - - SECTION("Test single vector retrieval") { - for (auto i = 0; i < labels.size(); ++i) { - REQUIRE_THAT(index.getVector(labels[i]), - AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); - } - } - - SECTION("Test all vectors retrieval") { - auto vectors = index.getVectors(labels); - for (auto i = 0; i < labels.size(); ++i) { - REQUIRE_THAT(std::vector(vectors[i], vectors[i] + num_dimensions), - AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); - } + auto index = TypedIndex(space, num_dimensions); + REQUIRE(index.getSpace() == space); + REQUIRE(index.getStorageDataType() == StorageDataType::Float32); + REQUIRE(index.getNumDimensions() == num_dimensions); } } -TEST_CASE("Query EF") { - auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); - auto [query_ef, rank_tolerance] = - GENERATE(std::make_tuple(1, 100), std::make_tuple(2, 75), std::make_tuple(100, 1)); - auto num_dimensions = 32; - auto num_elements = 1000; - - // Generate a 2D Matrix of test data - std::random_device rd; - std::mt19937 gen(rd()); - 
std::uniform_real_distribution dis(0.0, 1.0); - auto input_data = std::vector(num_elements * num_dimensions); - std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); - auto input_array = NDArray(input_data, {num_elements, num_dimensions}); - - auto index = TypedIndex(space, num_dimensions, 20, num_elements); - index.setEF(num_elements); - index.addItems(input_array); - - auto [closest_labels_per_vector, _] = index.query(input_array, num_elements, -1, num_elements); - SECTION("Multi query interface") { - auto [labels, _] = index.query(input_array, 1, -1, query_ef); - for (auto i = 0; i < labels.shape[0]; ++i) { - auto returned_label = labels[0][0]; - // Consider doing this in a loop with an early break. - auto label_iter = std::find(closest_labels_per_vector[i], - closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], - returned_label); - auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); - REQUIRE(actual_rank < rank_tolerance); - } - } +// dist_t, data_t, scalefactor, tolerance - SECTION("Single query interface") { - for (auto i = 0; i < input_array.shape[0]; ++i) { - auto [returned_labels, _] = - index.query({input_data[i], input_data[i] + num_dimensions}, 1, query_ef); - auto returned_label = returned_labels[0]; - auto label_iter = std::find(closest_labels_per_vector[i], - closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], - returned_label); - auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); - REQUIRE(actual_rank < rank_tolerance); - } - } -} \ No newline at end of file +// TEMPLATE_TEST_CASE("create_and_query", +// "[index_creation]", +// (std::tuple>), +// (std::tuple>), +// (std::tuple>)) { +// auto num_dimensions = GENERATE(4, 16, 128, 128, 256); +// auto num_elements = GENERATE(100, 1000); +// auto space = GENERATE(SpaceType::Euclidean, SpaceType::Cosine); + +// // It's a struggle to include these as std::ratio in the TEMPLATE test case 
so +// // we'll set distance tolerance here. +// float distance_tolerance = 0.0; +// if (std::is_same::type, struct E4M3>::value) { +// distance_tolerance = 0.20; +// } else if (std::is_same::type, char>::value) { +// distance_tolerance = 0.20; +// } else if (std::is_same::type, float>::value) { +// distance_tolerance = 2e-6; +// } + +// SECTION("(num_dimensions, num_elements, space): (" + std::to_string(num_dimensions) + "," + +// std::to_string(num_elements) + "," + std::to_string(space) + ")") { + +// // Generate a 2D Matrix of test data +// std::random_device rd; +// std::mt19937 gen(rd()); +// std::uniform_real_distribution dis(0.0, 1.0); +// auto input_data = std::vector(num_elements * num_dimensions); +// std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { +// float val = 2 * dis(gen) - 1; +// if (std::is_same::type, char>::value) { +// val = std::round(val * 127.0f) / 127.0f; +// } +// return val; +// }); + +// auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + +// // Create Index +// auto index = TypedIndex::type, +// typename std::tuple_element<1, TestType>::type, +// typename std::tuple_element<2, TestType>::type>( +// space, num_dimensions, 20, num_elements); + +// index.setEF(num_elements); +// index.addItems(input_array); +// SECTION("Multiple query interface") { +// auto [labels, distances] = index.query(input_array); + +// if (!std::is_same::type, float>::value) { +// auto matches = 0; +// // Could be std::reduce or std::accumulate +// for (auto row = 0; row < num_elements; ++row) { +// matches += labels[row][0] == row; +// } +// REQUIRE((double)matches / (double)num_elements > 0.5); +// } else { +// // Could be std::reduce or std::accumulate +// std::vector expected(num_elements); +// std::iota(expected.begin(), expected.end(), 0); +// REQUIRE_THAT(flattenNDArray(labels), AllClose(expected)); +// } + +// REQUIRE_THAT(flattenNDArray(distances), +// AllClose(std::vector(num_elements, 0.0), 1e-7, 
distance_tolerance)); +// } + +// SECTION("Single query interface") { +// for (auto row = 0; row < num_elements; ++row) { +// auto [labels, distances] = +// index.query({input_array[row], input_array[row] + num_dimensions}); +// if (std::is_same::type, float>::value) { +// REQUIRE(labels[0] == row); +// } +// if(distances[0] >= distance_tolerance) { +// float a = 0; +// } +// REQUIRE(distances[0] < distance_tolerance); +// } +// } + +// // SECTION("Saving an index") { +// // auto output_file = std::tmpfile(); +// // index.saveIndex(std::make_shared(output_file)); +// // auto file_byte_count = std::ftell(output_file); +// // REQUIRE(file_byte_count > 0); +// // auto memory_output_stream = std::make_shared(); +// // index.saveIndex(memory_output_stream); +// // auto index_bytes = memory_output_stream->getValue().size(); +// // REQUIRE(index_bytes > 0); +// // REQUIRE(file_byte_count == index_bytes); +// // } +// } +// } + +// TEST_CASE("Spaces") { +// auto [space, expected_distances] = +// GENERATE(std::make_tuple>(SpaceType::Euclidean, +// {0.0, 1.0, 2.0, 2.0, 2.0}), +// std::make_tuple>(SpaceType::InnerProduct, +// {-2.0, -1.0, 0.0, 0.0, 0.0}), +// std::make_tuple>( +// SpaceType::Cosine, {0, 1.835e-1, 4.23e-1, 4.23e-1, 4.23e-1})); +// auto right_dimension = GENERATE(Catch::Generators::range(1, 128, 3)); +// auto left_dimension = GENERATE(Catch::Generators::range(1, 32, 5)); + +// auto num_dimensions = 3; +// auto data = NDArray({1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1}, {5, num_dimensions}); + +// auto input_data = std::vector(); +// for (int i = 0; i < data.shape[0]; ++i) { +// std::vector to_insert(left_dimension, 0); +// std::vector right(right_dimension, 0); +// to_insert.insert(to_insert.end(), data[0], data[0] + data.shape[1]); +// to_insert.insert(to_insert.end(), right.begin(), right.end()); +// input_data.insert(input_data.end(), to_insert.begin(), to_insert.end()); +// } + +// num_dimensions = right_dimension + left_dimension + data.shape[1]; + +// 
auto data_2 = NDArray(input_data, {data.shape[0], num_dimensions}); +// auto index = TypedIndex(space, num_dimensions, 16, 100); +// index.setEF(10); +// index.addItems(data_2); + +// auto [labels, distances] = index.query( +// std::vector(data_2[data_2.shape[0] - 1], data_2[data_2.shape[0] - 1] + num_dimensions), 5); +// REQUIRE_THAT(distances, AllClose(expected_distances, 1e-7, 1e-3)); +// } + +// TEST_CASE("Get Vectors") { +// auto num_dimensions = GENERATE(4, 16, 128, 256); +// auto num_elements = GENERATE(100, 1000); +// auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); + +// // Generate a 2D Matrix of test data +// std::random_device rd; +// std::mt19937 gen(rd()); +// std::uniform_real_distribution dis(0.0, 1.0); +// auto input_data = std::vector(num_elements * num_dimensions); +// std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); +// auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + +// auto index = TypedIndex(space, num_dimensions); +// auto labels = std::vector(num_elements); +// std::iota(labels.begin(), labels.end(), 0); + +// REQUIRE_THROWS(index.getVector(labels[0])); +// index.addItems(input_array); + +// SECTION("Test single vector retrieval") { +// for (auto i = 0; i < labels.size(); ++i) { +// REQUIRE_THAT(index.getVector(labels[i]), +// AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); +// } +// } + +// SECTION("Test all vectors retrieval") { +// auto vectors = index.getVectors(labels); +// for (auto i = 0; i < labels.size(); ++i) { +// REQUIRE_THAT(std::vector(vectors[i], vectors[i] + num_dimensions), +// AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); +// } +// } +// } + +// TEST_CASE("Query EF") { +// auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); +// auto [query_ef, rank_tolerance] = +// GENERATE(std::make_tuple(1, 100), std::make_tuple(2, 75), std::make_tuple(100, 1)); +// auto 
num_dimensions = 32; +// auto num_elements = 1000; + +// // Generate a 2D Matrix of test data +// std::random_device rd; +// std::mt19937 gen(rd()); +// std::uniform_real_distribution dis(0.0, 1.0); +// auto input_data = std::vector(num_elements * num_dimensions); +// std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); +// auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + +// auto index = TypedIndex(space, num_dimensions, 20, num_elements); +// index.setEF(num_elements); +// index.addItems(input_array); + +// auto [closest_labels_per_vector, _] = index.query(input_array, num_elements, -1, num_elements); +// SECTION("Multi query interface") { +// auto [labels, _] = index.query(input_array, 1, -1, query_ef); +// for (auto i = 0; i < labels.shape[0]; ++i) { +// auto returned_label = labels[0][0]; +// // Consider doing this in a loop with an early break. +// auto label_iter = std::find(closest_labels_per_vector[i], +// closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], +// returned_label); +// auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); +// REQUIRE(actual_rank < rank_tolerance); +// } +// } + +// SECTION("Single query interface") { +// for (auto i = 0; i < input_array.shape[0]; ++i) { +// auto [returned_labels, _] = +// index.query({input_data[i], input_data[i] + num_dimensions}, 1, query_ef); +// auto returned_label = returned_labels[0]; +// auto label_iter = std::find(closest_labels_per_vector[i], +// closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], +// returned_label); +// auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); +// REQUIRE(actual_rank < rank_tolerance); +// } +// } +// } \ No newline at end of file diff --git a/java/com_spotify_voyager_jni_Index.h b/java/com_spotify_voyager_jni_Index.h index 25587917..c10ae6ed 100644 --- a/java/com_spotify_voyager_jni_Index.h +++ 
b/java/com_spotify_voyager_jni_Index.h @@ -10,256 +10,242 @@ extern "C" { /* * Class: com_spotify_voyager_jni_Index * Method: nativeConstructor - * Signature: - * (Lcom/spotify/voyager/jni/Index/SpaceType;IJJJJLcom/spotify/voyager/jni/Index/StorageDataType;)V + * Signature: (Lcom/spotify/voyager/jni/Index/SpaceType;IJJJJLcom/spotify/voyager/jni/Index/StorageDataType;)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeConstructor( - JNIEnv *, jobject, jobject, jint, jlong, jlong, jlong, jlong, jobject); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeConstructor + (JNIEnv *, jobject, jobject, jint, jlong, jlong, jlong, jlong, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: nativeLoadFromFileWithParameters - * Signature: - * (Ljava/lang/String;Lcom/spotify/voyager/jni/Index/SpaceType;ILcom/spotify/voyager/jni/Index/StorageDataType;)V + * Signature: (Ljava/lang/String;Lcom/spotify/voyager/jni/Index/SpaceType;ILcom/spotify/voyager/jni/Index/StorageDataType;)V */ -JNIEXPORT void JNICALL -Java_com_spotify_voyager_jni_Index_nativeLoadFromFileWithParameters( - JNIEnv *, jobject, jstring, jobject, jint, jobject); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeLoadFromFileWithParameters + (JNIEnv *, jobject, jstring, jobject, jint, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: nativeLoadFromFile * Signature: (Ljava/lang/String;)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeLoadFromFile( - JNIEnv *, jobject, jstring); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeLoadFromFile + (JNIEnv *, jobject, jstring); /* * Class: com_spotify_voyager_jni_Index * Method: nativeLoadFromInputStreamWithParameters - * Signature: - * (Ljava/io/InputStream;Lcom/spotify/voyager/jni/Index/SpaceType;ILcom/spotify/voyager/jni/Index/StorageDataType;)V + * Signature: 
(Ljava/io/InputStream;Lcom/spotify/voyager/jni/Index/SpaceType;ILcom/spotify/voyager/jni/Index/StorageDataType;)V */ -JNIEXPORT void JNICALL -Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStreamWithParameters( - JNIEnv *, jobject, jobject, jobject, jint, jobject); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStreamWithParameters + (JNIEnv *, jobject, jobject, jobject, jint, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: nativeLoadFromInputStream * Signature: (Ljava/io/InputStream;)V */ -JNIEXPORT void JNICALL -Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStream(JNIEnv *, jobject, - jobject); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStream + (JNIEnv *, jobject, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: nativeDestructor * Signature: ()V */ -JNIEXPORT void JNICALL -Java_com_spotify_voyager_jni_Index_nativeDestructor(JNIEnv *, jobject); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeDestructor + (JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: setEf * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_setEf(JNIEnv *, - jobject, jlong); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_setEf + (JNIEnv *, jobject, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: getEf * Signature: ()I */ -JNIEXPORT jint JNICALL Java_com_spotify_voyager_jni_Index_getEf(JNIEnv *, - jobject); +JNIEXPORT jint JNICALL Java_com_spotify_voyager_jni_Index_getEf + (JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: getSpace * Signature: ()Lcom/spotify/voyager/jni/Index/SpaceType; */ -JNIEXPORT jobject JNICALL Java_com_spotify_voyager_jni_Index_getSpace(JNIEnv *, - jobject); +JNIEXPORT jobject JNICALL Java_com_spotify_voyager_jni_Index_getSpace + (JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: getNumDimensions * Signature: ()I */ -JNIEXPORT jint 
JNICALL -Java_com_spotify_voyager_jni_Index_getNumDimensions(JNIEnv *, jobject); +JNIEXPORT jint JNICALL Java_com_spotify_voyager_jni_Index_getNumDimensions + (JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: setNumThreads * Signature: (I)V */ -JNIEXPORT void JNICALL -Java_com_spotify_voyager_jni_Index_setNumThreads(JNIEnv *, jobject, jint); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_setNumThreads + (JNIEnv *, jobject, jint); /* * Class: com_spotify_voyager_jni_Index * Method: getNumThreads * Signature: ()I */ -JNIEXPORT jint JNICALL -Java_com_spotify_voyager_jni_Index_getNumThreads(JNIEnv *, jobject); +JNIEXPORT jint JNICALL Java_com_spotify_voyager_jni_Index_getNumThreads + (JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: saveIndex * Signature: (Ljava/lang/String;)V */ -JNIEXPORT void JNICALL -Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_lang_String_2(JNIEnv *, - jobject, - jstring); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_lang_String_2 + (JNIEnv *, jobject, jstring); /* * Class: com_spotify_voyager_jni_Index * Method: saveIndex * Signature: (Ljava/io/OutputStream;)V */ -JNIEXPORT void JNICALL -Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_io_OutputStream_2(JNIEnv *, - jobject, - jobject); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_io_OutputStream_2 + (JNIEnv *, jobject, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: addItem * Signature: ([F)V */ -JNIEXPORT void JNICALL -Java_com_spotify_voyager_jni_Index_addItem___3F(JNIEnv *, jobject, jfloatArray); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItem___3F + (JNIEnv *, jobject, jfloatArray); /* * Class: com_spotify_voyager_jni_Index * Method: addItem * Signature: ([FJ)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItem___3FJ( - JNIEnv *, jobject, jfloatArray, jlong); +JNIEXPORT void JNICALL 
Java_com_spotify_voyager_jni_Index_addItem___3FJ + (JNIEnv *, jobject, jfloatArray, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: addItems * Signature: ([[FI)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItems___3_3FI( - JNIEnv *, jobject, jobjectArray, jint); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItems___3_3FI + (JNIEnv *, jobject, jobjectArray, jint); /* * Class: com_spotify_voyager_jni_Index * Method: addItems * Signature: ([[F[JI)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItems___3_3F_3JI( - JNIEnv *, jobject, jobjectArray, jlongArray, jint); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItems___3_3F_3JI + (JNIEnv *, jobject, jobjectArray, jlongArray, jint); /* * Class: com_spotify_voyager_jni_Index * Method: getVector * Signature: (J)[F */ -JNIEXPORT jfloatArray JNICALL -Java_com_spotify_voyager_jni_Index_getVector(JNIEnv *, jobject, jlong); +JNIEXPORT jfloatArray JNICALL Java_com_spotify_voyager_jni_Index_getVector + (JNIEnv *, jobject, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: getVectors * Signature: ([J)[[F */ -JNIEXPORT jobjectArray JNICALL -Java_com_spotify_voyager_jni_Index_getVectors(JNIEnv *, jobject, jlongArray); +JNIEXPORT jobjectArray JNICALL Java_com_spotify_voyager_jni_Index_getVectors + (JNIEnv *, jobject, jlongArray); /* * Class: com_spotify_voyager_jni_Index * Method: getIDs * Signature: ()[J */ -JNIEXPORT jlongArray JNICALL Java_com_spotify_voyager_jni_Index_getIDs(JNIEnv *, - jobject); +JNIEXPORT jlongArray JNICALL Java_com_spotify_voyager_jni_Index_getIDs + (JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: query * Signature: ([FIJ)Lcom/spotify/voyager/jni/Index/QueryResults; */ -JNIEXPORT jobject JNICALL Java_com_spotify_voyager_jni_Index_query___3FIJ( - JNIEnv *, jobject, jfloatArray, jint, jlong); +JNIEXPORT jobject JNICALL Java_com_spotify_voyager_jni_Index_query___3FIJ + (JNIEnv *, jobject, 
jfloatArray, jint, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: query * Signature: ([[FIIJ)[Lcom/spotify/voyager/jni/Index/QueryResults; */ -JNIEXPORT jobjectArray JNICALL -Java_com_spotify_voyager_jni_Index_query___3_3FIIJ(JNIEnv *, jobject, - jobjectArray, jint, jint, - jlong); +JNIEXPORT jobjectArray JNICALL Java_com_spotify_voyager_jni_Index_query___3_3FIIJ + (JNIEnv *, jobject, jobjectArray, jint, jint, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: markDeleted * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_markDeleted(JNIEnv *, - jobject, - jlong); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_markDeleted + (JNIEnv *, jobject, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: unmarkDeleted * Signature: (J)V */ -JNIEXPORT void JNICALL -Java_com_spotify_voyager_jni_Index_unmarkDeleted(JNIEnv *, jobject, jlong); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_unmarkDeleted + (JNIEnv *, jobject, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: resizeIndex * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_resizeIndex(JNIEnv *, - jobject, - jlong); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_resizeIndex + (JNIEnv *, jobject, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: getMaxElements * Signature: ()J */ -JNIEXPORT jlong JNICALL -Java_com_spotify_voyager_jni_Index_getMaxElements(JNIEnv *, jobject); +JNIEXPORT jlong JNICALL Java_com_spotify_voyager_jni_Index_getMaxElements + (JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: getNumElements * Signature: ()J */ -JNIEXPORT jlong JNICALL -Java_com_spotify_voyager_jni_Index_getNumElements(JNIEnv *, jobject); +JNIEXPORT jlong JNICALL Java_com_spotify_voyager_jni_Index_getNumElements + (JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: getEfConstruction * Signature: ()J */ -JNIEXPORT jlong JNICALL 
-Java_com_spotify_voyager_jni_Index_getEfConstruction(JNIEnv *, jobject); +JNIEXPORT jlong JNICALL Java_com_spotify_voyager_jni_Index_getEfConstruction + (JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: getM * Signature: ()J */ -JNIEXPORT jlong JNICALL Java_com_spotify_voyager_jni_Index_getM(JNIEnv *, - jobject); +JNIEXPORT jlong JNICALL Java_com_spotify_voyager_jni_Index_getM + (JNIEnv *, jobject); #ifdef __cplusplus } From 059c7bd081ab064f52b43313ec0c1d4b75fd1647 Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Fri, 22 Mar 2024 17:58:40 -0600 Subject: [PATCH 03/25] Try to clean up my mess a bit. Also add PR template --- .github/pull_request_template.md | 38 +++ CONTRIBUTING.md | 13 +- cpp/src/E4M3.h | 23 +- cpp/src/Index.h | 26 +- cpp/src/Metadata.h | 26 +- cpp/src/Metadata_new.txt | 207 -------------- cpp/src/StreamUtils.h | 37 +-- cpp/src/TypedIndex.h | 299 ++++++++------------ cpp/src/array_utils.h | 115 +++----- cpp/src/cmake_install.cmake | 39 --- cpp/src/hnswalg.h | 467 +++++++++++-------------------- cpp/src/hnswlib.h | 11 +- cpp/src/std_utils.h | 22 +- 13 files changed, 416 insertions(+), 907 deletions(-) create mode 100644 .github/pull_request_template.md delete mode 100644 cpp/src/Metadata_new.txt delete mode 100644 cpp/src/cmake_install.cmake diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..2f9fab38 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,38 @@ +# Pull Request + +## Description + + +## Related Issues + + +## Changes Made + + +### C++ + + +### Python + + +### Java + + +## Testing + + +## Checklist + + +- [ ] My code follows the code style of this project. +- [ ] I have added and/or updated appropriate documentation (if applicable). +- [ ] All new and existing tests pass locally with these changes. +- [ ] I have run static code analysis (if available) and resolved any issues. +- [ ] I have considered backward compatibility (if applicable). 
+- [ ] I have confirmed that this PR does not introduce any security vulnerabilities. + +## Additional Comments + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a639d9eb..9319a0c0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,7 +11,7 @@ To compile Voyager from scratch, the following packages will need to be installe - [Python 3.7](https://www.python.org/downloads/) or higher. - A C++ compiler, e.g. `gcc`, `clang`, etc. -### Building Voyager +### Building Voyager Python ```shell git clone git@github.com:spotify/voyager.git @@ -127,7 +127,7 @@ mvn verify Use [`clang-format`](https://clang.llvm.org/docs/ClangFormat.html) for C++ code, and `black` with defaults for Python code. -In order to check and run formatting within the python module, you can use tox to facilitate this. +In order to check and run formatting within the python module (but not the c++ core module), you can use tox to facilitate this. ```bash cd python # Check formatting only (don't change files) @@ -136,9 +136,14 @@ tox -e check-formatting tox -e format ``` -For C++ code, you can use the following command to check formatting: +For C++ code within the core `cpp` module, you can use the following command to check formatting: ```bash cd cpp +# Check formatting only +clang-format --verbose --dry-run -i src/*.h +# Run formatter +clang-format --verbose -i src/*.h +``` ## Issues @@ -163,7 +168,7 @@ When creating an issue please try to ahere to the following format: Files should be exempt of trailing spaces. -We adhere to a specific format for commit messages. Please write your commit +We adhere to a specific format for pull requests. Please write your commit messages along these guidelines. Please keep the line width no greater than 80 columns (You can use `fmt -n -p -w 80` to accomplish this). 
diff --git a/cpp/src/E4M3.h b/cpp/src/E4M3.h index 37f6b523..c4258e92 100644 --- a/cpp/src/E4M3.h +++ b/cpp/src/E4M3.h @@ -294,11 +294,9 @@ class E4M3 { E4M3() : E4M3(0, 0, 0) {} - E4M3(uint8_t sign, uint8_t exponent, uint8_t mantissa) - : sign(sign), exponent(exponent), mantissa(mantissa) {} + E4M3(uint8_t sign, uint8_t exponent, uint8_t mantissa) : sign(sign), exponent(exponent), mantissa(mantissa) {} - E4M3(uint8_t c) - : sign(c >> 7), exponent((c >> 3) & 0b1111), mantissa(c & 0b111) {} + E4M3(uint8_t c) : sign(c >> 7), exponent((c >> 3) & 0b1111), mantissa(c & 0b111) {} E4M3(float input) { if (std::isnan(input) || std::isinf(input)) { @@ -316,15 +314,11 @@ class E4M3 { // TODO: Don't hard-code these, and instead compute them based on the bit // widths above! if (input < -448 || input > 448) { - throw std::domain_error( - "E4M3 cannot represent values outside of [-448, 448]."); + throw std::domain_error("E4M3 cannot represent values outside of [-448, 448]."); } - int originalExponent = ((*((const unsigned int *)&input) & - 0b01111111100000000000000000000000) >> - 23); - int originalMantissa = - (*((const unsigned int *)&input) & 0b00000000011111111111111111111111); + int originalExponent = ((*((const unsigned int *)&input) & 0b01111111100000000000000000000000) >> 23); + int originalMantissa = (*((const unsigned int *)&input) & 0b00000000011111111111111111111111); sign = input < 0; @@ -386,10 +380,9 @@ class E4M3 { if (mantissa == 0b111) { if (exponent == 0b1111) { // Rounding up would push us just outside of the representable range! 
- throw std::domain_error( - "E4M3 cannot represent values outside of [-448, " - "448] - tried to convert " + - std::to_string(input) + "."); + throw std::domain_error("E4M3 cannot represent values outside of [-448, " + "448] - tried to convert " + + std::to_string(input) + "."); } else { exponent++; mantissa = 0; diff --git a/cpp/src/Index.h b/cpp/src/Index.h index f6fe581c..e5cca6f7 100644 --- a/cpp/src/Index.h +++ b/cpp/src/Index.h @@ -49,7 +49,7 @@ class Index { virtual void setEF(size_t ef) = 0; virtual int getEF() const = 0; - virtual SpaceType getSpace() const = 0; + virtual SpaceType getSpace( ) const = 0; virtual std::string getSpaceName() const = 0; virtual StorageDataType getStorageDataType() const = 0; @@ -62,33 +62,27 @@ class Index { virtual void saveIndex(const std::string &pathToIndex) = 0; virtual void saveIndex(std::shared_ptr outputStream) = 0; - virtual void loadIndex(const std::string &pathToIndex, - bool searchOnly = false) = 0; - virtual void loadIndex(std::shared_ptr inputStream, - bool searchOnly = false) = 0; + virtual void loadIndex(const std::string &pathToIndex, bool searchOnly = false) = 0; + virtual void loadIndex(std::shared_ptr inputStream, bool searchOnly = false) = 0; virtual float getDistance(std::vector a, std::vector b) = 0; - virtual hnswlib::labeltype addItem(std::vector vector, - std::optional id) = 0; - virtual std::vector - addItems(NDArray input, std::vector ids = {}, - int numThreads = -1) = 0; + virtual hnswlib::labeltype addItem(std::vector vector, std::optional id) = 0; + virtual std::vector addItems(NDArray input, std::vector ids = {}, + int numThreads = -1) = 0; virtual std::vector getVector(hnswlib::labeltype id) = 0; virtual NDArray getVectors(std::vector ids) = 0; virtual std::vector getIDs() const = 0; virtual long long getIDsCount() const = 0; - virtual const std::unordered_map & - getIDsMap() const = 0; + virtual const std::unordered_map &getIDsMap() const = 0; - virtual std::tuple, std::vector> - 
query(std::vector queryVector, int k = 1, long queryEf = -1) = 0; + virtual std::tuple, std::vector> query(std::vector queryVector, + int k = 1, long queryEf = -1) = 0; virtual std::tuple, NDArray> - query(NDArray queryVectors, int k = 1, int numThreads = -1, - long queryEf = -1) = 0; + query(NDArray queryVectors, int k = 1, int numThreads = -1, long queryEf = -1) = 0; virtual void markDeleted(hnswlib::labeltype label) = 0; virtual void unmarkDeleted(hnswlib::labeltype label) = 0; diff --git a/cpp/src/Metadata.h b/cpp/src/Metadata.h index 0032346b..7f460268 100644 --- a/cpp/src/Metadata.h +++ b/cpp/src/Metadata.h @@ -33,10 +33,9 @@ namespace Metadata { */ class V1 { public: - V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, - float maxNorm, bool useOrderPreservingTransform) - : numDimensions(numDimensions), spaceType(spaceType), - storageDataType(storageDataType), maxNorm(maxNorm), + V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, float maxNorm, + bool useOrderPreservingTransform) + : numDimensions(numDimensions), spaceType(spaceType), storageDataType(storageDataType), maxNorm(maxNorm), useOrderPreservingTransform(useOrderPreservingTransform) {} V1() {} @@ -52,20 +51,12 @@ class V1 { float getMaxNorm() { return maxNorm; } - bool getUseOrderPreservingTransform() const { - return useOrderPreservingTransform; - } - void setUseOrderPreservingTransform(bool newValue) { - useOrderPreservingTransform = newValue; - } + bool getUseOrderPreservingTransform() const { return useOrderPreservingTransform; } + void setUseOrderPreservingTransform(bool newValue) { useOrderPreservingTransform = newValue; } - void setNumDimensions(int newNumDimensions) { - numDimensions = newNumDimensions; - } + void setNumDimensions(int newNumDimensions) { numDimensions = newNumDimensions; } - void setStorageDataType(StorageDataType newStorageDataType) { - storageDataType = newStorageDataType; - } + void setStorageDataType(StorageDataType 
newStorageDataType) { storageDataType = newStorageDataType; } void setSpaceType(SpaceType newSpaceType) { spaceType = newSpaceType; } @@ -98,8 +89,7 @@ class V1 { bool useOrderPreservingTransform; }; -static std::unique_ptr -loadFromStream(std::shared_ptr inputStream) { +static std::unique_ptr loadFromStream(std::shared_ptr inputStream) { uint32_t header = inputStream->peek(); if (header != 'AYOV') { return nullptr; diff --git a/cpp/src/Metadata_new.txt b/cpp/src/Metadata_new.txt deleted file mode 100644 index fcdbf5c4..00000000 --- a/cpp/src/Metadata_new.txt +++ /dev/null @@ -1,207 +0,0 @@ -#pragma once -/*- - * -\-\- - * voyager - * -- - * Copyright (C) 2016 - 2023 Spotify AB - * - * This file is heavily based on hnswlib (https://github.com/nmslib/hnswlib, - * Apache 2.0-licensed, no copyright author listed) - * -- - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * -/-/- - */ -#include -#include - -#include "Enums.h" -#include "StreamUtils.h" - -namespace voyager { -namespace Metadata { -/** - * @brief A basic metadata class that stores the SpaceType, StorageDataType, and - * number of dimensions. 
- */ -class V1 { -public: - V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, float maxNorm, - bool useOrderPreservingTransform) - : numDimensions(numDimensions), spaceType(spaceType), storageDataType(storageDataType), maxNorm(maxNorm), - useOrderPreservingTransform(useOrderPreservingTransform) {} - - V1() {} - virtual ~V1() {} - - int version() const { return 1; } - - int getNumDimensions() { return numDimensions; } - - StorageDataType getStorageDataType() { return storageDataType; } - - SpaceType getSpaceType() { return spaceType; } - - float getMaxNorm() { return maxNorm; } - - bool getUseOrderPreservingTransform() const { return useOrderPreservingTransform; } - void setUseOrderPreservingTransform(bool newValue) { useOrderPreservingTransform = newValue; } - - void setNumDimensions(int newNumDimensions) { numDimensions = newNumDimensions; } - - void setStorageDataType(StorageDataType newStorageDataType) { storageDataType = newStorageDataType; } - - void setSpaceType(SpaceType newSpaceType) { spaceType = newSpaceType; } - - void setMaxNorm(float newMaxNorm) { maxNorm = newMaxNorm; } - - virtual void serializeToStream(std::shared_ptr stream) { - stream->write("VOYA", 4); - writeBinaryPOD(stream, version()); - writeBinaryPOD(stream, numDimensions); - writeBinaryPOD(stream, spaceType); - writeBinaryPOD(stream, storageDataType); - writeBinaryPOD(stream, maxNorm); - writeBinaryPOD(stream, useOrderPreservingTransform); - }; - - virtual void loadFromStream(std::shared_ptr stream) { - // Version has already been loaded before we get here! 
- readBinaryPOD(stream, numDimensions); - readBinaryPOD(stream, spaceType); - readBinaryPOD(stream, storageDataType); - readBinaryPOD(stream, maxNorm); - readBinaryPOD(stream, useOrderPreservingTransform); - }; - -private: - int numDimensions; - SpaceType spaceType; - StorageDataType storageDataType; - float maxNorm; - bool useOrderPreservingTransform; -}; - -/** - * @brief A basic metadata class that stores the number of dimensions, - * the SpaceType, StorageDataType, and number of dimensions. - */ -class V2 { -public: - V2(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, float maxNorm, - bool useOrderPreservingTransform, vector labels) - : numDimensions(numDimensions), spaceType(spaceType), storageDataType(storageDataType), maxNorm(maxNorm), - useOrderPreservingTransform(useOrderPreservingTransform), labels(labels) {} - - V2() {} - virtual ~V2() {} - - int version() const { return 2; } - - int getNumDimensions() { return numDimensions; } - - StorageDataType getStorageDataType() { return storageDataType; } - - SpaceType getSpaceType() { return spaceType; } - - float getMaxNorm() { return maxNorm; } - - bool getUseOrderPreservingTransform() const { return useOrderPreservingTransform; } - - std::string *getLabels() { return labels; } - - void setUseOrderPreservingTransform(bool newValue) { useOrderPreservingTransform = newValue; } - - void setNumDimensions(int newNumDimensions) { numDimensions = newNumDimensions; } - - void setStorageDataType(StorageDataType newStorageDataType) { storageDataType = newStorageDataType; } - - void setSpaceType(SpaceType newSpaceType) { spaceType = newSpaceType; } - - void setMaxNorm(float newMaxNorm) { maxNorm = newMaxNorm; } - - virtual void serializeToStream(std::shared_ptr stream) { - stream->write("VOYA", 4); - writeBinaryPOD(stream, version()); - writeBinaryPOD(stream, numDimensions); - writeBinaryPOD(stream, spaceType); - writeBinaryPOD(stream, storageDataType); - writeBinaryPOD(stream, maxNorm); - 
writeBinaryPOD(stream, useOrderPreservingTransform); - writeBinaryPOD(stream, labels); - }; - - virtual void loadFromStream(std::shared_ptr stream) { - // Version has already been loaded before we get here! - readBinaryPOD(stream, numDimensions); - readBinaryPOD(stream, spaceType); - readBinaryPOD(stream, storageDataType); - readBinaryPOD(stream, maxNorm); - readBinaryPOD(stream, useOrderPreservingTransform); - readBinaryPOD(stream, labels); - }; - -private: - int numDimensions; - SpaceType spaceType; - StorageDataType storageDataType; - float maxNorm; - bool useOrderPreservingTransform; - vector labels[]; -}; - -static std::unique_ptr loadFromStream(std::shared_ptr inputStream) { - uint32_t header = inputStream->peek(); - if (header != 'AYOV') { - return nullptr; - } - - // Actually read instead of just peeking: - inputStream->read((char *)&header, sizeof(header)); - - int version; - readBinaryPOD(inputStream, version); - - switch (version) { - case 1: { - std::unique_ptr metadata = std::make_unique(); - metadata->loadFromStream(inputStream); - return metadata; - } - case 2: { - std::unique_ptr metadata = std::make_unique(); - metadata->loadFromStream(inputStream); - return metadata; - } - default: { - std::stringstream stream; - stream << std::hex << version; - std::string resultAsHex(stream.str()); - - std::string error = "Unable to parse version of Voyager index file; found " - "unsupported version \"0x" + - resultAsHex + "\"."; - - if (version < 20) { - error += " A newer version of the Voyager library may be able to read " - "this index."; - } else { - error += " This index may be corrupted (or not a Voyager index)."; - } - - throw std::domain_error(error); - } - } -}; - -} // namespace Metadata -}; // namespace voyager \ No newline at end of file diff --git a/cpp/src/StreamUtils.h b/cpp/src/StreamUtils.h index c3d74148..5f0f68b1 100644 --- a/cpp/src/StreamUtils.h +++ b/cpp/src/StreamUtils.h @@ -39,9 +39,7 @@ class InputStream { virtual bool isExhausted() = 
0; virtual long long getPosition() = 0; virtual bool setPosition(long long position) = 0; - virtual bool advanceBy(long long numBytes) { - return setPosition(getPosition() + numBytes); - } + virtual bool advanceBy(long long numBytes) { return setPosition(getPosition() + numBytes); } virtual uint32_t peek() = 0; }; @@ -64,18 +62,12 @@ class FileInputStream : public InputStream { virtual bool isSeekable() { return isRegularFile; } virtual long long getTotalLength() { return sizeInBytes; } - virtual long long read(char *buffer, long long bytesToRead) { - return fread(buffer, 1, bytesToRead, handle); - } + virtual long long read(char *buffer, long long bytesToRead) { return fread(buffer, 1, bytesToRead, handle); } virtual bool isExhausted() { return feof(handle); } virtual long long getPosition() { return ftell(handle); } - virtual bool setPosition(long long position) { - return fseek(handle, position, SEEK_SET) == 0; - } - virtual bool advanceBy(long long bytes) { - return fseek(handle, bytes, SEEK_CUR) == 0; - } + virtual bool setPosition(long long position) { return fseek(handle, position, SEEK_SET) == 0; } + virtual bool advanceBy(long long bytes) { return fseek(handle, bytes, SEEK_CUR) == 0; } virtual uint32_t peek() { uint32_t result = 0; long long lastPosition = getPosition(); @@ -83,10 +75,8 @@ class FileInputStream : public InputStream { setPosition(lastPosition); return result; } else { - throw std::runtime_error( - "Failed to peek " + std::to_string(sizeof(result)) + - " bytes from file \"" + filename + "\" at index " + - std::to_string(lastPosition) + "."); + throw std::runtime_error("Failed to peek " + std::to_string(sizeof(result)) + " bytes from file \"" + filename + + "\" at index " + std::to_string(lastPosition) + "."); } } @@ -123,8 +113,7 @@ class FileOutputStream : public OutputStream { errno = 0; handle = fopen(filename.c_str(), "wb"); if (!handle) { - throw std::runtime_error("Failed to open file for writing (errno " + - std::to_string(errno) + 
"): " + filename); + throw std::runtime_error("Failed to open file for writing (errno " + std::to_string(errno) + "): " + filename); } } @@ -161,20 +150,16 @@ class MemoryOutputStream : public OutputStream { std::ostringstream outputStream; }; -template -static void writeBinaryPOD(std::shared_ptr out, const T &podRef) { +template static void writeBinaryPOD(std::shared_ptr out, const T &podRef) { if (!out->write((char *)&podRef, sizeof(T))) { - throw std::runtime_error("Failed to write " + std::to_string(sizeof(T)) + - " bytes to stream!"); + throw std::runtime_error("Failed to write " + std::to_string(sizeof(T)) + " bytes to stream!"); } } -template -static void readBinaryPOD(std::shared_ptr in, T &podRef) { +template static void readBinaryPOD(std::shared_ptr in, T &podRef) { long long bytesRead = in->read((char *)&podRef, sizeof(T)); if (bytesRead != sizeof(T)) { - throw std::runtime_error("Failed to read " + std::to_string(sizeof(T)) + - " bytes from stream! Got " + + throw std::runtime_error("Failed to read " + std::to_string(sizeof(T)) + " bytes from stream! 
Got " + std::to_string(bytesRead) + "."); } } \ No newline at end of file diff --git a/cpp/src/TypedIndex.h b/cpp/src/TypedIndex.h index e8905e4e..70b3d834 100644 --- a/cpp/src/TypedIndex.h +++ b/cpp/src/TypedIndex.h @@ -39,20 +39,34 @@ template inline const StorageDataType storageDataType(); template inline const std::string storageDataTypeName(); -template <> const StorageDataType storageDataType() { - return StorageDataType::Float8; -} -template <> const StorageDataType storageDataType() { - return StorageDataType::Float32; -} -template <> const StorageDataType storageDataType() { - return StorageDataType::E4M3; -} +template <> const StorageDataType storageDataType() { return StorageDataType::Float8; } +template <> const StorageDataType storageDataType() { return StorageDataType::Float32; } +template <> const StorageDataType storageDataType() { return StorageDataType::E4M3; } template <> const std::string storageDataTypeName() { return "Float8"; } template <> const std::string storageDataTypeName() { return "Float32"; } template <> const std::string storageDataTypeName() { return "E4M3"; } +template dist_t ensureNotNegative(dist_t distance, hnswlib::labeltype label) { + if constexpr (std::is_same_v) { + // Allow for a very slight negative distance if using E4M3 + if (distance < 0 && distance >= -0.14) { + return 0; + } + } + + if (distance < 0) { + if (distance >= -0.00001) { + return 0; + } + + throw std::runtime_error("Potential candidate (with label '" + std::to_string(label) + "') had negative distance " + + std::to_string(distance) + ". This may indicate a corrupted index file."); + } + + return distance; +} + /** * A C++ wrapper class for a typed HNSW index. * @@ -60,8 +74,7 @@ template <> const std::string storageDataTypeName() { return "E4M3"; } * so its interfaces should only include C++ or C datatypes, and * it should avoid unnecessary memory copies if possible. 
*/ -template > +template > class TypedIndex : public Index { private: static const int ser_version = 1; // serialization version @@ -87,33 +100,27 @@ class TypedIndex : public Index { /** * Create an empty index with the given parameters. */ - TypedIndex(const SpaceType space, const int dimensions, const size_t M = 12, - const size_t efConstruction = 200, const size_t randomSeed = 1, - const size_t maxElements = 1, + TypedIndex(const SpaceType space, const int dimensions, const size_t M = 12, const size_t efConstruction = 200, + const size_t randomSeed = 1, const size_t maxElements = 1, const bool enableOrderPreservingTransform = true) : space(space), dimensions(dimensions), - metadata(std::make_unique( - dimensions, space, getStorageDataType(), 0.0, - space == InnerProduct)) { + metadata(std::make_unique(dimensions, space, getStorageDataType(), 0.0, + space == InnerProduct)) { switch (space) { case Euclidean: - spaceImpl = std::make_unique< - hnswlib::EuclideanSpace>(dimensions); + spaceImpl = std::make_unique>(dimensions); break; case InnerProduct: useOrderPreservingTransform = enableOrderPreservingTransform; - spaceImpl = std::make_unique< - hnswlib::InnerProductSpace>( + spaceImpl = std::make_unique>( dimensions + (useOrderPreservingTransform ? 
1 : 0)); break; case Cosine: - spaceImpl = std::make_unique< - hnswlib::InnerProductSpace>(dimensions); + spaceImpl = std::make_unique>(dimensions); normalize = true; break; default: - throw new std::runtime_error( - "Space must be one of Euclidean, InnerProduct, or Cosine."); + throw new std::runtime_error("Space must be one of Euclidean, InnerProduct, or Cosine."); } ep_added = true; @@ -122,8 +129,8 @@ class TypedIndex : public Index { defaultEF = 10; currentLabel = 0; - algorithmImpl = std::make_unique>( - spaceImpl.get(), maxElements, M, efConstruction, randomSeed); + algorithmImpl = std::make_unique>(spaceImpl.get(), maxElements, M, + efConstruction, randomSeed); ep_added = false; algorithmImpl->ef_ = defaultEF; @@ -139,13 +146,12 @@ class TypedIndex : public Index { * * This constructor is only used to load a V0-type index from file. */ - TypedIndex(const std::string &indexFilename, const SpaceType space, - const int dimensions, bool searchOnly = false) + TypedIndex(const std::string &indexFilename, const SpaceType space, const int dimensions, bool searchOnly = false) : TypedIndex(space, dimensions, /* M */ 12, /* efConstruction */ 200, /* randomSeed */ 1, /* maxElements */ 1, /* enableOrderPreservingTransform */ false) { - algorithmImpl = std::make_unique>( - spaceImpl.get(), indexFilename, 0, searchOnly); + algorithmImpl = + std::make_unique>(spaceImpl.get(), indexFilename, 0, searchOnly); currentLabel = algorithmImpl->cur_element_count; } @@ -155,13 +161,13 @@ class TypedIndex : public Index { * * This constructor is only used to load a V0-type index from a stream. 
*/ - TypedIndex(std::shared_ptr inputStream, const SpaceType space, - const int dimensions, bool searchOnly = false) + TypedIndex(std::shared_ptr inputStream, const SpaceType space, const int dimensions, + bool searchOnly = false) : TypedIndex(space, dimensions, /* M */ 12, /* efConstruction */ 200, /* randomSeed */ 1, /* maxElements */ 1, /* enableOrderPreservingTransform */ false) { - algorithmImpl = std::make_unique>( - spaceImpl.get(), inputStream, 0, searchOnly); + algorithmImpl = + std::make_unique>(spaceImpl.get(), inputStream, 0, searchOnly); currentLabel = algorithmImpl->cur_element_count; } @@ -169,15 +175,15 @@ class TypedIndex : public Index { * Load an index from the given input stream, interpreting * it as the given Space and number of dimensions. */ - TypedIndex(std::unique_ptr metadata, - std::shared_ptr inputStream, bool searchOnly = false) + TypedIndex(std::unique_ptr metadata, std::shared_ptr inputStream, + bool searchOnly = false) : TypedIndex(metadata->getSpaceType(), metadata->getNumDimensions(), /* M */ 12, /* efConstruction */ 200, /* randomSeed */ 1, /* maxElements */ 1, /* enableOrderPreservingTransform */ metadata->getUseOrderPreservingTransform()) { - algorithmImpl = std::make_unique>( - spaceImpl.get(), inputStream, 0, searchOnly); + algorithmImpl = + std::make_unique>(spaceImpl.get(), inputStream, 0, searchOnly); max_norm = metadata->getMaxNorm(); currentLabel = algorithmImpl->cur_element_count; } @@ -200,13 +206,9 @@ class TypedIndex : public Index { } } - StorageDataType getStorageDataType() const { - return storageDataType(); - } + StorageDataType getStorageDataType() const { return storageDataType(); } - std::string getStorageDataTypeName() const { - return storageDataTypeName(); - } + std::string getStorageDataTypeName() const { return storageDataTypeName(); } void setEF(size_t ef) { defaultEF = ef; @@ -220,8 +222,7 @@ class TypedIndex : public Index { throw std::runtime_error("Not implemented."); } - void 
loadIndex(std::shared_ptr inputStream, - bool searchOnly = false) { + void loadIndex(std::shared_ptr inputStream, bool searchOnly = false) { throw std::runtime_error("Not implemented."); } @@ -248,13 +249,11 @@ class TypedIndex : public Index { float getDistance(std::vector _a, std::vector _b) { if ((int)_a.size() != dimensions || (int)_b.size() != dimensions) { throw std::runtime_error("Index has " + std::to_string(dimensions) + - " dimensions, but received vectors of size: " + - std::to_string(_a.size()) + " and " + + " dimensions, but received vectors of size: " + std::to_string(_a.size()) + " and " + std::to_string(_b.size()) + "."); } - int actualDimensions = - useOrderPreservingTransform ? dimensions + 1 : dimensions; + int actualDimensions = useOrderPreservingTransform ? dimensions + 1 : dimensions; std::vector a(actualDimensions); std::vector b(actualDimensions); @@ -267,10 +266,8 @@ class TypedIndex : public Index { } if (normalize) { - normalizeVector(_a.data(), a.data(), - a.size()); - normalizeVector(_b.data(), b.data(), - b.size()); + normalizeVector(_a.data(), a.data(), a.size()); + normalizeVector(_b.data(), b.data(), b.size()); } else { floatToDataType(_a.data(), a.data(), a.size()); floatToDataType(_b.data(), b.data(), b.size()); @@ -279,8 +276,7 @@ class TypedIndex : public Index { return spaceImpl->get_dist_func()(a.data(), b.data(), actualDimensions); } - hnswlib::labeltype addItem(std::vector vector, - std::optional id) { + hnswlib::labeltype addItem(std::vector vector, std::optional id) { std::vector ids; if (id) { @@ -290,9 +286,8 @@ class TypedIndex : public Index { return addItems(NDArray(vector, {1, (int)vector.size()}), ids)[0]; } - std::vector - addItems(NDArray floatInput, - std::vector ids = {}, int numThreads = -1) { + std::vector addItems(NDArray floatInput, std::vector ids = {}, + int numThreads = -1) { if (numThreads <= 0) numThreads = numThreadsDefault; @@ -300,10 +295,9 @@ class TypedIndex : public Index { size_t features = 
std::get<1>(floatInput.shape); if (features != (size_t)dimensions) { - throw std::domain_error( - "The provided vector(s) have " + std::to_string(features) + - " dimensions, but this index expects vectors with " + - std::to_string(dimensions) + " dimensions."); + throw std::domain_error("The provided vector(s) have " + std::to_string(features) + + " dimensions, but this index expects vectors with " + std::to_string(dimensions) + + " dimensions."); } std::vector idsToReturn(rows); @@ -314,11 +308,9 @@ class TypedIndex : public Index { } if (!ids.empty() && (unsigned long)ids.size() != rows) { - throw std::runtime_error( - std::to_string(rows) + " vectors were provided, but " + - std::to_string(ids.size()) + - " IDs were provided. If providing IDs along with vectors, the number " - "of provided IDs must match the number of vectors."); + throw std::runtime_error(std::to_string(rows) + " vectors were provided, but " + std::to_string(ids.size()) + + " IDs were provided. If providing IDs along with vectors, the number " + "of provided IDs must match the number of vectors."); } // TODO: Should we always double the number of elements instead? Maybe use @@ -332,8 +324,7 @@ class TypedIndex : public Index { } } - int actualDimensions = - useOrderPreservingTransform ? dimensions + 1 : dimensions; + int actualDimensions = useOrderPreservingTransform ? 
dimensions + 1 : dimensions; int start = 0; if (!ep_added) { @@ -342,19 +333,17 @@ class TypedIndex : public Index { std::vector inputVector(actualDimensions); std::vector convertedVector(actualDimensions); - std::memcpy(inputVector.data(), floatInput[0], - dimensions * sizeof(float)); + std::memcpy(inputVector.data(), floatInput[0], dimensions * sizeof(float)); if (useOrderPreservingTransform) { inputVector[dimensions] = getDotFactorAndUpdateNorm(floatInput[0]); } if (normalize) { - normalizeVector( - inputVector.data(), convertedVector.data(), convertedVector.size()); + normalizeVector(inputVector.data(), convertedVector.data(), + convertedVector.size()); } else { - floatToDataType( - inputVector.data(), convertedVector.data(), convertedVector.size()); + floatToDataType(inputVector.data(), convertedVector.data(), convertedVector.size()); } algorithmImpl->addPoint(convertedVector.data(), (size_t)id); @@ -368,18 +357,14 @@ class TypedIndex : public Index { std::vector convertedArray(numThreads * actualDimensions); ParallelFor(start, rows, numThreads, [&](size_t row, size_t threadId) { size_t startIndex = threadId * actualDimensions; - std::memcpy(&inputArray[startIndex], floatInput[row], - dimensions * sizeof(float)); + std::memcpy(&inputArray[startIndex], floatInput[row], dimensions * sizeof(float)); if (useOrderPreservingTransform) { - inputArray[startIndex + dimensions] = - getDotFactorAndUpdateNorm(floatInput[row]); + inputArray[startIndex + dimensions] = getDotFactorAndUpdateNorm(floatInput[row]); } - floatToDataType(&inputArray[startIndex], - &convertedArray[startIndex], - actualDimensions); - size_t id = ids.size() ? ids.at(row) : (currentLabel.fetch_add(1)); + floatToDataType(&inputArray[startIndex], &convertedArray[startIndex], actualDimensions); + size_t id = ids.size() ? 
ids.at(row) : (currentLabel + row); try { algorithmImpl->addPoint(convertedArray.data() + startIndex, id); } catch (IndexFullError &e) { @@ -404,18 +389,15 @@ class TypedIndex : public Index { ParallelFor(start, rows, numThreads, [&](size_t row, size_t threadId) { size_t startIndex = threadId * actualDimensions; - std::memcpy(&inputArray[startIndex], floatInput[row], - dimensions * sizeof(float)); + std::memcpy(&inputArray[startIndex], floatInput[row], dimensions * sizeof(float)); if (useOrderPreservingTransform) { - inputArray[startIndex + dimensions] = - getDotFactorAndUpdateNorm(floatInput[row]); + inputArray[startIndex + dimensions] = getDotFactorAndUpdateNorm(floatInput[row]); } - normalizeVector( - &inputArray[startIndex], &normalizedArray[startIndex], - actualDimensions); - size_t id = ids.size() ? ids.at(row) : (currentLabel.fetch_add(1)); + normalizeVector(&inputArray[startIndex], &normalizedArray[startIndex], + actualDimensions); + size_t id = ids.size() ? ids.at(row) : (currentLabel + row); try { algorithmImpl->addPoint(normalizedArray.data() + startIndex, id); @@ -445,8 +427,7 @@ class TypedIndex : public Index { dist_t prevMaxNorm = max_norm; // atomically update max_norm when inserting from multiple threads - while (prevMaxNorm < norm && - !max_norm.compare_exchange_weak(prevMaxNorm, norm)) { + while (prevMaxNorm < norm && !max_norm.compare_exchange_weak(prevMaxNorm, norm)) { } return getDotFactor(norm); @@ -462,9 +443,7 @@ class TypedIndex : public Index { return sqrt((max_norm * max_norm) - (norm * norm)); } - std::vector getRawVector(hnswlib::labeltype id) { - return algorithmImpl->getDataByLabel(id); - } + std::vector getRawVector(hnswlib::labeltype id) { return algorithmImpl->getDataByLabel(id); } std::vector getVector(hnswlib::labeltype id) { std::vector rawData = getRawVector(id); @@ -477,8 +456,7 @@ class TypedIndex : public Index { for (unsigned long i = 0; i < ids.size(); i++) { std::vector vector = getVector(ids[i]); - 
std::copy(vector.begin(), vector.end(), - output.data.data() + (i * dimensions)); + std::copy(vector.begin(), vector.end(), output.data.data() + (i * dimensions)); } return output; @@ -497,14 +475,12 @@ class TypedIndex : public Index { long long getIDsCount() const { return algorithmImpl->label_lookup_.size(); } - const std::unordered_map & - getIDsMap() const { + const std::unordered_map &getIDsMap() const { return algorithmImpl->label_lookup_; } - std::tuple, NDArray> - query(NDArray floatQueryVectors, int k = 1, int numThreads = -1, - long queryEf = -1) { + std::tuple, NDArray> query(NDArray floatQueryVectors, int k = 1, + int numThreads = -1, long queryEf = -1) { if (queryEf > 0 && queryEf < k) { throw std::runtime_error("queryEf must be equal to or greater than the " "requested number of neighbors"); @@ -513,8 +489,7 @@ class TypedIndex : public Index { int numFeatures = std::get<1>(floatQueryVectors.shape); if (numFeatures != dimensions) { - throw std::runtime_error( - "Query vectors expected to share dimensionality with index."); + throw std::runtime_error("Query vectors expected to share dimensionality with index."); } NDArray labels({numRows, k}); @@ -533,8 +508,7 @@ class TypedIndex : public Index { numThreads = 1; } - int actualDimensions = - useOrderPreservingTransform ? dimensions + 1 : dimensions; + int actualDimensions = useOrderPreservingTransform ? dimensions + 1 : dimensions; if (normalize == false) { std::vector inputArray(numThreads * actualDimensions); @@ -545,22 +519,17 @@ class TypedIndex : public Index { // Only copy at most `dimensions` from the input; if we're using // the order-preserving transform, the remaining dimension will be 0 // anyways. 
- std::memcpy(&inputArray[start_idx], floatQueryVectors[row], - dimensions * sizeof(float)); + std::memcpy(&inputArray[start_idx], floatQueryVectors[row], dimensions * sizeof(float)); - floatToDataType(&inputArray[start_idx], - &convertedArray[start_idx], - actualDimensions); + floatToDataType(&inputArray[start_idx], &convertedArray[start_idx], actualDimensions); std::priority_queue> result = - algorithmImpl->searchKnn((convertedArray.data() + start_idx), k, - nullptr, queryEf); + algorithmImpl->searchKnn((convertedArray.data() + start_idx), k, nullptr, queryEf); if (result.size() != (unsigned long)k) { - throw std::runtime_error( - "Fewer than expected results were retrieved; only found " + - std::to_string(result.size()) + " of " + std::to_string(k) + - " requested neighbors."); + throw std::runtime_error("Fewer than expected results were retrieved; only found " + + std::to_string(result.size()) + " of " + std::to_string(k) + + " requested neighbors."); } for (int i = k - 1; i >= 0; i--) { @@ -583,21 +552,17 @@ class TypedIndex : public Index { // Only copy at most `dimensions` from the input; if we're using // the order-preserving transform, the remaining dimension will be 0 // anyways. 
- std::memcpy(&inputArray[start_idx], floatQueryVectors[row], - dimensions * sizeof(float)); + std::memcpy(&inputArray[start_idx], floatQueryVectors[row], dimensions * sizeof(float)); - normalizeVector( - &inputArray[start_idx], &norm_array[start_idx], actualDimensions); + normalizeVector(&inputArray[start_idx], &norm_array[start_idx], actualDimensions); std::priority_queue> result = - algorithmImpl->searchKnn(norm_array.data() + start_idx, k, nullptr, - queryEf); + algorithmImpl->searchKnn(norm_array.data() + start_idx, k, nullptr, queryEf); if (result.size() != (unsigned long)k) { - throw std::runtime_error( - "Fewer than expected results were retrieved; only found " + - std::to_string(result.size()) + " of " + std::to_string(k) + - " requested neighbors."); + throw std::runtime_error("Fewer than expected results were retrieved; only found " + + std::to_string(result.size()) + " of " + std::to_string(k) + + " requested neighbors."); } for (int i = k - 1; i >= 0; i--) { @@ -606,7 +571,7 @@ class TypedIndex : public Index { dist_t distance = result_tuple.first; hnswlib::labeltype label = result_tuple.second; - distancePointer[row * k + i] = distance; + distancePointer[row * k + i] = ensureNotNegative(distance, label); labelPointer[row * k + i] = label; result.pop(); } @@ -616,8 +581,8 @@ class TypedIndex : public Index { return {labels, distances}; } - std::tuple, std::vector> - query(std::vector floatQueryVector, int k = 1, long queryEf = -1) { + std::tuple, std::vector> query(std::vector floatQueryVector, int k = 1, + long queryEf = -1) { if (queryEf > 0 && queryEf < k) { throw std::runtime_error("queryEf must be equal to or greater than the " "requested number of neighbors"); @@ -626,8 +591,7 @@ class TypedIndex : public Index { int numFeatures = floatQueryVector.size(); if (numFeatures != dimensions) { - throw std::runtime_error( - "Query vector expected to share dimensionality with index."); + throw std::runtime_error("Query vector expected to share 
dimensionality with index."); } int actualDimensions = dimensions; @@ -643,17 +607,14 @@ class TypedIndex : public Index { dist_t *distancePointer = distances.data(); if (normalize == false) { - const std::vector queryVector = - floatToDataType(floatQueryVector); + const std::vector queryVector = floatToDataType(floatQueryVector); std::priority_queue> result = algorithmImpl->searchKnn(queryVector.data(), k, nullptr, queryEf); if (result.size() != (unsigned long)k) { - throw std::runtime_error( - "Fewer than expected results were retrieved; only found " + - std::to_string(result.size()) + " of " + std::to_string(k) + - " requested neighbors."); + throw std::runtime_error("Fewer than expected results were retrieved; only found " + + std::to_string(result.size()) + " of " + std::to_string(k) + " requested neighbors."); } for (int i = k - 1; i >= 0; i--) { @@ -664,23 +625,20 @@ class TypedIndex : public Index { } } else { std::vector norm_array(numFeatures); - normalizeVector( - floatQueryVector.data(), norm_array.data(), actualDimensions); + normalizeVector(floatQueryVector.data(), norm_array.data(), actualDimensions); std::priority_queue> result = algorithmImpl->searchKnn(norm_array.data(), k, nullptr, queryEf); if (result.size() != (unsigned long)k) { - throw std::runtime_error( - "Fewer than expected results were retrieved; only found " + - std::to_string(result.size()) + " of " + std::to_string(k) + - " requested neighbors."); + throw std::runtime_error("Fewer than expected results were retrieved; only found " + + std::to_string(result.size()) + " of " + std::to_string(k) + " requested neighbors."); } for (int i = k - 1; i >= 0; i--) { auto &result_tuple = result.top(); - distancePointer[i] = result_tuple.first; + distancePointer[i] = ensureNotNegative(result_tuple.first, result_tuple.second); labelPointer[i] = result_tuple.second; result.pop(); } @@ -689,13 +647,9 @@ class TypedIndex : public Index { return {labels, distances}; } - void 
markDeleted(hnswlib::labeltype label) { - algorithmImpl->markDelete(label); - } + void markDeleted(hnswlib::labeltype label) { algorithmImpl->markDelete(label); } - void unmarkDeleted(hnswlib::labeltype label) { - algorithmImpl->unmarkDelete(label); - } + void unmarkDeleted(hnswlib::labeltype label) { algorithmImpl->unmarkDelete(label); } void resizeIndex(size_t new_size) { algorithmImpl->resizeIndex(new_size); } @@ -717,47 +671,34 @@ class TypedIndex : public Index { size_t getM() const { return algorithmImpl->M_; } }; -std::unique_ptr -loadTypedIndexFromMetadata(std::unique_ptr metadata, - std::shared_ptr inputStream) { +std::unique_ptr loadTypedIndexFromMetadata(std::unique_ptr metadata, + std::shared_ptr inputStream) { if (!metadata) { - throw std::domain_error( - "The provided file contains no Voyager parameter metadata. Please " - "specify the number of dimensions, SpaceType, and StorageDataType that " - "this index contains."); - } else if (voyager::Metadata::V1 *v1 = - dynamic_cast(metadata.get())) { + throw std::domain_error("The provided file contains no Voyager parameter metadata. Please " + "specify the number of dimensions, SpaceType, and StorageDataType that " + "this index contains."); + } else if (voyager::Metadata::V1 *v1 = dynamic_cast(metadata.get())) { // We have enough information to create a TypedIndex! 
switch (v1->getStorageDataType()) { case StorageDataType::Float32: return std::make_unique>( - std::unique_ptr( - (voyager::Metadata::V1 *)metadata.release()), - inputStream); + std::unique_ptr((voyager::Metadata::V1 *)metadata.release()), inputStream); break; case StorageDataType::Float8: return std::make_unique>>( - std::unique_ptr( - (voyager::Metadata::V1 *)metadata.release()), - inputStream); + std::unique_ptr((voyager::Metadata::V1 *)metadata.release()), inputStream); break; case StorageDataType::E4M3: return std::make_unique>( - std::unique_ptr( - (voyager::Metadata::V1 *)metadata.release()), - inputStream); + std::unique_ptr((voyager::Metadata::V1 *)metadata.release()), inputStream); break; default: - throw std::domain_error("Unknown storage data type: " + - std::to_string((int)v1->getStorageDataType())); + throw std::domain_error("Unknown storage data type: " + std::to_string((int)v1->getStorageDataType())); } } else { throw std::domain_error("Unknown Voyager metadata format."); } } -std::unique_ptr -loadTypedIndexFromStream(std::shared_ptr inputStream) { - return loadTypedIndexFromMetadata( - voyager::Metadata::loadFromStream(inputStream), inputStream); -} \ No newline at end of file +std::unique_ptr loadTypedIndexFromStream(std::shared_ptr inputStream) { + return loadTypedIndexFromMetadata(voyager::Metadata::loadFromStream(inputStream), inputStream); diff --git a/cpp/src/array_utils.h b/cpp/src/array_utils.h index 2e497a99..cb8f95cf 100644 --- a/cpp/src/array_utils.h +++ b/cpp/src/array_utils.h @@ -42,16 +42,13 @@ template class NDArray { const std::array strides; NDArray(std::array shape) - : data(std::accumulate(shape.begin(), shape.end(), 1, - std::multiplies())), - shape(shape), strides(computeStrides()) {} + : data(std::accumulate(shape.begin(), shape.end(), 1, std::multiplies())), shape(shape), + strides(computeStrides()) {} - NDArray(std::vector data, std::array shape) - : data(data), shape(shape), strides(computeStrides()) {} + 
NDArray(std::vector data, std::array shape) : data(data), shape(shape), strides(computeStrides()) {} NDArray(T *inputPointer, std::array shape) - : data(computeNumElements(shape)), shape(shape), - strides(computeStrides()) { + : data(computeNumElements(shape)), shape(shape), strides(computeStrides()) { std::memcpy(data.data(), inputPointer, data.size() * sizeof(T)); } @@ -84,9 +81,8 @@ NDArray floatToDataType(NDArray input) { // Handle rescaling to integer storage values if necessary: if constexpr (std::is_same_v) { if constexpr (scalefactor::num != scalefactor::den) { - throw std::runtime_error( - "Index has a non-unity scale factor set, but is using float32 data " - "storage. This combination is not yet implemented."); + throw std::runtime_error("Index has a non-unity scale factor set, but is using float32 data " + "storage. This combination is not yet implemented."); } return input; @@ -103,12 +99,10 @@ NDArray floatToDataType(NDArray input) { return output; } else { // Re-scale the input values by multiplying by `scalefactor`: - constexpr float lowerBound = (float)std::numeric_limits::min() * - (float)scalefactor::num / - (float)scalefactor::den; - constexpr float upperBound = (float)std::numeric_limits::max() * - (float)scalefactor::num / - (float)scalefactor::den; + constexpr float lowerBound = + (float)std::numeric_limits::min() * (float)scalefactor::num / (float)scalefactor::den; + constexpr float upperBound = + (float)std::numeric_limits::max() * (float)scalefactor::num / (float)scalefactor::den; NDArray output(input.shape); @@ -118,15 +112,12 @@ NDArray floatToDataType(NDArray input) { for (unsigned long i = 0; i < input.data.size(); i++) { if (inputPointer[i] > upperBound || inputPointer[i] < lowerBound) { - throw std::domain_error( - "One or more vectors contain values outside of [" + - std::to_string(lowerBound) + ", " + std::to_string(upperBound) + - "]. 
Index: " + std::to_string(i) + - ", invalid value: " + std::to_string(inputPointer[i])); + throw std::domain_error("One or more vectors contain values outside of [" + std::to_string(lowerBound) + ", " + + std::to_string(upperBound) + "]. Index: " + std::to_string(i) + + ", invalid value: " + std::to_string(inputPointer[i])); } - outputPointer[i] = - (inputPointer[i] * (float)scalefactor::den) / (float)scalefactor::num; + outputPointer[i] = (inputPointer[i] * (float)scalefactor::den) / (float)scalefactor::num; } return output; @@ -134,14 +125,12 @@ NDArray floatToDataType(NDArray input) { } template > -void floatToDataType(const float *inputPointer, data_t *outputPointer, - int dimensions) { +void floatToDataType(const float *inputPointer, data_t *outputPointer, int dimensions) { // Handle rescaling to integer storage values if necessary: if constexpr (std::is_same_v) { if constexpr (scalefactor::num != scalefactor::den) { - throw std::runtime_error( - "Index has a non-unity scale factor set, but is using float32 data " - "storage. This combination is not yet implemented."); + throw std::runtime_error("Index has a non-unity scale factor set, but is using float32 data " + "storage. 
This combination is not yet implemented."); } std::memcpy(outputPointer, inputPointer, sizeof(float) * dimensions); @@ -152,27 +141,22 @@ void floatToDataType(const float *inputPointer, data_t *outputPointer, } } else { // Re-scale the input values by multiplying by `scalefactor`: - constexpr float lowerBound = (float)std::numeric_limits::min() * - (float)scalefactor::num / - (float)scalefactor::den; - constexpr float upperBound = (float)std::numeric_limits::max() * - (float)scalefactor::num / - (float)scalefactor::den; + constexpr float lowerBound = + (float)std::numeric_limits::min() * (float)scalefactor::num / (float)scalefactor::den; + constexpr float upperBound = + (float)std::numeric_limits::max() * (float)scalefactor::num / (float)scalefactor::den; std::vector output(dimensions); // Re-scale the input values by multiplying by `scalefactor`: for (int i = 0; i < dimensions; i++) { if (inputPointer[i] > upperBound || inputPointer[i] < lowerBound) { - throw std::domain_error( - "One or more vectors contain values outside of [" + - std::to_string(lowerBound) + ", " + std::to_string(upperBound) + - "]. Index: " + std::to_string(i) + - ", invalid value: " + std::to_string(inputPointer[i])); + throw std::domain_error("One or more vectors contain values outside of [" + std::to_string(lowerBound) + ", " + + std::to_string(upperBound) + "]. Index: " + std::to_string(i) + + ", invalid value: " + std::to_string(inputPointer[i])); } - outputPointer[i] = - (inputPointer[i] * (float)scalefactor::den) / (float)scalefactor::num; + outputPointer[i] = (inputPointer[i] * (float)scalefactor::den) / (float)scalefactor::num; } } } @@ -181,17 +165,15 @@ template > std::vector floatToDataType(const std::vector input) { if constexpr (std::is_same_v) { if constexpr (scalefactor::num != scalefactor::den) { - throw std::runtime_error( - "Index has a non-unity scale factor set, but is using float32 data " - "storage. 
This combination is not yet implemented."); + throw std::runtime_error("Index has a non-unity scale factor set, but is using float32 data " + "storage. This combination is not yet implemented."); } return input; } std::vector output(input.size()); - floatToDataType(input.data(), output.data(), - input.size()); + floatToDataType(input.data(), output.data(), input.size()); return output; } @@ -200,9 +182,8 @@ NDArray dataTypeToFloat(NDArray input) { // Handle rescaling to integer storage values if necessary: if constexpr (std::is_same_v) { if constexpr (scalefactor::num != scalefactor::den) { - throw std::runtime_error( - "Index has a non-unity scale factor set, but is using float32 data " - "storage. This combination is not yet implemented."); + throw std::runtime_error("Index has a non-unity scale factor set, but is using float32 data " + "storage. This combination is not yet implemented."); } return input; @@ -214,22 +195,19 @@ NDArray dataTypeToFloat(NDArray input) { float *outputPointer = output.data.data(); for (unsigned long i = 0; i < input.data.size(); i++) { - outputPointer[i] = ((float)inputPointer[i] * (float)scalefactor::num) / - (float)scalefactor::den; + outputPointer[i] = ((float)inputPointer[i] * (float)scalefactor::num) / (float)scalefactor::den; } return output; } } -template > +template > void normalizeVector(const float *data, data_t *norm_array, int dimensions) { dist_t norm = 0.0; for (int i = 0; i < dimensions; i++) { if constexpr (scalefactor::num != scalefactor::den) { - dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / - (dist_t)scalefactor::den; + dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; norm += point * point; } else { norm += data[i] * data[i]; @@ -238,8 +216,7 @@ void normalizeVector(const float *data, data_t *norm_array, int dimensions) { norm = 1.0f / (sqrtf(norm) + 1e-30f); for (int i = 0; i < dimensions; i++) { if constexpr (scalefactor::num != scalefactor::den) { - dist_t 
element = - (data[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; + dist_t element = (data[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; dist_t normalizedElement = element * norm; norm_array[i] = (normalizedElement * scalefactor::den) / scalefactor::num; } else { @@ -249,14 +226,12 @@ void normalizeVector(const float *data, data_t *norm_array, int dimensions) { } } -template > +template > dist_t getNorm(const data_t *data, int dimensions) { dist_t norm = 0.0; for (int i = 0; i < dimensions; i++) { if constexpr (scalefactor::num != scalefactor::den) { - dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / - (dist_t)scalefactor::den; + dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; norm += point * point; } else { norm += data[i] * data[i]; @@ -265,14 +240,12 @@ dist_t getNorm(const data_t *data, int dimensions) { return sqrtf(norm); } -template > +template > bool isNormalized(const data_t *data, int dimensions, dist_t maxNorm) { dist_t norm = 0.0; for (int i = 0; i < dimensions; i++) { if constexpr (scalefactor::num != scalefactor::den) { - dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / - (dist_t)scalefactor::den; + dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; norm += point * point; } else { norm += data[i] * data[i]; @@ -281,15 +254,13 @@ bool isNormalized(const data_t *data, int dimensions, dist_t maxNorm) { return norm <= maxNorm; } -template > +template > std::string toFloatVectorString(data_t *vec, size_t size) { std::ostringstream ss; ss << "["; for (size_t i = 0; i < size; i++) { if constexpr (scalefactor::num != scalefactor::den) { - float point = (dist_t)(vec[i] * (dist_t)scalefactor::num) / - (dist_t)scalefactor::den; + float point = (dist_t)(vec[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; ss << ((float)point); } else { ss << ((float)vec[i]); @@ -303,9 +274,7 @@ std::string toFloatVectorString(data_t *vec, size_t 
size) { return ss.str(); } -template > +template > std::string toFloatVectorString(std::vector vec) { - return toFloatVectorString(vec.data(), - vec.size()); + return toFloatVectorString(vec.data(), vec.size()); } \ No newline at end of file diff --git a/cpp/src/cmake_install.cmake b/cpp/src/cmake_install.cmake deleted file mode 100644 index 66d3bbea..00000000 --- a/cpp/src/cmake_install.cmake +++ /dev/null @@ -1,39 +0,0 @@ -# Install script for directory: /Users/markkoh/spotify/voyager/cpp/src - -# Set the install prefix -if(NOT DEFINED CMAKE_INSTALL_PREFIX) - set(CMAKE_INSTALL_PREFIX "/usr/local") -endif() -string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") - -# Set the install configuration name. -if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) - if(BUILD_TYPE) - string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" - CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") - else() - set(CMAKE_INSTALL_CONFIG_NAME "") - endif() - message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") -endif() - -# Set the component getting installed. -if(NOT CMAKE_INSTALL_COMPONENT) - if(COMPONENT) - message(STATUS "Install component: \"${COMPONENT}\"") - set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") - else() - set(CMAKE_INSTALL_COMPONENT) - endif() -endif() - -# Is this installation the result of a crosscompile? -if(NOT DEFINED CMAKE_CROSSCOMPILING) - set(CMAKE_CROSSCOMPILING "FALSE") -endif() - -# Set default install directory permissions. 
-if(NOT DEFINED CMAKE_OBJDUMP) - set(CMAKE_OBJDUMP "/Library/Developer/CommandLineTools/usr/bin/objdump") -endif() - diff --git a/cpp/src/hnswalg.h b/cpp/src/hnswalg.h index ab9b27b4..80d3606b 100644 --- a/cpp/src/hnswalg.h +++ b/cpp/src/hnswalg.h @@ -40,8 +40,7 @@ class IndexCannotBeShrunkError : public std::runtime_error { public: - IndexCannotBeShrunkError(const std::string &what) - : std::runtime_error(what) {} + IndexCannotBeShrunkError(const std::string &what) : std::runtime_error(what) {} }; class IndexFullError : public std::runtime_error { @@ -53,22 +52,19 @@ namespace hnswlib { typedef unsigned int tableint; typedef unsigned int linklistsizeint; -template -class HierarchicalNSW : public AlgorithmInterface { +template class HierarchicalNSW : public AlgorithmInterface { public: static const tableint max_update_element_locks = 65536; - HierarchicalNSW(Space *s, - std::shared_ptr inputStream, - size_t max_elements = 0, bool search_only = false) + HierarchicalNSW(Space *s, std::shared_ptr inputStream, size_t max_elements = 0, + bool search_only = false) : search_only_(search_only) { loadIndex(inputStream, s, max_elements); } - HierarchicalNSW(Space *s, size_t max_elements, size_t M = 16, - size_t ef_construction = 200, size_t random_seed = 100) - : link_list_locks_(max_elements), - link_list_update_locks_(max_update_element_locks), + HierarchicalNSW(Space *s, size_t max_elements, size_t M = 16, size_t ef_construction = 200, + size_t random_seed = 100) + : link_list_locks_(max_elements), link_list_update_locks_(max_update_element_locks), element_levels_(max_elements) { max_elements_ = max_elements; @@ -86,14 +82,12 @@ class HierarchicalNSW : public AlgorithmInterface { update_probability_generator_.seed(random_seed + 1); size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint); - size_data_per_element_ = - size_links_level0_ + data_size_ + sizeof(labeltype); + size_data_per_element_ = size_links_level0_ + data_size_ + sizeof(labeltype); 
offsetData_ = size_links_level0_; label_offset_ = size_links_level0_ + data_size_; offsetLevel0_ = 0; - data_level0_memory_ = - (char *)malloc(max_elements_ * size_data_per_element_); + data_level0_memory_ = (char *)malloc(max_elements_ * size_data_per_element_); if (data_level0_memory_ == nullptr) throw std::runtime_error("Not enough memory"); @@ -107,18 +101,15 @@ class HierarchicalNSW : public AlgorithmInterface { linkLists_ = (char **)malloc(sizeof(void *) * max_elements_); if (linkLists_ == nullptr) - throw std::runtime_error( - "Not enough memory: HierarchicalNSW failed to allocate linklists"); - size_links_per_element_ = - maxM_ * sizeof(tableint) + sizeof(linklistsizeint); + throw std::runtime_error("Not enough memory: HierarchicalNSW failed to allocate linklists"); + size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); mult_ = 1 / log(1.0 * M_); revSize_ = 1.0 / mult_; } struct CompareByFirst { - constexpr bool - operator()(std::pair const &a, - std::pair const &b) const noexcept { + constexpr bool operator()(std::pair const &a, + std::pair const &b) const noexcept { return a.first < b.first; } }; @@ -181,28 +172,21 @@ class HierarchicalNSW : public AlgorithmInterface { inline labeltype getExternalLabel(tableint internal_id) const { labeltype return_label; - memcpy(&return_label, - (data_level0_memory_ + internal_id * size_data_per_element_ + - label_offset_), + memcpy(&return_label, (data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), sizeof(labeltype)); return return_label; } inline void setExternalLabel(tableint internal_id, labeltype label) const { - memcpy((data_level0_memory_ + internal_id * size_data_per_element_ + - label_offset_), - &label, sizeof(labeltype)); + memcpy((data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), &label, sizeof(labeltype)); } inline labeltype *getExternalLabeLp(tableint internal_id) const { - return (labeltype *)(data_level0_memory_ + - internal_id 
* size_data_per_element_ + label_offset_); + return (labeltype *)(data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_); } inline data_t *getDataByInternalId(tableint internal_id) const { - return reinterpret_cast(data_level0_memory_ + - internal_id * size_data_per_element_ + - offsetData_); + return reinterpret_cast(data_level0_memory_ + internal_id * size_data_per_element_ + offsetData_); } int getRandomLevel(double reverse_size) { @@ -211,10 +195,8 @@ class HierarchicalNSW : public AlgorithmInterface { return (int)r; } - std::priority_queue, - std::vector>, CompareByFirst> - searchBaseLayer(tableint ep_id, const data_t *data_point, int layer, - VisitedList *vl = nullptr) { + std::priority_queue, std::vector>, CompareByFirst> + searchBaseLayer(tableint ep_id, const data_t *data_point, int layer, VisitedList *vl = nullptr) { bool wasPassedVisitedList = vl != nullptr; if (!wasPassedVisitedList) { vl = visited_list_pool_->getFreeVisitedList(); @@ -225,19 +207,14 @@ class HierarchicalNSW : public AlgorithmInterface { vl_type *visited_array = vl->mass; vl_type visited_array_tag = vl->curV; - std::priority_queue, - std::vector>, - CompareByFirst> + std::priority_queue, std::vector>, CompareByFirst> top_candidates; - std::priority_queue, - std::vector>, - CompareByFirst> + std::priority_queue, std::vector>, CompareByFirst> candidateSet; dist_t lowerBound; if (!isMarkedDeleted(ep_id)) { - dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), - dist_func_param_); + dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), dist_func_param_); top_candidates.emplace(dist, ep_id); lowerBound = dist; candidateSet.emplace(-dist, ep_id); @@ -249,8 +226,7 @@ class HierarchicalNSW : public AlgorithmInterface { while (!candidateSet.empty()) { std::pair curr_el_pair = candidateSet.top(); - if ((-curr_el_pair.first) > lowerBound && - top_candidates.size() == ef_construction_) { + if ((-curr_el_pair.first) > lowerBound && 
top_candidates.size() == ef_construction_) { break; } candidateSet.pop(); @@ -306,10 +282,8 @@ class HierarchicalNSW : public AlgorithmInterface { mutable std::atomic metric_hops; template - std::priority_queue, - std::vector>, CompareByFirst> - searchBaseLayerST(tableint ep_id, const data_t *data_point, size_t ef, - VisitedList *vl = nullptr) const { + std::priority_queue, std::vector>, CompareByFirst> + searchBaseLayerST(tableint ep_id, const data_t *data_point, size_t ef, VisitedList *vl = nullptr) const { bool wasPassedVisitedList = vl != nullptr; if (!wasPassedVisitedList) { vl = visited_list_pool_->getFreeVisitedList(); @@ -320,19 +294,14 @@ class HierarchicalNSW : public AlgorithmInterface { vl_type *visited_array = vl->mass; vl_type visited_array_tag = vl->curV; - std::priority_queue, - std::vector>, - CompareByFirst> + std::priority_queue, std::vector>, CompareByFirst> top_candidates; - std::priority_queue, - std::vector>, - CompareByFirst> + std::priority_queue, std::vector>, CompareByFirst> candidate_set; dist_t lowerBound; if (!has_deletions || !isMarkedDeleted(ep_id)) { - dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), - dist_func_param_); + dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), dist_func_param_); lowerBound = dist; top_candidates.emplace(dist, ep_id); candidate_set.emplace(-dist, ep_id); @@ -347,8 +316,7 @@ class HierarchicalNSW : public AlgorithmInterface { std::pair current_node_pair = candidate_set.top(); - if ((-current_node_pair.first) > lowerBound && - (top_candidates.size() == ef || has_deletions == false)) { + if ((-current_node_pair.first) > lowerBound && (top_candidates.size() == ef || has_deletions == false)) { break; } candidate_set.pop(); @@ -395,9 +363,8 @@ class HierarchicalNSW : public AlgorithmInterface { } void getNeighborsByHeuristic2( - std::priority_queue, - std::vector>, - CompareByFirst> &top_candidates, + std::priority_queue, std::vector>, CompareByFirst> + &top_candidates, const 
size_t M) { if (top_candidates.size() < M) { return; @@ -406,8 +373,7 @@ class HierarchicalNSW : public AlgorithmInterface { std::priority_queue> queue_closest; std::vector> return_list; while (top_candidates.size() > 0) { - queue_closest.emplace(-top_candidates.top().first, - top_candidates.top().second); + queue_closest.emplace(-top_candidates.top().first, top_candidates.top().second); top_candidates.pop(); } @@ -420,8 +386,7 @@ class HierarchicalNSW : public AlgorithmInterface { bool good = true; for (std::pair second_pair : return_list) { - dist_t curdist = fstdistfunc_(getDataByInternalId(second_pair.second), - getDataByInternalId(curent_pair.second), + dist_t curdist = fstdistfunc_(getDataByInternalId(second_pair.second), getDataByInternalId(curent_pair.second), dist_func_param_); ; if (curdist < dist_to_query) { @@ -440,40 +405,30 @@ class HierarchicalNSW : public AlgorithmInterface { } linklistsizeint *get_linklist0(tableint internal_id) const { - return (linklistsizeint *)(data_level0_memory_ + - internal_id * size_data_per_element_ + - offsetLevel0_); + return (linklistsizeint *)(data_level0_memory_ + internal_id * size_data_per_element_ + offsetLevel0_); }; - linklistsizeint *get_linklist0(tableint internal_id, - char *data_level0_memory_) const { - return (linklistsizeint *)(data_level0_memory_ + - internal_id * size_data_per_element_ + - offsetLevel0_); + linklistsizeint *get_linklist0(tableint internal_id, char *data_level0_memory_) const { + return (linklistsizeint *)(data_level0_memory_ + internal_id * size_data_per_element_ + offsetLevel0_); }; linklistsizeint *get_linklist(tableint internal_id, int level) const { - return (linklistsizeint *)(linkLists_[internal_id] + - (level - 1) * size_links_per_element_); + return (linklistsizeint *)(linkLists_[internal_id] + (level - 1) * size_links_per_element_); }; - linklistsizeint *get_linklist_at_level(tableint internal_id, - int level) const { - return level == 0 ? 
get_linklist0(internal_id) - : get_linklist(internal_id, level); + linklistsizeint *get_linklist_at_level(tableint internal_id, int level) const { + return level == 0 ? get_linklist0(internal_id) : get_linklist(internal_id, level); }; tableint mutuallyConnectNewElement( const data_t *data_point, tableint cur_c, - std::priority_queue, - std::vector>, - CompareByFirst> &top_candidates, + std::priority_queue, std::vector>, CompareByFirst> + &top_candidates, int level, bool isUpdate) { size_t Mcurmax = level ? maxM_ : maxM0_; getNeighborsByHeuristic2(top_candidates, M_); if (top_candidates.size() > M_) - throw std::runtime_error( - "Should be not be more than M_ candidates returned by the heuristic"); + throw std::runtime_error("Should be not be more than M_ candidates returned by the heuristic"); std::vector selectedNeighbors; selectedNeighbors.reserve(M_); @@ -492,8 +447,7 @@ class HierarchicalNSW : public AlgorithmInterface { ll_cur = get_linklist(cur_c, level); if (*ll_cur && !isUpdate) { - throw std::runtime_error( - "The newly inserted element should have blank link list"); + throw std::runtime_error("The newly inserted element should have blank link list"); } setListCount(ll_cur, selectedNeighbors.size()); tableint *data = (tableint *)(ll_cur + 1); @@ -501,8 +455,7 @@ class HierarchicalNSW : public AlgorithmInterface { if (data[idx] && !isUpdate) throw std::runtime_error("Possible memory corruption"); if (level > element_levels_[selectedNeighbors[idx]]) - throw std::runtime_error( - "Trying to make a link on a non-existent level"); + throw std::runtime_error("Trying to make a link on a non-existent level"); data[idx] = selectedNeighbors[idx]; } @@ -510,8 +463,7 @@ class HierarchicalNSW : public AlgorithmInterface { for (size_t idx = 0; idx < selectedNeighbors.size(); idx++) { - std::unique_lock lock( - link_list_locks_[selectedNeighbors[idx]]); + std::unique_lock lock(link_list_locks_[selectedNeighbors[idx]]); linklistsizeint *ll_other; if (level == 0) @@ -526,8 
+478,7 @@ class HierarchicalNSW : public AlgorithmInterface { if (selectedNeighbors[idx] == cur_c) throw std::runtime_error("Trying to connect an element to itself"); if (level > element_levels_[selectedNeighbors[idx]]) - throw std::runtime_error( - "Trying to make a link on a non-existent level"); + throw std::runtime_error("Trying to make a link on a non-existent level"); tableint *data = (tableint *)(ll_other + 1); @@ -550,22 +501,17 @@ class HierarchicalNSW : public AlgorithmInterface { setListCount(ll_other, sz_link_list_other + 1); } else { // finding the "weakest" element to replace it with the new one - dist_t d_max = fstdistfunc_( - getDataByInternalId(cur_c), - getDataByInternalId(selectedNeighbors[idx]), dist_func_param_); + dist_t d_max = + fstdistfunc_(getDataByInternalId(cur_c), getDataByInternalId(selectedNeighbors[idx]), dist_func_param_); // Heuristic: - std::priority_queue, - std::vector>, - CompareByFirst> + std::priority_queue, std::vector>, CompareByFirst> candidates; candidates.emplace(d_max, cur_c); for (size_t j = 0; j < sz_link_list_other; j++) { - candidates.emplace( - fstdistfunc_(getDataByInternalId(data[j]), - getDataByInternalId(selectedNeighbors[idx]), - dist_func_param_), - data[j]); + candidates.emplace(fstdistfunc_(getDataByInternalId(data[j]), getDataByInternalId(selectedNeighbors[idx]), + dist_func_param_), + data[j]); } getNeighborsByHeuristic2(candidates, Mcurmax); @@ -602,14 +548,13 @@ class HierarchicalNSW : public AlgorithmInterface { void setEf(size_t ef) { ef_ = ef; } - std::priority_queue> - searchKnnInternal(data_t *query_data, int k, VisitedList *vl = nullptr) { + std::priority_queue> searchKnnInternal(data_t *query_data, int k, + VisitedList *vl = nullptr) { std::priority_queue> top_candidates; if (cur_element_count == 0) return top_candidates; tableint currObj = enterpoint_node_; - dist_t curdist = fstdistfunc_( - query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); + dist_t curdist = 
fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); for (size_t level = maxlevel_; level > 0; level--) { bool changed = true; @@ -623,8 +568,7 @@ class HierarchicalNSW : public AlgorithmInterface { tableint cand = datal[i]; if (cand < 0 || cand > max_elements_) throw std::runtime_error("cand error"); - dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), - dist_func_param_); + dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); if (d < curdist) { curdist = d; @@ -653,15 +597,13 @@ class HierarchicalNSW : public AlgorithmInterface { void resizeIndex(size_t new_max_elements) { if (search_only_) - throw std::runtime_error( - "resizeIndex is not supported in search only mode"); + throw std::runtime_error("resizeIndex is not supported in search only mode"); std::unique_lock lock(resizeLock); if (new_max_elements < cur_element_count) - throw IndexCannotBeShrunkError( - "Cannot resize to " + std::to_string(new_max_elements) + - " elements, as this index already contains " + - std::to_string(cur_element_count) + " elements."); + throw IndexCannotBeShrunkError("Cannot resize to " + std::to_string(new_max_elements) + + " elements, as this index already contains " + std::to_string(cur_element_count) + + " elements."); delete visited_list_pool_; visited_list_pool_ = new VisitedListPool(1, new_max_elements); @@ -671,27 +613,21 @@ class HierarchicalNSW : public AlgorithmInterface { std::vector(new_max_elements).swap(link_list_locks_); // Reallocate base layer - char *data_level0_memory_new = (char *)realloc( - data_level0_memory_, new_max_elements * size_data_per_element_); + char *data_level0_memory_new = (char *)realloc(data_level0_memory_, new_max_elements * size_data_per_element_); if (data_level0_memory_new == nullptr) - throw std::runtime_error( - "Not enough memory: resizeIndex failed to allocate base layer"); + throw std::runtime_error("Not enough memory: resizeIndex failed to allocate base layer"); 
data_level0_memory_ = data_level0_memory_new; // Reallocate all other layers - char **linkLists_new = - (char **)realloc(linkLists_, sizeof(void *) * new_max_elements); + char **linkLists_new = (char **)realloc(linkLists_, sizeof(void *) * new_max_elements); if (linkLists_new == nullptr) - throw std::runtime_error( - "Not enough memory: resizeIndex failed to allocate other layers"); + throw std::runtime_error("Not enough memory: resizeIndex failed to allocate other layers"); linkLists_ = linkLists_new; max_elements_ = new_max_elements; } - void saveIndex(const std::string &filename) { - saveIndex(std::make_shared(filename)); - } + void saveIndex(const std::string &filename) { saveIndex(std::make_shared(filename)); } void saveIndex(std::shared_ptr output) { writeBinaryPOD(output, offsetLevel0_); @@ -709,21 +645,17 @@ class HierarchicalNSW : public AlgorithmInterface { writeBinaryPOD(output, mult_); writeBinaryPOD(output, ef_construction_); - output->write(data_level0_memory_, - cur_element_count * size_data_per_element_); + output->write(data_level0_memory_, cur_element_count * size_data_per_element_); for (size_t i = 0; i < cur_element_count; i++) { - unsigned int linkListSize = - element_levels_[i] > 0 ? size_links_per_element_ * element_levels_[i] - : 0; + unsigned int linkListSize = element_levels_[i] > 0 ? 
size_links_per_element_ * element_levels_[i] : 0; writeBinaryPOD(output, linkListSize); if (linkListSize) output->write(linkLists_[i], linkListSize); } } - void loadIndex(std::shared_ptr inputStream, - Space *s, size_t max_elements_i = 0) { + void loadIndex(std::shared_ptr inputStream, Space *s, size_t max_elements_i = 0) { size_t totalFileSize = 0; if (inputStream->isSeekable()) { totalFileSize = inputStream->getTotalLength(); @@ -732,8 +664,7 @@ class HierarchicalNSW : public AlgorithmInterface { if (totalFileSize > 0 && offsetLevel0_ > totalFileSize) { throw std::domain_error("Index appears to contain corrupted data; level " "0 offset parameter (" + - std::to_string(offsetLevel0_) + - ") exceeded size of index file (" + + std::to_string(offsetLevel0_) + ") exceeded size of index file (" + std::to_string(totalFileSize) + ")."); } @@ -751,12 +682,10 @@ class HierarchicalNSW : public AlgorithmInterface { readBinaryPOD(inputStream, enterpoint_node_); if (enterpoint_node_ >= cur_element_count) { - throw std::runtime_error( - "Index seems to be corrupted or unsupported. " - "Entry point into HNSW data structure was at element index " + - std::to_string(enterpoint_node_) + ", but only " + - std::to_string(cur_element_count) + - " elements are present in the index."); + throw std::runtime_error("Index seems to be corrupted or unsupported. 
" + "Entry point into HNSW data structure was at element index " + + std::to_string(enterpoint_node_) + ", but only " + std::to_string(cur_element_count) + + " elements are present in the index."); } readBinaryPOD(inputStream, maxM_); @@ -769,22 +698,18 @@ class HierarchicalNSW : public AlgorithmInterface { fstdistfunc_ = s->get_dist_func(); dist_func_param_ = s->get_dist_func_param(); - size_links_per_element_ = - maxM_ * sizeof(tableint) + sizeof(linklistsizeint); + size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint); - size_t expected_size_per_element = - size_links_level0_ + data_size_ + sizeof(labeltype); + size_t expected_size_per_element = size_links_level0_ + data_size_ + sizeof(labeltype); if (size_data_per_element_ != expected_size_per_element) { - throw std::domain_error( - "Storage data type does not match the index data being loaded; " - "expected " + - std::to_string(expected_size_per_element) + - " bytes per element, but loaded data contains " + - std::to_string(size_data_per_element_) + - " bytes per element. Data being loaded might not be a Voyager index, " - "may be corrupt, or may be using a different storage data type."); + throw std::domain_error("Storage data type does not match the index data being loaded; " + "expected " + + std::to_string(expected_size_per_element) + + " bytes per element, but loaded data contains " + std::to_string(size_data_per_element_) + + " bytes per element. 
Data being loaded might not be a Voyager index, " + "may be corrupt, or may be using a different storage data type."); } long long position = inputStream->getPosition(); @@ -792,68 +717,55 @@ class HierarchicalNSW : public AlgorithmInterface { if (inputStream->isSeekable()) { inputStream->advanceBy(cur_element_count * size_data_per_element_); for (size_t i = 0; i < cur_element_count; i++) { - if (inputStream->getPosition() < 0 || - inputStream->getPosition() >= (long long)totalFileSize) { + if (inputStream->getPosition() < 0 || inputStream->getPosition() >= (long long)totalFileSize) { throw std::runtime_error( "Index seems to be corrupted or unsupported. Seeked to " + - std::to_string(position + - (cur_element_count * size_data_per_element_) + - (sizeof(unsigned int) * i)) + + std::to_string(position + (cur_element_count * size_data_per_element_) + (sizeof(unsigned int) * i)) + " bytes to read linked list, but resulting stream position was " + - std::to_string(inputStream->getPosition()) + - " (of total file size " + std::to_string(totalFileSize) + + std::to_string(inputStream->getPosition()) + " (of total file size " + std::to_string(totalFileSize) + " bytes)."); } unsigned int linkListSize; readBinaryPOD(inputStream, linkListSize); if (linkListSize != 0) { - if ((size_t)inputStream->getPosition() + linkListSize > - totalFileSize) { - throw std::runtime_error( - "Index seems to be corrupted or unsupported. Advancing to the " - "next linked list requires " + - std::to_string(linkListSize) + - " additional bytes (from position " + - std::to_string(inputStream->getPosition()) + - "), but index data only has " + std::to_string(totalFileSize) + - " bytes in total."); + if ((size_t)inputStream->getPosition() + linkListSize > totalFileSize) { + throw std::runtime_error("Index seems to be corrupted or unsupported. 
Advancing to the " + "next linked list requires " + + std::to_string(linkListSize) + " additional bytes (from position " + + std::to_string(inputStream->getPosition()) + "), but index data only has " + + std::to_string(totalFileSize) + " bytes in total."); } inputStream->advanceBy(linkListSize); } } if (inputStream->getPosition() != (long long)totalFileSize) - throw std::runtime_error( - "Index seems to be corrupted or unsupported. After reading all " - "linked lists, extra data remained at the end of the index."); + throw std::runtime_error("Index seems to be corrupted or unsupported. After reading all " + "linked lists, extra data remained at the end of the index."); inputStream->setPosition(position); } data_level0_memory_ = (char *)malloc(max_elements * size_data_per_element_); if (data_level0_memory_ == nullptr) { - throw std::runtime_error( - "Not enough memory: loadIndex failed to allocate level0 (" + - std::to_string(max_elements * size_data_per_element_) + " bytes)"); + throw std::runtime_error("Not enough memory: loadIndex failed to allocate level0 (" + + std::to_string(max_elements * size_data_per_element_) + " bytes)"); } { size_t bytes_to_read = cur_element_count * size_data_per_element_; size_t bytes_read = inputStream->read(data_level0_memory_, bytes_to_read); if (bytes_read != bytes_to_read) { - throw std::runtime_error("Tried to read " + - std::to_string(bytes_to_read) + - " bytes from stream, but only received " + - std::to_string(bytes_read) + " bytes!"); + throw std::runtime_error("Tried to read " + std::to_string(bytes_to_read) + + " bytes from stream, but only received " + std::to_string(bytes_read) + " bytes!"); } } linkLists_ = (char **)malloc(sizeof(void *) * max_elements); if (linkLists_ == nullptr) - throw std::runtime_error( - "Not enough memory: loadIndex failed to allocate linklists (" + - std::to_string(sizeof(void *) * max_elements) + " bytes)"); + throw std::runtime_error("Not enough memory: loadIndex failed to allocate linklists (" 
+ + std::to_string(sizeof(void *) * max_elements) + " bytes)"); size_t linkListBufferSize = sizeof(void *) * max_elements; std::vector linkListBuffer(linkListBufferSize); @@ -863,8 +775,7 @@ class HierarchicalNSW : public AlgorithmInterface { while (true) { long long bytes_to_read = linkListBuffer.size() - bytes_read; - long long bytes_read_this_iteration = inputStream->read( - linkListBuffer.data() + bytes_read, bytes_to_read); + long long bytes_read_this_iteration = inputStream->read(linkListBuffer.data() + bytes_read, bytes_to_read); if (bytes_read_this_iteration > 0) { bytes_read += bytes_read_this_iteration; @@ -877,11 +788,10 @@ class HierarchicalNSW : public AlgorithmInterface { try { linkListBuffer.resize(linkListBuffer.size() * 2); } catch (std::exception const &e) { - throw std::runtime_error( - "Failed to resize linked list buffer to " - "double its previous size (from " + - std::to_string(linkListBuffer.size()) + " to " + - std::to_string(linkListBuffer.size() * 2) + ")"); + throw std::runtime_error("Failed to resize linked list buffer to " + "double its previous size (from " + + std::to_string(linkListBuffer.size()) + " to " + + std::to_string(linkListBuffer.size() * 2) + ")"); } } else { // We've hit the end of the stream (as we read fewer bytes than asked @@ -900,8 +810,7 @@ class HierarchicalNSW : public AlgorithmInterface { if (!search_only_) { std::vector(max_elements).swap(link_list_locks_); - std::vector(max_update_element_locks) - .swap(link_list_update_locks_); + std::vector(max_update_element_locks).swap(link_list_update_locks_); } visited_list_pool_ = new VisitedListPool(1, max_elements); @@ -927,23 +836,17 @@ class HierarchicalNSW : public AlgorithmInterface { element_levels_[i] = linkListSize / size_links_per_element_; linkLists_[i] = (char *)malloc(linkListSize); if (linkLists_[i] == nullptr) - throw std::runtime_error( - "Not enough memory: loadIndex failed to allocate linklist"); + throw std::runtime_error("Not enough memory: loadIndex 
failed to allocate linklist"); - std::memcpy(linkLists_[i], - (linkListBuffer.data() + indexInLinkListBuffer), - linkListSize); + std::memcpy(linkLists_[i], (linkListBuffer.data() + indexInLinkListBuffer), linkListSize); indexInLinkListBuffer += linkListSize; } } - if (enterpoint_node_ > 0 && enterpoint_node_ != (tableint)-1 && - !linkLists_[enterpoint_node_]) { - throw std::runtime_error( - "Index seems to be corrupted or unsupported. " - "Entry point into HNSW data structure was at element index " + - std::to_string(enterpoint_node_) + - ", but no linked list was present at that index."); + if (enterpoint_node_ > 0 && enterpoint_node_ != (tableint)-1 && !linkLists_[enterpoint_node_]) { + throw std::runtime_error("Index seems to be corrupted or unsupported. " + "Entry point into HNSW data structure was at element index " + + std::to_string(enterpoint_node_) + ", but no linked list was present at that index."); } for (size_t i = 0; i < cur_element_count; i++) { @@ -958,14 +861,12 @@ class HierarchicalNSW : public AlgorithmInterface { std::vector getDataByLabel(labeltype label) const { if (search_only_) - throw std::runtime_error( - "getDataByLabel is not supported in search only mode"); + throw std::runtime_error("getDataByLabel is not supported in search only mode"); tableint label_c; auto search = label_lookup_.find(label); if (search == label_lookup_.end() || isMarkedDeleted(search->second)) { - throw std::runtime_error("Label " + std::to_string(label) + - " not found in index."); + throw std::runtime_error("Label " + std::to_string(label) + " not found in index."); } label_c = search->second; @@ -1004,8 +905,7 @@ class HierarchicalNSW : public AlgorithmInterface { } internalIDB = search->second; - return fstdistfunc_(getDataByInternalId(internalIDA), - getDataByInternalId(internalIDB), dist_func_param_); + return fstdistfunc_(getDataByInternalId(internalIDA), getDataByInternalId(internalIDB), dist_func_param_); } static const unsigned char DELETE_MARK = 0x01; 
@@ -1017,8 +917,7 @@ class HierarchicalNSW : public AlgorithmInterface { */ void markDelete(labeltype label) { if (search_only_) - throw std::runtime_error( - "markDelete is not supported in search only mode"); + throw std::runtime_error("markDelete is not supported in search only mode"); auto search = label_lookup_.find(label); if (search == label_lookup_.end()) { @@ -1041,8 +940,7 @@ class HierarchicalNSW : public AlgorithmInterface { *ll_cur |= DELETE_MARK; num_deleted_ += 1; } else { - throw std::runtime_error( - "The requested to delete element is already deleted"); + throw std::runtime_error("The requested to delete element is already deleted"); } } @@ -1071,8 +969,7 @@ class HierarchicalNSW : public AlgorithmInterface { *ll_cur &= ~DELETE_MARK; num_deleted_ -= 1; } else { - throw std::runtime_error( - "The requested to undelete element is not deleted"); + throw std::runtime_error("The requested to undelete element is not deleted"); } } @@ -1087,9 +984,7 @@ class HierarchicalNSW : public AlgorithmInterface { return *ll_cur & DELETE_MARK; } - unsigned short int getListCount(linklistsizeint *ptr) const { - return *((unsigned short int *)ptr); - } + unsigned short int getListCount(linklistsizeint *ptr) const { return *((unsigned short int *)ptr); } void setListCount(linklistsizeint *ptr, unsigned short int size) const { *((unsigned short int *)(ptr)) = *((unsigned short int *)&size); @@ -1102,8 +997,7 @@ class HierarchicalNSW : public AlgorithmInterface { addPoint(data_point, label, -1); } - void updatePoint(const data_t *dataPoint, tableint internalId, - float updateNeighborProbability) { + void updatePoint(const data_t *dataPoint, tableint internalId, float updateNeighborProbability) { // update the feature vector associated with existing point with new vector memcpy(getDataByInternalId(internalId), dataPoint, data_size_); @@ -1119,8 +1013,7 @@ class HierarchicalNSW : public AlgorithmInterface { for (int layer = 0; layer <= elemLevel; layer++) { 
std::unordered_set sCand; std::unordered_set sNeigh; - std::vector listOneHop = - getConnectionsWithLock(internalId, layer); + std::vector listOneHop = getConnectionsWithLock(internalId, layer); if (listOneHop.size() == 0) continue; @@ -1129,14 +1022,12 @@ class HierarchicalNSW : public AlgorithmInterface { for (auto &&elOneHop : listOneHop) { sCand.insert(elOneHop); - if (distribution(update_probability_generator_) > - updateNeighborProbability) + if (distribution(update_probability_generator_) > updateNeighborProbability) continue; sNeigh.insert(elOneHop); - std::vector listTwoHop = - getConnectionsWithLock(elOneHop, layer); + std::vector listTwoHop = getConnectionsWithLock(elOneHop, layer); for (auto &&elTwoHop : listTwoHop) { sCand.insert(elTwoHop); } @@ -1146,22 +1037,16 @@ class HierarchicalNSW : public AlgorithmInterface { // if (neigh == internalId) // continue; - std::priority_queue, - std::vector>, - CompareByFirst> + std::priority_queue, std::vector>, CompareByFirst> candidates; size_t size = - sCand.find(neigh) == sCand.end() - ? sCand.size() - : sCand.size() - 1; // sCand guaranteed to have size >= 1 + sCand.find(neigh) == sCand.end() ? 
sCand.size() : sCand.size() - 1; // sCand guaranteed to have size >= 1 size_t elementsToKeep = std::min(ef_construction_, size); for (auto &&cand : sCand) { if (cand == neigh) continue; - dist_t distance = - fstdistfunc_(getDataByInternalId(neigh), - getDataByInternalId(cand), dist_func_param_); + dist_t distance = fstdistfunc_(getDataByInternalId(neigh), getDataByInternalId(cand), dist_func_param_); if (candidates.size() < elementsToKeep) { candidates.emplace(distance, cand); } else { @@ -1190,18 +1075,14 @@ class HierarchicalNSW : public AlgorithmInterface { } } - repairConnectionsForUpdate(dataPoint, entryPointCopy, internalId, elemLevel, - maxLevelCopy); + repairConnectionsForUpdate(dataPoint, entryPointCopy, internalId, elemLevel, maxLevelCopy); }; - void repairConnectionsForUpdate(const data_t *dataPoint, - tableint entryPointInternalId, - tableint dataPointInternalId, + void repairConnectionsForUpdate(const data_t *dataPoint, tableint entryPointInternalId, tableint dataPointInternalId, int dataPointLevel, int maxLevel) { tableint currObj = entryPointInternalId; if (dataPointLevel < maxLevel) { - dist_t curdist = fstdistfunc_(dataPoint, getDataByInternalId(currObj), - dist_func_param_); + dist_t curdist = fstdistfunc_(dataPoint, getDataByInternalId(currObj), dist_func_param_); for (int level = maxLevel; level > dataPointLevel; level--) { bool changed = true; while (changed) { @@ -1213,8 +1094,7 @@ class HierarchicalNSW : public AlgorithmInterface { tableint *datal = (tableint *)(data + 1); for (int i = 0; i < size; i++) { tableint cand = datal[i]; - dist_t d = fstdistfunc_(dataPoint, getDataByInternalId(cand), - dist_func_param_); + dist_t d = fstdistfunc_(dataPoint, getDataByInternalId(cand), dist_func_param_); if (d < curdist) { curdist = d; currObj = cand; @@ -1226,18 +1106,13 @@ class HierarchicalNSW : public AlgorithmInterface { } if (dataPointLevel > maxLevel) - throw std::runtime_error( - "Level of item to be updated cannot be bigger than max level"); 
+ throw std::runtime_error("Level of item to be updated cannot be bigger than max level"); for (int level = dataPointLevel; level >= 0; level--) { - std::priority_queue, - std::vector>, - CompareByFirst> + std::priority_queue, std::vector>, CompareByFirst> topCandidates = searchBaseLayer(currObj, dataPoint, level); - std::priority_queue, - std::vector>, - CompareByFirst> + std::priority_queue, std::vector>, CompareByFirst> filteredTopCandidates; while (topCandidates.size() > 0) { if (topCandidates.top().second != dataPointInternalId) @@ -1254,15 +1129,13 @@ class HierarchicalNSW : public AlgorithmInterface { bool epDeleted = isMarkedDeleted(entryPointInternalId); if (epDeleted) { filteredTopCandidates.emplace( - fstdistfunc_(dataPoint, getDataByInternalId(entryPointInternalId), - dist_func_param_), + fstdistfunc_(dataPoint, getDataByInternalId(entryPointInternalId), dist_func_param_), entryPointInternalId); if (filteredTopCandidates.size() > ef_construction_) filteredTopCandidates.pop(); } - currObj = mutuallyConnectNewElement(dataPoint, dataPointInternalId, - filteredTopCandidates, level, true); + currObj = mutuallyConnectNewElement(dataPoint, dataPointInternalId, filteredTopCandidates, level, true); } } } @@ -1289,8 +1162,8 @@ class HierarchicalNSW : public AlgorithmInterface { tableint existingInternalId = search->second; templock_curr.unlock(); - std::unique_lock lock_el_update(link_list_update_locks_[( - existingInternalId & (max_update_element_locks - 1))]); + std::unique_lock lock_el_update( + link_list_update_locks_[(existingInternalId & (max_update_element_locks - 1))]); if (isMarkedDeleted(existingInternalId)) { unmarkDeletedInternal(existingInternalId); @@ -1301,13 +1174,11 @@ class HierarchicalNSW : public AlgorithmInterface { } if (cur_element_count >= max_elements_) { - throw IndexFullError( - "Cannot insert elements; this index already contains " + - std::to_string(cur_element_count) + - " elements, and its maximum size is " + - 
std::to_string(max_elements_) + - ". Call resizeIndex first to increase the maximum size of the " - "index."); + throw IndexFullError("Cannot insert elements; this index already contains " + + std::to_string(cur_element_count) + " elements, and its maximum size is " + + std::to_string(max_elements_) + + ". Call resizeIndex first to increase the maximum size of the " + "index."); }; cur_c = cur_element_count; @@ -1317,8 +1188,7 @@ class HierarchicalNSW : public AlgorithmInterface { // Take update lock to prevent race conditions on an element with // insertion/update at the same time. - std::unique_lock lock_el_update( - link_list_update_locks_[(cur_c & (max_update_element_locks - 1))]); + std::unique_lock lock_el_update(link_list_update_locks_[(cur_c & (max_update_element_locks - 1))]); std::unique_lock lock_el(link_list_locks_[cur_c]); int curlevel = getRandomLevel(mult_); if (level > 0) @@ -1333,19 +1203,16 @@ class HierarchicalNSW : public AlgorithmInterface { tableint currObj = enterpoint_node_; tableint enterpoint_copy = enterpoint_node_; - memset(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_, - 0, size_data_per_element_); + memset(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_, 0, size_data_per_element_); // Initialisation of the data and label memcpy(getExternalLabeLp(cur_c), &label, sizeof(labeltype)); memcpy(getDataByInternalId(cur_c), data_point, data_size_); if (curlevel) { - linkLists_[cur_c] = - (char *)malloc(size_links_per_element_ * curlevel + 1); + linkLists_[cur_c] = (char *)malloc(size_links_per_element_ * curlevel + 1); if (linkLists_[cur_c] == nullptr) - throw std::runtime_error( - "Not enough memory: addPoint failed to allocate linklist"); + throw std::runtime_error("Not enough memory: addPoint failed to allocate linklist"); memset(linkLists_[cur_c], 0, size_links_per_element_ * curlevel + 1); } @@ -1353,8 +1220,7 @@ class HierarchicalNSW : public AlgorithmInterface { if (curlevel < maxlevelcopy) { - 
dist_t curdist = fstdistfunc_(data_point, getDataByInternalId(currObj), - dist_func_param_); + dist_t curdist = fstdistfunc_(data_point, getDataByInternalId(currObj), dist_func_param_); for (int level = maxlevelcopy; level > curlevel; level--) { bool changed = true; @@ -1370,8 +1236,7 @@ class HierarchicalNSW : public AlgorithmInterface { tableint cand = datal[i]; if (cand < 0 || cand > max_elements_) throw std::runtime_error("cand error"); - dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), - dist_func_param_); + dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), dist_func_param_); if (d < curdist) { curdist = d; currObj = cand; @@ -1387,20 +1252,15 @@ class HierarchicalNSW : public AlgorithmInterface { if (level > maxlevelcopy || level < 0) // possible? throw std::runtime_error("Level error"); - std::priority_queue, - std::vector>, - CompareByFirst> + std::priority_queue, std::vector>, CompareByFirst> top_candidates = searchBaseLayer(currObj, data_point, level); if (epDeleted) { - top_candidates.emplace( - fstdistfunc_(data_point, getDataByInternalId(enterpoint_copy), - dist_func_param_), - enterpoint_copy); + top_candidates.emplace(fstdistfunc_(data_point, getDataByInternalId(enterpoint_copy), dist_func_param_), + enterpoint_copy); if (top_candidates.size() > ef_construction_) top_candidates.pop(); } - currObj = mutuallyConnectNewElement(data_point, cur_c, top_candidates, - level, false); + currObj = mutuallyConnectNewElement(data_point, cur_c, top_candidates, level, false); } } else { @@ -1417,17 +1277,15 @@ class HierarchicalNSW : public AlgorithmInterface { return cur_c; }; - std::priority_queue> - searchKnn(const data_t *query_data, size_t k, VisitedList *vl = nullptr, - long queryEf = -1) { + std::priority_queue> searchKnn(const data_t *query_data, size_t k, + VisitedList *vl = nullptr, long queryEf = -1) { std::shared_lock lock(resizeLock); std::priority_queue> result; if (cur_element_count == 0) return result; tableint currObj = 
enterpoint_node_; - dist_t curdist = fstdistfunc_( - query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); + dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); for (int level = maxlevel_; level > 0; level--) { bool changed = true; @@ -1445,8 +1303,7 @@ class HierarchicalNSW : public AlgorithmInterface { tableint cand = datal[i]; if (cand < 0 || cand > max_elements_) throw std::runtime_error("cand error"); - dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), - dist_func_param_); + dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); if (d < curdist) { curdist = d; @@ -1457,17 +1314,13 @@ class HierarchicalNSW : public AlgorithmInterface { } } - std::priority_queue, - std::vector>, - CompareByFirst> + std::priority_queue, std::vector>, CompareByFirst> top_candidates; size_t effective_ef = queryEf > 0 ? queryEf : ef_; if (num_deleted_) { - top_candidates = searchBaseLayerST( - currObj, query_data, std::max(effective_ef, k), vl); + top_candidates = searchBaseLayerST(currObj, query_data, std::max(effective_ef, k), vl); } else { - top_candidates = searchBaseLayerST( - currObj, query_data, std::max(effective_ef, k), vl); + top_candidates = searchBaseLayerST(currObj, query_data, std::max(effective_ef, k), vl); } while (top_candidates.size() > k) { @@ -1475,8 +1328,7 @@ class HierarchicalNSW : public AlgorithmInterface { } while (top_candidates.size() > 0) { std::pair rez = top_candidates.top(); - result.push(std::pair(rez.first, - getExternalLabel(rez.second))); + result.push(std::pair(rez.first, getExternalLabel(rez.second))); top_candidates.pop(); } return result; @@ -1511,8 +1363,7 @@ class HierarchicalNSW : public AlgorithmInterface { } std::cout << "Min inbound: " << min1 << ", Max inbound:" << max1 << "\n"; } - std::cout << "integrity ok, checked " << connections_checked - << " connections\n"; + std::cout << "integrity ok, checked " << connections_checked << " 
connections\n"; } }; diff --git a/cpp/src/hnswlib.h b/cpp/src/hnswlib.h index 13e21552..3a929050 100644 --- a/cpp/src/hnswlib.h +++ b/cpp/src/hnswlib.h @@ -75,13 +75,11 @@ template class pairGreater { template class AlgorithmInterface { public: virtual void addPoint(const data_t *datapoint, labeltype label) = 0; - virtual std::priority_queue> - searchKnn(const data_t *, size_t, VisitedList *a = nullptr, - long queryEf = -1) = 0; + virtual std::priority_queue> searchKnn(const data_t *, size_t, VisitedList *a = nullptr, + long queryEf = -1) = 0; // Return k nearest neighbor in the order of closer fist - virtual std::vector> - searchKnnCloserFirst(const data_t *query_data, size_t k); + virtual std::vector> searchKnnCloserFirst(const data_t *query_data, size_t k); virtual void saveIndex(const std::string &location) = 0; virtual ~AlgorithmInterface() {} @@ -89,8 +87,7 @@ template class AlgorithmInterface { template std::vector> -AlgorithmInterface::searchKnnCloserFirst( - const data_t *query_data, size_t k) { +AlgorithmInterface::searchKnnCloserFirst(const data_t *query_data, size_t k) { std::vector> result; // here searchKnn returns the result in the order of further first diff --git a/cpp/src/std_utils.h b/cpp/src/std_utils.h index 9486c9db..54f2a8aa 100644 --- a/cpp/src/std_utils.h +++ b/cpp/src/std_utils.h @@ -38,9 +38,7 @@ * * The method is borrowed from nmslib */ -template -inline void ParallelFor(size_t start, size_t end, size_t numThreads, - Function fn) { +template inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn) { if (numThreads <= 0) { numThreads = std::thread::hardware_concurrency(); } @@ -98,12 +96,9 @@ inline void ParallelFor(size_t start, size_t end, size_t numThreads, * This dramatically speeds up filtering of an std::priority_queue, as you no * longer need to modify the queue to iterate over it. 
*/ -template -S &GetContainerForQueue(std::priority_queue &q) { +template S &GetContainerForQueue(std::priority_queue &q) { struct HackedQueue : private std::priority_queue { - static S &Container(std::priority_queue &q) { - return q.*&HackedQueue::c; - } + static S &Container(std::priority_queue &q) { return q.*&HackedQueue::c; } }; return HackedQueue::Container(q); } @@ -115,13 +110,10 @@ S &GetContainerForQueue(std::priority_queue &q) { * IndexID will be added as the second tuple value of each element of the queue. */ template -void mergePriorityQueues( - std::priority_queue> &dest, - std::priority_queue> &src, size_t maxElements, - indexID_t indexID, const label_t idMask, const std::set &labels, - const dist_t maximumDistance) { - std::vector> &items = - GetContainerForQueue(src); +void mergePriorityQueues(std::priority_queue> &dest, + std::priority_queue> &src, size_t maxElements, indexID_t indexID, + const label_t idMask, const std::set &labels, const dist_t maximumDistance) { + std::vector> &items = GetContainerForQueue(src); for (auto i = items.begin(); i != items.end(); i++) { // To avoid copying unnecessarily, only move elements if: // - We don't have maxElements in `dest` yet From 024eee07719d91cd82bcf8570729fb5a174bba06 Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Fri, 22 Mar 2024 18:24:21 -0600 Subject: [PATCH 04/25] Clean up documentation and got tests running --- .gitignore | 14 ++++ CONTRIBUTING.md | 159 ++++++++++++++++++-------------------------- cpp/test/.gitignore | 2 + 3 files changed, 81 insertions(+), 94 deletions(-) create mode 100644 cpp/test/.gitignore diff --git a/.gitignore b/.gitignore index c4935192..e735e256 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,17 @@ java/classpath.txt java/linux-build/include/* python/voyager-headers .asv/ + +# Cmake +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps 
+DartConfiguration.tcl diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9319a0c0..0ef71a57 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,38 +1,47 @@ # How to Contribute - We'd love to get patches from you! -## Getting Started +#### Workflow +We follow the [GitHub Flow Workflow](https://guides.github.com/introduction/flow/): -### Prerequisites +1. Fork the project +1. Check out the `master` branch +1. Create a feature branch +1. Write code and tests for your change +1. From your branch, make a pull request against `https://github.com/spotify/voyager` +1. Work with repo maintainers to get your change reviewed +1. Wait for your change to be pulled into `https://github.com/spotify/voyager/master` +1. Delete your feature branch +## Getting Started +### Prerequisites To compile Voyager from scratch, the following packages will need to be installed: - [Python 3.7](https://www.python.org/downloads/) or higher. - A C++ compiler, e.g. `gcc`, `clang`, etc. -### Building Voyager Python +### Building Voyager +#### Building Python +There are some nuances to building the Voyager python code. Please read on for more information. +For basic building, you should be able to simply run the following commands: ```shell -git clone git@github.com:spotify/voyager.git -cd voyager -pip3 install -r python/dev-requirements.txt -pip3 install . +cd python +pip install -r python/dev-requirements.txt +pip install . ``` To compile a debug build of `voyager` that allows using a debugger (like gdb or lldb), use the following command to build the package locally and install a symbolic link for debugging: ```shell cd python -DEBUG=1 python3 setup.py build develop +DEBUG=1 python setup.py build develop ``` -Then, you can `import voyager` from Python (or run the tests with `tox`) to test out your local changes. 
- > If you're on macOS or Linux, you can try to compile a debug build _faster_ by using [Ccache](https://ccache.dev/): > ## macOS > ```shell > brew install ccache -> rm -rf build && CC="ccache clang" CXX="ccache clang++" DEBUG=1 python3 -j8 -m pip install -e . +> rm -rf build && CC="ccache clang" CXX="ccache clang++" DEBUG=1 python -j8 -m pip install -e . > ``` > ## Linux > e.g. @@ -40,51 +49,29 @@ Then, you can `import voyager` from Python (or run the tests with `tox`) to test > sudo yum install ccache # or apt, if on a Debian > > # If using GCC: -> rm -rf build && CC="ccache gcc" CXX="scripts/ccache_g++" DEBUG=1 python3 setup.py build -j8 develop +> rm -rf build && CC="ccache gcc" CXX="scripts/ccache_g++" DEBUG=1 python setup.py build -j8 develop > > # ...or if using Clang: -> rm -rf build && CC="ccache clang" CXX="scripts/ccache_clang++" DEBUG=1 python3 setup.py build -j8 develop +> rm -rf build && CC="ccache clang" CXX="scripts/ccache_clang++" DEBUG=1 python setup.py build -j8 develop > ``` -### Updating Documentation -If you notice that the documentation is out of date, feel free to run these commands in order to update the docs and make a PR with the changes. - -#### Python -While `voyager` is mostly C++ code, it ships with `.pyi` files to allow for type hints in text editors and via MyPy. To update the Python type hint files, use the following commands: - -```shell -cd python -python3 -m scripts.generate_type_stubs_and_docs -# Documentation will be dumped into ../docs/python/ -``` - -#### Java -To update the javadocs for the java bindings, you can simply run: - +#### Building Java +To build the Java library with `maven`, use the following commands: ```shell cd java mvn package ``` -this will update the java documentation located in [docs/java/](https://github.com/spotify/voyager/tree/main/docs/java). - -## Workflow - -We follow the [GitHub Flow Workflow](https://guides.github.com/introduction/flow/): - -1. Fork the project -1. 
Check out the `master` branch -1. Create a feature branch -1. Write code and tests for your change -1. From your branch, make a pull request against `https://github.com/spotify/voyager` -1. Work with repo maintainers to get your change reviewed -1. Wait for your change to be pulled into `https://github.com/spotify/voyager/master` -1. Delete your feature branch +#### Building C++ +To build the C++ library with `cmake`, use the following commands: +```shell +cd cpp +cmake . +make +``` ## Testing - ### Python Tests - We use `tox` for testing - running tests from end-to-end should be as simple as: ``` @@ -114,17 +101,23 @@ Please note that `airspeed-velocity` can only run benchmarks against a git commi you have uncommited code that you want to run benchmarks for you need to commit it first. ### Java Tests - We provide java test execution as a maven test step. Thus you can run the tests with: ```shell cd java mvn verify -```` +``` +### C++ Tests +To run the C++ tests, use the following commands: +```shell +cd cpp +cmake . +make test +./test/test +``` ## Style - Use [`clang-format`](https://clang.llvm.org/docs/ClangFormat.html) for C++ code, and `black` with defaults for Python code. In order to check and run formatting within the python module (but not the c++ core module), you can use tox to facilitate this. @@ -145,9 +138,32 @@ clang-format --verbose --dry-run -i src/*.h clang-format --verbose -i src/*.h ``` +### Updating Documentation +We also welcome improvements to the project documentation or to the existing +docs. Please file an [issue](https://github.com/spotify/voyager/issues/new). -## Issues +If you notice that the generated API documentation is out of date, feel free to run these commands in order to update the docs and make a PR with the changes. + +#### Python +While `voyager` is mostly C++ code, it ships with `.pyi` files to allow for type hints in text editors and via MyPy. 
To update the Python type hint files, use the following commands: + +```shell +cd python +python3 -m scripts.generate_type_stubs_and_docs +# Documentation will be dumped into ../docs/python/ +``` + +#### Java +To update the javadocs for the java bindings, you can simply run: + +```shell +cd java +mvn package +``` +this will update the java documentation located in [docs/java/](https://github.com/spotify/voyager/tree/main/docs/java). + +## Issues When creating an issue please try to ahere to the following format: One line summary of the issue (less than 72 characters) @@ -164,47 +180,7 @@ When creating an issue please try to ahere to the following format: List all relevant steps to reproduce the observed behaviour. -## Pull Requests - -Files should be exempt of trailing spaces. - -We adhere to a specific format for pull requests. Please write your commit -messages along these guidelines. Please keep the line width no greater than 80 -columns (You can use `fmt -n -p -w 80` to accomplish this). - - One line description of your change (less than 72 characters) - - Problem - - Explain the context and why you're making that change. What is the problem - you're trying to solve? In some cases there is not a problem and this can be - thought of being the motivation for your change. - - Solution - - Describe the modifications you've done. - - Result - - What will change as a result of your pull request? Note that sometimes this - section is unnecessary because it is self-explanatory based on the solution. - -Some important notes regarding the summary line: - -* Describe what was done; not the result -* Use the active voice -* Use the present tense -* Capitalize properly -* Do not end in a period — this is a title/subject -* Prefix the subject with its scope - -## Documentation - -We also welcome improvements to the project documentation or to the existing -docs. Please file an [issue](https://github.com/spotify/voyager/issues/new). 
- ## First Contributions - If you are a first time contributor to `voyager`, familiarize yourself with the: * [Code of Conduct](CODE_OF_CONDUCT.md) * [GitHub Flow Workflow](https://guides.github.com/introduction/flow/) @@ -215,20 +191,15 @@ When you're ready, navigate to [issues](https://github.com/spotify/voyager/issue There is a lot to learn when making your first contribution. As you gain experience, you will be able to make contributions faster. You can submit an issue using the [question](https://github.com/spotify/voyager/labels/question) label if you encounter challenges. # License - By contributing your code, you agree to license your contribution under the terms of the [LICENSE](https://github.com/spotify/voyager/blob/master/LICENSE). # Code of Conduct - Read our [Code of Conduct](CODE_OF_CONDUCT.md) for the project. # Troubleshooting - ## Building the project - ### `ModuleNotFoundError: No module named 'pybind11'` - Try updating your version of `pip`: ```shell pip install --upgrade pip diff --git a/cpp/test/.gitignore b/cpp/test/.gitignore new file mode 100644 index 00000000..5301e597 --- /dev/null +++ b/cpp/test/.gitignore @@ -0,0 +1,2 @@ +test +test-*.cmake \ No newline at end of file From 074b1bbef7fd36cbf8dc6c4f4d1c28207882b722 Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Fri, 22 Mar 2024 18:40:36 -0600 Subject: [PATCH 05/25] Uncomment the other tests but they failing right now --- .github/pull_request_template.md | 2 - cpp/test/test_main.cpp | 426 +++++++++++++++---------------- 2 files changed, 213 insertions(+), 215 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 2f9fab38..89ce7c24 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,5 +1,3 @@ -# Pull Request - ## Description diff --git a/cpp/test/test_main.cpp b/cpp/test/test_main.cpp index 1ad0f2e2..3e017e39 100644 --- a/cpp/test/test_main.cpp +++ b/cpp/test/test_main.cpp @@ -1,4 +1,4 @@ -#include +#include 
"TypedIndex.h" #include #include @@ -72,215 +72,215 @@ TEST_CASE("Basic init") { // dist_t, data_t, scalefactor, tolerance -// TEMPLATE_TEST_CASE("create_and_query", -// "[index_creation]", -// (std::tuple>), -// (std::tuple>), -// (std::tuple>)) { -// auto num_dimensions = GENERATE(4, 16, 128, 128, 256); -// auto num_elements = GENERATE(100, 1000); -// auto space = GENERATE(SpaceType::Euclidean, SpaceType::Cosine); - -// // It's a struggle to include these as std::ratio in the TEMPLATE test case so -// // we'll set distance tolerance here. -// float distance_tolerance = 0.0; -// if (std::is_same::type, struct E4M3>::value) { -// distance_tolerance = 0.20; -// } else if (std::is_same::type, char>::value) { -// distance_tolerance = 0.20; -// } else if (std::is_same::type, float>::value) { -// distance_tolerance = 2e-6; -// } - -// SECTION("(num_dimensions, num_elements, space): (" + std::to_string(num_dimensions) + "," + -// std::to_string(num_elements) + "," + std::to_string(space) + ")") { - -// // Generate a 2D Matrix of test data -// std::random_device rd; -// std::mt19937 gen(rd()); -// std::uniform_real_distribution dis(0.0, 1.0); -// auto input_data = std::vector(num_elements * num_dimensions); -// std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { -// float val = 2 * dis(gen) - 1; -// if (std::is_same::type, char>::value) { -// val = std::round(val * 127.0f) / 127.0f; -// } -// return val; -// }); - -// auto input_array = NDArray(input_data, {num_elements, num_dimensions}); - -// // Create Index -// auto index = TypedIndex::type, -// typename std::tuple_element<1, TestType>::type, -// typename std::tuple_element<2, TestType>::type>( -// space, num_dimensions, 20, num_elements); - -// index.setEF(num_elements); -// index.addItems(input_array); -// SECTION("Multiple query interface") { -// auto [labels, distances] = index.query(input_array); - -// if (!std::is_same::type, float>::value) { -// auto matches = 0; -// // Could be std::reduce or 
std::accumulate -// for (auto row = 0; row < num_elements; ++row) { -// matches += labels[row][0] == row; -// } -// REQUIRE((double)matches / (double)num_elements > 0.5); -// } else { -// // Could be std::reduce or std::accumulate -// std::vector expected(num_elements); -// std::iota(expected.begin(), expected.end(), 0); -// REQUIRE_THAT(flattenNDArray(labels), AllClose(expected)); -// } - -// REQUIRE_THAT(flattenNDArray(distances), -// AllClose(std::vector(num_elements, 0.0), 1e-7, distance_tolerance)); -// } - -// SECTION("Single query interface") { -// for (auto row = 0; row < num_elements; ++row) { -// auto [labels, distances] = -// index.query({input_array[row], input_array[row] + num_dimensions}); -// if (std::is_same::type, float>::value) { -// REQUIRE(labels[0] == row); -// } -// if(distances[0] >= distance_tolerance) { -// float a = 0; -// } -// REQUIRE(distances[0] < distance_tolerance); -// } -// } - -// // SECTION("Saving an index") { -// // auto output_file = std::tmpfile(); -// // index.saveIndex(std::make_shared(output_file)); -// // auto file_byte_count = std::ftell(output_file); -// // REQUIRE(file_byte_count > 0); -// // auto memory_output_stream = std::make_shared(); -// // index.saveIndex(memory_output_stream); -// // auto index_bytes = memory_output_stream->getValue().size(); -// // REQUIRE(index_bytes > 0); -// // REQUIRE(file_byte_count == index_bytes); -// // } -// } -// } - -// TEST_CASE("Spaces") { -// auto [space, expected_distances] = -// GENERATE(std::make_tuple>(SpaceType::Euclidean, -// {0.0, 1.0, 2.0, 2.0, 2.0}), -// std::make_tuple>(SpaceType::InnerProduct, -// {-2.0, -1.0, 0.0, 0.0, 0.0}), -// std::make_tuple>( -// SpaceType::Cosine, {0, 1.835e-1, 4.23e-1, 4.23e-1, 4.23e-1})); -// auto right_dimension = GENERATE(Catch::Generators::range(1, 128, 3)); -// auto left_dimension = GENERATE(Catch::Generators::range(1, 32, 5)); - -// auto num_dimensions = 3; -// auto data = NDArray({1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1}, {5, 
num_dimensions}); - -// auto input_data = std::vector(); -// for (int i = 0; i < data.shape[0]; ++i) { -// std::vector to_insert(left_dimension, 0); -// std::vector right(right_dimension, 0); -// to_insert.insert(to_insert.end(), data[0], data[0] + data.shape[1]); -// to_insert.insert(to_insert.end(), right.begin(), right.end()); -// input_data.insert(input_data.end(), to_insert.begin(), to_insert.end()); -// } - -// num_dimensions = right_dimension + left_dimension + data.shape[1]; - -// auto data_2 = NDArray(input_data, {data.shape[0], num_dimensions}); -// auto index = TypedIndex(space, num_dimensions, 16, 100); -// index.setEF(10); -// index.addItems(data_2); - -// auto [labels, distances] = index.query( -// std::vector(data_2[data_2.shape[0] - 1], data_2[data_2.shape[0] - 1] + num_dimensions), 5); -// REQUIRE_THAT(distances, AllClose(expected_distances, 1e-7, 1e-3)); -// } - -// TEST_CASE("Get Vectors") { -// auto num_dimensions = GENERATE(4, 16, 128, 256); -// auto num_elements = GENERATE(100, 1000); -// auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); - -// // Generate a 2D Matrix of test data -// std::random_device rd; -// std::mt19937 gen(rd()); -// std::uniform_real_distribution dis(0.0, 1.0); -// auto input_data = std::vector(num_elements * num_dimensions); -// std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); -// auto input_array = NDArray(input_data, {num_elements, num_dimensions}); - -// auto index = TypedIndex(space, num_dimensions); -// auto labels = std::vector(num_elements); -// std::iota(labels.begin(), labels.end(), 0); - -// REQUIRE_THROWS(index.getVector(labels[0])); -// index.addItems(input_array); - -// SECTION("Test single vector retrieval") { -// for (auto i = 0; i < labels.size(); ++i) { -// REQUIRE_THAT(index.getVector(labels[i]), -// AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); -// } -// } - -// SECTION("Test all vectors retrieval") { -// 
auto vectors = index.getVectors(labels); -// for (auto i = 0; i < labels.size(); ++i) { -// REQUIRE_THAT(std::vector(vectors[i], vectors[i] + num_dimensions), -// AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); -// } -// } -// } - -// TEST_CASE("Query EF") { -// auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); -// auto [query_ef, rank_tolerance] = -// GENERATE(std::make_tuple(1, 100), std::make_tuple(2, 75), std::make_tuple(100, 1)); -// auto num_dimensions = 32; -// auto num_elements = 1000; - -// // Generate a 2D Matrix of test data -// std::random_device rd; -// std::mt19937 gen(rd()); -// std::uniform_real_distribution dis(0.0, 1.0); -// auto input_data = std::vector(num_elements * num_dimensions); -// std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); -// auto input_array = NDArray(input_data, {num_elements, num_dimensions}); - -// auto index = TypedIndex(space, num_dimensions, 20, num_elements); -// index.setEF(num_elements); -// index.addItems(input_array); - -// auto [closest_labels_per_vector, _] = index.query(input_array, num_elements, -1, num_elements); -// SECTION("Multi query interface") { -// auto [labels, _] = index.query(input_array, 1, -1, query_ef); -// for (auto i = 0; i < labels.shape[0]; ++i) { -// auto returned_label = labels[0][0]; -// // Consider doing this in a loop with an early break. 
-// auto label_iter = std::find(closest_labels_per_vector[i], -// closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], -// returned_label); -// auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); -// REQUIRE(actual_rank < rank_tolerance); -// } -// } - -// SECTION("Single query interface") { -// for (auto i = 0; i < input_array.shape[0]; ++i) { -// auto [returned_labels, _] = -// index.query({input_data[i], input_data[i] + num_dimensions}, 1, query_ef); -// auto returned_label = returned_labels[0]; -// auto label_iter = std::find(closest_labels_per_vector[i], -// closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], -// returned_label); -// auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); -// REQUIRE(actual_rank < rank_tolerance); -// } -// } -// } \ No newline at end of file +TEMPLATE_TEST_CASE("create_and_query", + "[index_creation]", + (std::tuple>), + // (std::tuple>), + (std::tuple>)) { + auto num_dimensions = GENERATE(4, 16, 128, 128, 256); + auto num_elements = GENERATE(100, 1000); + auto space = GENERATE(SpaceType::Euclidean, SpaceType::Cosine); + + // It's a struggle to include these as std::ratio in the TEMPLATE test case so + // we'll set distance tolerance here. 
+ float distance_tolerance = 0.0; + if (std::is_same::type, struct E4M3>::value) { + distance_tolerance = 0.20; + } else if (std::is_same::type, char>::value) { + distance_tolerance = 0.20; + } else if (std::is_same::type, float>::value) { + distance_tolerance = 2e-6; + } + + SECTION("(num_dimensions, num_elements, space): (" + std::to_string(num_dimensions) + "," + + std::to_string(num_elements) + "," + std::to_string(space) + ")") { + + // Generate a 2D Matrix of test data + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(0.0, 1.0); + auto input_data = std::vector(num_elements * num_dimensions); + std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { + float val = 2 * dis(gen) - 1; + if (std::is_same::type, char>::value) { + val = std::round(val * 127.0f) / 127.0f; + } + return val; + }); + + auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + + // Create Index + auto index = TypedIndex::type, + typename std::tuple_element<1, TestType>::type, + typename std::tuple_element<2, TestType>::type>( + space, num_dimensions, 20, num_elements); + + index.setEF(num_elements); + index.addItems(input_array); + SECTION("Multiple query interface") { + auto [labels, distances] = index.query(input_array); + + if (!std::is_same::type, float>::value) { + auto matches = 0; + // Could be std::reduce or std::accumulate + for (auto row = 0; row < num_elements; ++row) { + matches += labels[row][0] == row; + } + REQUIRE((double)matches / (double)num_elements > 0.5); + } else { + // Could be std::reduce or std::accumulate + std::vector expected(num_elements); + std::iota(expected.begin(), expected.end(), 0); + REQUIRE_THAT(flattenNDArray(labels), AllClose(expected)); + } + + REQUIRE_THAT(flattenNDArray(distances), + AllClose(std::vector(num_elements, 0.0), 1e-7, distance_tolerance)); + } + + SECTION("Single query interface") { + for (auto row = 0; row < num_elements; ++row) { + auto [labels, distances] = + 
index.query({input_array[row], input_array[row] + num_dimensions}); + if (std::is_same::type, float>::value) { + REQUIRE(labels[0] == row); + } + if(distances[0] >= distance_tolerance) { + float a = 0; + } + REQUIRE(distances[0] < distance_tolerance); + } + } + + // SECTION("Saving an index") { + // auto output_file = std::tmpfile(); + // index.saveIndex(std::make_shared(output_file)); + // auto file_byte_count = std::ftell(output_file); + // REQUIRE(file_byte_count > 0); + // auto memory_output_stream = std::make_shared(); + // index.saveIndex(memory_output_stream); + // auto index_bytes = memory_output_stream->getValue().size(); + // REQUIRE(index_bytes > 0); + // REQUIRE(file_byte_count == index_bytes); + // } + } +} + +TEST_CASE("Spaces") { + auto [space, expected_distances] = + GENERATE(std::make_tuple>(SpaceType::Euclidean, + {0.0, 1.0, 2.0, 2.0, 2.0}), + std::make_tuple>(SpaceType::InnerProduct, + {-2.0, -1.0, 0.0, 0.0, 0.0}), + std::make_tuple>( + SpaceType::Cosine, {0, 1.835e-1, 4.23e-1, 4.23e-1, 4.23e-1})); + auto right_dimension = GENERATE(Catch::Generators::range(1, 128, 3)); + auto left_dimension = GENERATE(Catch::Generators::range(1, 32, 5)); + + auto num_dimensions = 3; + auto data = NDArray({1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1}, {5, num_dimensions}); + + auto input_data = std::vector(); + for (int i = 0; i < data.shape[0]; ++i) { + std::vector to_insert(left_dimension, 0); + std::vector right(right_dimension, 0); + to_insert.insert(to_insert.end(), data[0], data[0] + data.shape[1]); + to_insert.insert(to_insert.end(), right.begin(), right.end()); + input_data.insert(input_data.end(), to_insert.begin(), to_insert.end()); + } + + num_dimensions = right_dimension + left_dimension + data.shape[1]; + + auto data_2 = NDArray(input_data, {data.shape[0], num_dimensions}); + auto index = TypedIndex(space, num_dimensions, 16, 100); + index.setEF(10); + index.addItems(data_2); + + auto [labels, distances] = index.query( + 
std::vector(data_2[data_2.shape[0] - 1], data_2[data_2.shape[0] - 1] + num_dimensions), 5); + REQUIRE_THAT(distances, AllClose(expected_distances, 1e-7, 1e-3)); +} + +TEST_CASE("Get Vectors") { + auto num_dimensions = GENERATE(4, 16, 128, 256); + auto num_elements = GENERATE(100, 1000); + auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); + + // Generate a 2D Matrix of test data + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(0.0, 1.0); + auto input_data = std::vector(num_elements * num_dimensions); + std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); + auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + + auto index = TypedIndex(space, num_dimensions); + auto labels = std::vector(num_elements); + std::iota(labels.begin(), labels.end(), 0); + + REQUIRE_THROWS(index.getVector(labels[0])); + index.addItems(input_array); + + SECTION("Test single vector retrieval") { + for (auto i = 0; i < labels.size(); ++i) { + REQUIRE_THAT(index.getVector(labels[i]), + AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); + } + } + + SECTION("Test all vectors retrieval") { + auto vectors = index.getVectors(labels); + for (auto i = 0; i < labels.size(); ++i) { + REQUIRE_THAT(std::vector(vectors[i], vectors[i] + num_dimensions), + AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); + } + } +} + +TEST_CASE("Query EF") { + auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); + auto [query_ef, rank_tolerance] = + GENERATE(std::make_tuple(1, 100), std::make_tuple(2, 75), std::make_tuple(100, 1)); + auto num_dimensions = 32; + auto num_elements = 1000; + + // Generate a 2D Matrix of test data + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(0.0, 1.0); + auto input_data = std::vector(num_elements * num_dimensions); + std::generate(input_data.begin(), input_data.end(), 
[&dis, &gen]() { return 2 * dis(gen) - 1; }); + auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + + auto index = TypedIndex(space, num_dimensions, 20, num_elements); + index.setEF(num_elements); + index.addItems(input_array); + + auto [closest_labels_per_vector, _] = index.query(input_array, num_elements, -1, num_elements); + SECTION("Multi query interface") { + auto [labels, _] = index.query(input_array, 1, -1, query_ef); + for (auto i = 0; i < labels.shape[0]; ++i) { + auto returned_label = labels[0][0]; + // Consider doing this in a loop with an early break. + auto label_iter = std::find(closest_labels_per_vector[i], + closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], + returned_label); + auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); + REQUIRE(actual_rank < rank_tolerance); + } + } + + SECTION("Single query interface") { + for (auto i = 0; i < input_array.shape[0]; ++i) { + auto [returned_labels, _] = + index.query({input_data[i], input_data[i] + num_dimensions}, 1, query_ef); + auto returned_label = returned_labels[0]; + auto label_iter = std::find(closest_labels_per_vector[i], + closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], + returned_label); + auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); + REQUIRE(actual_rank < rank_tolerance); + } + } +} \ No newline at end of file From 5a1e371f09060a4aa0a93eec5c33d87c17dce42b Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Fri, 29 Mar 2024 15:03:15 -0600 Subject: [PATCH 06/25] Fix java and python cpp src paths --- CONTRIBUTING.md | 2 + cpp/test/test_main.cpp | 506 +++--- docs/java/allclasses-index.html | 237 +-- docs/java/allpackages-index.html | 183 +- .../voyager/jni/Index.QueryResults.html | 509 ++---- .../spotify/voyager/jni/Index.SpaceType.html | 472 ++--- .../voyager/jni/Index.StorageDataType.html | 476 ++---- docs/java/com/spotify/voyager/jni/Index.html | 1523 +++++++---------- 
.../voyager/jni/StringIndex.QueryResults.html | 472 ++--- .../com/spotify/voyager/jni/StringIndex.html | 979 ++++------- .../jni/class-use/Index.QueryResults.html | 261 +-- .../jni/class-use/Index.SpaceType.html | 398 ++--- .../jni/class-use/Index.StorageDataType.html | 366 ++-- .../spotify/voyager/jni/class-use/Index.html | 257 +-- .../class-use/StringIndex.QueryResults.html | 235 +-- .../voyager/jni/class-use/StringIndex.html | 265 +-- .../spotify/voyager/jni/package-summary.html | 266 +-- .../com/spotify/voyager/jni/package-tree.html | 171 +- .../com/spotify/voyager/jni/package-use.html | 243 +-- .../voyager/jni/utils/JniLibExtractor.html | 355 ++-- .../spotify/voyager/jni/utils/TinyJson.html | 388 ++--- .../jni/utils/class-use/JniLibExtractor.html | 150 +- .../voyager/jni/utils/class-use/TinyJson.html | 150 +- .../voyager/jni/utils/package-summary.html | 206 +-- .../voyager/jni/utils/package-tree.html | 157 +- .../voyager/jni/utils/package-use.html | 148 +- .../com/spotify/voyager/package-summary.html | 176 +- .../com/spotify/voyager/package-tree.html | 147 +- .../java/com/spotify/voyager/package-use.html | 148 +- docs/java/help-doc.html | 332 ++-- docs/java/index-all.html | 459 ++--- docs/java/index.html | 185 +- docs/java/jquery-ui.overrides.css | 3 +- docs/java/legal/jquery.md | 8 +- docs/java/legal/jqueryUI.md | 2 +- docs/java/overview-summary.html | 17 +- docs/java/overview-tree.html | 175 +- docs/java/script-dir/jquery-ui.min.css | 7 +- docs/java/script-dir/jquery-ui.min.js | 4 +- docs/java/stylesheet.css | 779 ++++----- java/Makefile | 2 +- python/setup.py | 2 +- python/voyager/__init__.pyi | 36 - 43 files changed, 3940 insertions(+), 7917 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0ef71a57..642321fd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -66,6 +66,7 @@ mvn package To build the C++ library with `cmake`, use the following commands: ```shell cd cpp +git submodule update --init --recursive cmake . 
make ``` @@ -112,6 +113,7 @@ mvn verify To run the C++ tests, use the following commands: ```shell cd cpp +git submodule update --init --recursive cmake . make test ./test/test diff --git a/cpp/test/test_main.cpp b/cpp/test/test_main.cpp index 3e017e39..b99af38b 100644 --- a/cpp/test/test_main.cpp +++ b/cpp/test/test_main.cpp @@ -16,45 +16,45 @@ // TODO: Extract data generation as a function or as a Catch2 Generator -template struct AllCloseMatcher : Catch::Matchers::MatcherGenericBase { - AllCloseMatcher(const std::vector &a, const float rtol = 1e-7, const float atol = 0) - : a_(a), rtol_(rtol), atol_(atol) {} - - bool match(const std::vector &b) const { - // Could use std::reduce, but early return is most likely faster - if (a_.size() != b.size()) { - return false; - } - // TODO: Replace with Ranges https://en.cppreference.com/w/cpp/ranges - for (int i = 0; i < a_.size(); ++i) { - if (!(std::fabs(a_[i] - b[i]) <= (atol_ + rtol_ * std::fabs(a_[i])))) { - return false; - } - } - return true; - } - - std::string describe() const override { return "IsClose"; } - -private: - const std::vector &a_; - const float atol_; - const float rtol_; -}; - -template -auto AllClose(const std::vector a, const float rtol = 1e-7, const float atol = 0) - -> AllCloseMatcher { - return AllCloseMatcher{a, rtol, atol}; -} - -template std::vector flattenNDArray(NDArray &arr) { - std::vector res(arr.shape[0]); - for (auto i = 0; i < arr.shape[0]; ++i) { - res[i] = arr[i][0]; - } - return res; -}; +// template struct AllCloseMatcher : Catch::Matchers::MatcherGenericBase { +// AllCloseMatcher(const std::vector &a, const float rtol = 1e-7, const float atol = 0) +// : a_(a), rtol_(rtol), atol_(atol) {} + +// bool match(const std::vector &b) const { +// // Could use std::reduce, but early return is most likely faster +// if (a_.size() != b.size()) { +// return false; +// } +// // TODO: Replace with Ranges https://en.cppreference.com/w/cpp/ranges +// for (int i = 0; i < a_.size(); ++i) { +// if 
(!(std::fabs(a_[i] - b[i]) <= (atol_ + rtol_ * std::fabs(a_[i])))) { +// return false; +// } +// } +// return true; +// } + +// std::string describe() const override { return "IsClose"; } + +// private: +// const std::vector &a_; +// const float atol_; +// const float rtol_; +// }; + +// template +// auto AllClose(const std::vector a, const float rtol = 1e-7, const float atol = 0) +// -> AllCloseMatcher { +// return AllCloseMatcher{a, rtol, atol}; +// } + +// template std::vector flattenNDArray(NDArray &arr) { +// std::vector res(arr.shape[0]); +// for (auto i = 0; i < arr.shape[0]; ++i) { +// res[i] = arr[i][0]; +// } +// return res; +// }; TEST_CASE("Basic init") { auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); @@ -70,217 +70,217 @@ TEST_CASE("Basic init") { } } -// dist_t, data_t, scalefactor, tolerance - -TEMPLATE_TEST_CASE("create_and_query", - "[index_creation]", - (std::tuple>), - // (std::tuple>), - (std::tuple>)) { - auto num_dimensions = GENERATE(4, 16, 128, 128, 256); - auto num_elements = GENERATE(100, 1000); - auto space = GENERATE(SpaceType::Euclidean, SpaceType::Cosine); - - // It's a struggle to include these as std::ratio in the TEMPLATE test case so - // we'll set distance tolerance here. 
- float distance_tolerance = 0.0; - if (std::is_same::type, struct E4M3>::value) { - distance_tolerance = 0.20; - } else if (std::is_same::type, char>::value) { - distance_tolerance = 0.20; - } else if (std::is_same::type, float>::value) { - distance_tolerance = 2e-6; - } - - SECTION("(num_dimensions, num_elements, space): (" + std::to_string(num_dimensions) + "," + - std::to_string(num_elements) + "," + std::to_string(space) + ")") { - - // Generate a 2D Matrix of test data - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(0.0, 1.0); - auto input_data = std::vector(num_elements * num_dimensions); - std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { - float val = 2 * dis(gen) - 1; - if (std::is_same::type, char>::value) { - val = std::round(val * 127.0f) / 127.0f; - } - return val; - }); - - auto input_array = NDArray(input_data, {num_elements, num_dimensions}); - - // Create Index - auto index = TypedIndex::type, - typename std::tuple_element<1, TestType>::type, - typename std::tuple_element<2, TestType>::type>( - space, num_dimensions, 20, num_elements); - - index.setEF(num_elements); - index.addItems(input_array); - SECTION("Multiple query interface") { - auto [labels, distances] = index.query(input_array); - - if (!std::is_same::type, float>::value) { - auto matches = 0; - // Could be std::reduce or std::accumulate - for (auto row = 0; row < num_elements; ++row) { - matches += labels[row][0] == row; - } - REQUIRE((double)matches / (double)num_elements > 0.5); - } else { - // Could be std::reduce or std::accumulate - std::vector expected(num_elements); - std::iota(expected.begin(), expected.end(), 0); - REQUIRE_THAT(flattenNDArray(labels), AllClose(expected)); - } - - REQUIRE_THAT(flattenNDArray(distances), - AllClose(std::vector(num_elements, 0.0), 1e-7, distance_tolerance)); - } - - SECTION("Single query interface") { - for (auto row = 0; row < num_elements; ++row) { - auto [labels, distances] = - 
index.query({input_array[row], input_array[row] + num_dimensions}); - if (std::is_same::type, float>::value) { - REQUIRE(labels[0] == row); - } - if(distances[0] >= distance_tolerance) { - float a = 0; - } - REQUIRE(distances[0] < distance_tolerance); - } - } - - // SECTION("Saving an index") { - // auto output_file = std::tmpfile(); - // index.saveIndex(std::make_shared(output_file)); - // auto file_byte_count = std::ftell(output_file); - // REQUIRE(file_byte_count > 0); - // auto memory_output_stream = std::make_shared(); - // index.saveIndex(memory_output_stream); - // auto index_bytes = memory_output_stream->getValue().size(); - // REQUIRE(index_bytes > 0); - // REQUIRE(file_byte_count == index_bytes); - // } - } -} - -TEST_CASE("Spaces") { - auto [space, expected_distances] = - GENERATE(std::make_tuple>(SpaceType::Euclidean, - {0.0, 1.0, 2.0, 2.0, 2.0}), - std::make_tuple>(SpaceType::InnerProduct, - {-2.0, -1.0, 0.0, 0.0, 0.0}), - std::make_tuple>( - SpaceType::Cosine, {0, 1.835e-1, 4.23e-1, 4.23e-1, 4.23e-1})); - auto right_dimension = GENERATE(Catch::Generators::range(1, 128, 3)); - auto left_dimension = GENERATE(Catch::Generators::range(1, 32, 5)); - - auto num_dimensions = 3; - auto data = NDArray({1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1}, {5, num_dimensions}); - - auto input_data = std::vector(); - for (int i = 0; i < data.shape[0]; ++i) { - std::vector to_insert(left_dimension, 0); - std::vector right(right_dimension, 0); - to_insert.insert(to_insert.end(), data[0], data[0] + data.shape[1]); - to_insert.insert(to_insert.end(), right.begin(), right.end()); - input_data.insert(input_data.end(), to_insert.begin(), to_insert.end()); - } - - num_dimensions = right_dimension + left_dimension + data.shape[1]; - - auto data_2 = NDArray(input_data, {data.shape[0], num_dimensions}); - auto index = TypedIndex(space, num_dimensions, 16, 100); - index.setEF(10); - index.addItems(data_2); - - auto [labels, distances] = index.query( - 
std::vector(data_2[data_2.shape[0] - 1], data_2[data_2.shape[0] - 1] + num_dimensions), 5); - REQUIRE_THAT(distances, AllClose(expected_distances, 1e-7, 1e-3)); -} - -TEST_CASE("Get Vectors") { - auto num_dimensions = GENERATE(4, 16, 128, 256); - auto num_elements = GENERATE(100, 1000); - auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); - - // Generate a 2D Matrix of test data - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(0.0, 1.0); - auto input_data = std::vector(num_elements * num_dimensions); - std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); - auto input_array = NDArray(input_data, {num_elements, num_dimensions}); - - auto index = TypedIndex(space, num_dimensions); - auto labels = std::vector(num_elements); - std::iota(labels.begin(), labels.end(), 0); - - REQUIRE_THROWS(index.getVector(labels[0])); - index.addItems(input_array); - - SECTION("Test single vector retrieval") { - for (auto i = 0; i < labels.size(); ++i) { - REQUIRE_THAT(index.getVector(labels[i]), - AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); - } - } - - SECTION("Test all vectors retrieval") { - auto vectors = index.getVectors(labels); - for (auto i = 0; i < labels.size(); ++i) { - REQUIRE_THAT(std::vector(vectors[i], vectors[i] + num_dimensions), - AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); - } - } -} - -TEST_CASE("Query EF") { - auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); - auto [query_ef, rank_tolerance] = - GENERATE(std::make_tuple(1, 100), std::make_tuple(2, 75), std::make_tuple(100, 1)); - auto num_dimensions = 32; - auto num_elements = 1000; - - // Generate a 2D Matrix of test data - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution dis(0.0, 1.0); - auto input_data = std::vector(num_elements * num_dimensions); - std::generate(input_data.begin(), input_data.end(), 
[&dis, &gen]() { return 2 * dis(gen) - 1; }); - auto input_array = NDArray(input_data, {num_elements, num_dimensions}); - - auto index = TypedIndex(space, num_dimensions, 20, num_elements); - index.setEF(num_elements); - index.addItems(input_array); - - auto [closest_labels_per_vector, _] = index.query(input_array, num_elements, -1, num_elements); - SECTION("Multi query interface") { - auto [labels, _] = index.query(input_array, 1, -1, query_ef); - for (auto i = 0; i < labels.shape[0]; ++i) { - auto returned_label = labels[0][0]; - // Consider doing this in a loop with an early break. - auto label_iter = std::find(closest_labels_per_vector[i], - closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], - returned_label); - auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); - REQUIRE(actual_rank < rank_tolerance); - } - } - - SECTION("Single query interface") { - for (auto i = 0; i < input_array.shape[0]; ++i) { - auto [returned_labels, _] = - index.query({input_data[i], input_data[i] + num_dimensions}, 1, query_ef); - auto returned_label = returned_labels[0]; - auto label_iter = std::find(closest_labels_per_vector[i], - closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], - returned_label); - auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); - REQUIRE(actual_rank < rank_tolerance); - } - } -} \ No newline at end of file +// // dist_t, data_t, scalefactor, tolerance + +// TEMPLATE_TEST_CASE("create_and_query", +// "[index_creation]", +// (std::tuple>), +// // (std::tuple>), +// (std::tuple>)) { +// auto num_dimensions = GENERATE(4, 16, 128, 128, 256); +// auto num_elements = GENERATE(100, 1000); +// auto space = GENERATE(SpaceType::Euclidean, SpaceType::Cosine); + +// // It's a struggle to include these as std::ratio in the TEMPLATE test case so +// // we'll set distance tolerance here. 
+// float distance_tolerance = 0.0; +// if (std::is_same::type, struct E4M3>::value) { +// distance_tolerance = 0.20; +// } else if (std::is_same::type, char>::value) { +// distance_tolerance = 0.20; +// } else if (std::is_same::type, float>::value) { +// distance_tolerance = 2e-6; +// } + +// SECTION("(num_dimensions, num_elements, space): (" + std::to_string(num_dimensions) + "," + +// std::to_string(num_elements) + "," + std::to_string(space) + ")") { + +// // Generate a 2D Matrix of test data +// std::random_device rd; +// std::mt19937 gen(rd()); +// std::uniform_real_distribution dis(0.0, 1.0); +// auto input_data = std::vector(num_elements * num_dimensions); +// std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { +// float val = 2 * dis(gen) - 1; +// if (std::is_same::type, char>::value) { +// val = std::round(val * 127.0f) / 127.0f; +// } +// return val; +// }); + +// auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + +// // Create Index +// auto index = TypedIndex::type, +// typename std::tuple_element<1, TestType>::type, +// typename std::tuple_element<2, TestType>::type>( +// space, num_dimensions, 20, num_elements); + +// index.setEF(num_elements); +// index.addItems(input_array); +// SECTION("Multiple query interface") { +// auto [labels, distances] = index.query(input_array); + +// if (!std::is_same::type, float>::value) { +// auto matches = 0; +// // Could be std::reduce or std::accumulate +// for (auto row = 0; row < num_elements; ++row) { +// matches += labels[row][0] == row; +// } +// REQUIRE((double)matches / (double)num_elements > 0.5); +// } else { +// // Could be std::reduce or std::accumulate +// std::vector expected(num_elements); +// std::iota(expected.begin(), expected.end(), 0); +// REQUIRE_THAT(flattenNDArray(labels), AllClose(expected)); +// } + +// REQUIRE_THAT(flattenNDArray(distances), +// AllClose(std::vector(num_elements, 0.0), 1e-7, distance_tolerance)); +// } + +// SECTION("Single query 
interface") { +// for (auto row = 0; row < num_elements; ++row) { +// auto [labels, distances] = +// index.query({input_array[row], input_array[row] + num_dimensions}); +// if (std::is_same::type, float>::value) { +// REQUIRE(labels[0] == row); +// } +// if(distances[0] >= distance_tolerance) { +// float a = 0; +// } +// REQUIRE(distances[0] < distance_tolerance); +// } +// } + +// // SECTION("Saving an index") { +// // auto output_file = std::tmpfile(); +// // index.saveIndex(std::make_shared(output_file)); +// // auto file_byte_count = std::ftell(output_file); +// // REQUIRE(file_byte_count > 0); +// // auto memory_output_stream = std::make_shared(); +// // index.saveIndex(memory_output_stream); +// // auto index_bytes = memory_output_stream->getValue().size(); +// // REQUIRE(index_bytes > 0); +// // REQUIRE(file_byte_count == index_bytes); +// // } +// } +// } + +// TEST_CASE("Spaces") { +// auto [space, expected_distances] = +// GENERATE(std::make_tuple>(SpaceType::Euclidean, +// {0.0, 1.0, 2.0, 2.0, 2.0}), +// std::make_tuple>(SpaceType::InnerProduct, +// {-2.0, -1.0, 0.0, 0.0, 0.0}), +// std::make_tuple>( +// SpaceType::Cosine, {0, 1.835e-1, 4.23e-1, 4.23e-1, 4.23e-1})); +// auto right_dimension = GENERATE(Catch::Generators::range(1, 128, 3)); +// auto left_dimension = GENERATE(Catch::Generators::range(1, 32, 5)); + +// auto num_dimensions = 3; +// auto data = NDArray({1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1}, {5, num_dimensions}); + +// auto input_data = std::vector(); +// for (int i = 0; i < data.shape[0]; ++i) { +// std::vector to_insert(left_dimension, 0); +// std::vector right(right_dimension, 0); +// to_insert.insert(to_insert.end(), data[0], data[0] + data.shape[1]); +// to_insert.insert(to_insert.end(), right.begin(), right.end()); +// input_data.insert(input_data.end(), to_insert.begin(), to_insert.end()); +// } + +// num_dimensions = right_dimension + left_dimension + data.shape[1]; + +// auto data_2 = NDArray(input_data, {data.shape[0], 
num_dimensions}); +// auto index = TypedIndex(space, num_dimensions, 16, 100); +// index.setEF(10); +// index.addItems(data_2); + +// auto [labels, distances] = index.query( +// std::vector(data_2[data_2.shape[0] - 1], data_2[data_2.shape[0] - 1] + num_dimensions), 5); +// REQUIRE_THAT(distances, AllClose(expected_distances, 1e-7, 1e-3)); +// } + +// TEST_CASE("Get Vectors") { +// auto num_dimensions = GENERATE(4, 16, 128, 256); +// auto num_elements = GENERATE(100, 1000); +// auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); + +// // Generate a 2D Matrix of test data +// std::random_device rd; +// std::mt19937 gen(rd()); +// std::uniform_real_distribution dis(0.0, 1.0); +// auto input_data = std::vector(num_elements * num_dimensions); +// std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); +// auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + +// auto index = TypedIndex(space, num_dimensions); +// auto labels = std::vector(num_elements); +// std::iota(labels.begin(), labels.end(), 0); + +// REQUIRE_THROWS(index.getVector(labels[0])); +// index.addItems(input_array); + +// SECTION("Test single vector retrieval") { +// for (auto i = 0; i < labels.size(); ++i) { +// REQUIRE_THAT(index.getVector(labels[i]), +// AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); +// } +// } + +// SECTION("Test all vectors retrieval") { +// auto vectors = index.getVectors(labels); +// for (auto i = 0; i < labels.size(); ++i) { +// REQUIRE_THAT(std::vector(vectors[i], vectors[i] + num_dimensions), +// AllClose(std::vector(input_array[i], input_array[i] + num_dimensions))); +// } +// } +// } + +// TEST_CASE("Query EF") { +// auto space = GENERATE(SpaceType::Euclidean, SpaceType::InnerProduct); +// auto [query_ef, rank_tolerance] = +// GENERATE(std::make_tuple(1, 100), std::make_tuple(2, 75), std::make_tuple(100, 1)); +// auto num_dimensions = 32; +// auto num_elements = 1000; + 
+// // Generate a 2D Matrix of test data +// std::random_device rd; +// std::mt19937 gen(rd()); +// std::uniform_real_distribution dis(0.0, 1.0); +// auto input_data = std::vector(num_elements * num_dimensions); +// std::generate(input_data.begin(), input_data.end(), [&dis, &gen]() { return 2 * dis(gen) - 1; }); +// auto input_array = NDArray(input_data, {num_elements, num_dimensions}); + +// auto index = TypedIndex(space, num_dimensions, 20, num_elements); +// index.setEF(num_elements); +// index.addItems(input_array); + +// auto [closest_labels_per_vector, _] = index.query(input_array, num_elements, -1, num_elements); +// SECTION("Multi query interface") { +// auto [labels, _] = index.query(input_array, 1, -1, query_ef); +// for (auto i = 0; i < labels.shape[0]; ++i) { +// auto returned_label = labels[0][0]; +// // Consider doing this in a loop with an early break. +// auto label_iter = std::find(closest_labels_per_vector[i], +// closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], +// returned_label); +// auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); +// REQUIRE(actual_rank < rank_tolerance); +// } +// } + +// SECTION("Single query interface") { +// for (auto i = 0; i < input_array.shape[0]; ++i) { +// auto [returned_labels, _] = +// index.query({input_data[i], input_data[i] + num_dimensions}, 1, query_ef); +// auto returned_label = returned_labels[0]; +// auto label_iter = std::find(closest_labels_per_vector[i], +// closest_labels_per_vector[i] + closest_labels_per_vector.shape[1], +// returned_label); +// auto actual_rank = std::distance(closest_labels_per_vector[i], label_iter); +// REQUIRE(actual_rank < rank_tolerance); +// } +// } +// } \ No newline at end of file diff --git a/docs/java/allclasses-index.html b/docs/java/allclasses-index.html index c9bd72ac..190d93ed 100644 --- a/docs/java/allclasses-index.html +++ b/docs/java/allclasses-index.html @@ -1,217 +1,108 @@ - - + - -All Classes (voyager 2.0.9 API) + +All 
Classes and Interfaces (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-

All Classes

-
-
- +
+
+
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/allpackages-index.html b/docs/java/allpackages-index.html index be18550c..ebc304c5 100644 --- a/docs/java/allpackages-index.html +++ b/docs/java/allpackages-index.html @@ -1,181 +1,78 @@ - - + - -All Packages (voyager 2.0.9 API) + +All Packages (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +

All Packages

-
- +
+ +
 
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/Index.QueryResults.html b/docs/java/com/spotify/voyager/jni/Index.QueryResults.html index b08120e3..b3ba3d07 100644 --- a/docs/java/com/spotify/voyager/jni/Index.QueryResults.html +++ b/docs/java/com/spotify/voyager/jni/Index.QueryResults.html @@ -1,434 +1,261 @@ - - + - -Index.QueryResults (voyager 2.0.9 API) + +Index.QueryResults (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ - +
+
- -

Class Index.QueryResults

+ +

Class Index.QueryResults

-
- -
-
    -
  • -
    +
    java.lang.Object +
    com.spotify.voyager.jni.Index.QueryResults
    +
    +
    +
    Enclosing class:
    Index

    -
    public static class Index.QueryResults
    -extends Object
    +
    public static class Index.QueryResults +extends Object
    A container for query results, returned by Index. Note that this class is instantiated from C++, and as such, any changes to its location, visibility, or constructor will need to include corresponding C++ changes.
    -
  • -
-
-
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Field Summary

        - - - - - - - - - - - - - - - - - -
        Fields 
        Modifier and TypeFieldDescription
        float[]distances +
      • +
        +

        Field Summary

        +
        Fields
        +
        +
        Modifier and Type
        +
        Field
        +
        Description
        +
        final float[]
        + +
        A list of distances from each item ID to the query vector for this query.
        -
      • long[]labels + +
        final long[]
        + +
        A list of item IDs ("labels").
        -
        -
      • -
      +
+
+ -
-
    -
  • - - -

    Constructor Summary

    - - - - - - - - - - -
    Constructors 
    ConstructorDescription
    QueryResults​(long[] labels, - float[] distances) +
  • +
    +

    Constructor Summary

    +
    Constructors
    +
    +
    Constructor
    +
    Description
    +
    QueryResults(long[] labels, + float[] distances)
    +
    Instantiates a new QueryResults object, provided two identical-length arrays of labels and their corresponding distances.
    -
  • -
  • -
+
+
+ -
- + + + +
 
+ + + +
+

Methods inherited from class java.lang.Object

+clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
- -
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Field Detail

        - - - -
          -
        • -

          labels

          -
          public final long[] labels
          +
        • +
          +

          Field Details

          +
            +
          • +
            +

            labels

            +
            public final long[] labels
            A list of item IDs ("labels").
            +
          • -
          - - - -
            -
          • -

            distances

            -
            public final float[] distances
            +
          • +
            +

            distances

            +
            public final float[] distances
            A list of distances from each item ID to the query vector for this query.
            -
          • -
          +
      + -
      -
        -
      • - - -

        Constructor Detail

        - - - -
          -
        • -

          QueryResults

          -
          public QueryResults​(long[] labels,
          -                    float[] distances)
          +
        • +
          +

          Constructor Details

          +
            +
          • +
            +

            QueryResults

            +
            public QueryResults(long[] labels, + float[] distances)
            Instantiates a new QueryResults object, provided two identical-length arrays of labels and their corresponding distances. This method should probably not be used directly, as this class is primarily used as a return type from the query method on Index.
            -
            -
            Throws:
            -
            IllegalArgumentException - if the length of the labels and distances arrays vary
            +
            +
            Throws:
            +
            IllegalArgumentException - if the length of the labels and distances arrays vary
            -
          • -
          +
      + -
      -
        -
      • - - -

        Method Detail

        - - - -
          -
        • -

          toString

          -
          public String toString()
          -
          -
          Overrides:
          -
          toString in class Object
          +
        • +
          +

          Method Details

          + - - - -
            -
          • -

            getLabels

            -
            public long[] getLabels()
            +
          • +
            +

            getLabels

            +
            public long[] getLabels()
            Retrieve the list of item IDs ("labels") returned by this query. This array is sorted by distance: the first item is the closest to the query vector, the second is second-closest, and so on. The items in this array correspond 1:1 with the distances returned by getDistances().
            +
          • -
          - - - -
            -
          • -

            getDistances

            -
            public float[] getDistances()
            +
          • +
            +

            getDistances

            +
            public float[] getDistances()
            Retrieve the list of distances between query vectors and item vectors for the results of this query. This array is sorted by distance: the first distance corresponds with the item the closest to the query vector, the second is second-closest, and so on. The items in this array correspond 1:1 with the labels returned by getLabels().
            -
          • -
          +
    -
- - + +
-
-
- -
- - - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/Index.SpaceType.html b/docs/java/com/spotify/voyager/jni/Index.SpaceType.html index 9c7ca202..9f100553 100644 --- a/docs/java/com/spotify/voyager/jni/Index.SpaceType.html +++ b/docs/java/com/spotify/voyager/jni/Index.SpaceType.html @@ -1,416 +1,244 @@ - - + - -Index.SpaceType (voyager 2.0.9 API) + +Index.SpaceType (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+
+ +
- +
+
- -

Enum Index.SpaceType

+ +

Enum Index.SpaceType

-
- -
- -
-
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Enum Constant Summary

        - - - - - - - - - - - - - - - - - - -
        Enum Constants 
        Enum ConstantDescription
        Cosine -
        Cosine distance; i.e. normalized dot product.
        -
        Euclidean +
      • +
        +

        Enum Constant Summary

        +
        Enum Constants
        +
        +
        Enum Constant
        +
        Description
        + +
        +
        Cosine distance; i.e.
        +
        + +
        Euclidean distance, also known as L2 distance.
        -
      • InnerProduct + + +
        Inner (dot) product.
        -
        -
      • -
      +
+
+ -
- +
+ + + +
+

Methods inherited from class java.lang.Enum

+clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+
+

Methods inherited from class java.lang.Object

+getClass, notify, notifyAll, wait, wait, wait
- -
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Enum Constant Detail

        - - - -
          -
        • -

          Euclidean

          -
          public static final Index.SpaceType Euclidean
          +
        • +
          +

          Enum Constant Details

          +
            +
          • +
            +

            Euclidean

            +
            public static final Index.SpaceType Euclidean
            Euclidean distance, also known as L2 distance. Computed by taking the square root of the sum of squared differences between each element of each vector.
            +
          • -
          - - - -
            -
          • -

            InnerProduct

            -
            public static final Index.SpaceType InnerProduct
            +
          • +
            +

            InnerProduct

            +
            public static final Index.SpaceType InnerProduct
            Inner (dot) product. Computed by taking the sum of the products of each element of each vector. Since v2.0, this is implemented using the order preserving transform defined in this paper from Microsoft: https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/XboxInnerProduct.pdf
            +
          • -
          - - - -
            -
          • -

            Cosine

            -
            public static final Index.SpaceType Cosine
            +
          • +
            +

            Cosine

            +
            public static final Index.SpaceType Cosine
            Cosine distance; i.e. normalized dot product. Computed by taking the sum of the products of each element of each vector, divided by the product of the magnitudes of each vector.
            -
          • -
          +
      + -
      -
        -
      • - - -

        Method Detail

        - - - -
          -
        • -

          values

          -
          public static Index.SpaceType[] values()
          +
        • +
          +

          Method Details

          +
            +
          • +
            +

            values

            +
            public static Index.SpaceType[] values()
            Returns an array containing the constants of this enum type, in -the order they are declared. This method may be used to iterate -over the constants as follows: -
            -for (Index.SpaceType c : Index.SpaceType.values())
            -    System.out.println(c);
            -
            -
            -
            Returns:
            +the order they are declared.
+
+
Returns:
an array containing the constants of this enum type, in the order they are declared
+ - - - - -
    -
  • -

    valueOf

    -
    public static Index.SpaceType valueOf​(String name)
    +
  • +
    +

    valueOf

    +
    public static Index.SpaceType valueOf(String name)
    Returns the enum constant of this type with the specified name. The string must match exactly an identifier used to declare an enum constant in this type. (Extraneous whitespace characters are not permitted.)
    -
    -
    Parameters:
    +
    +
    Parameters:
    name - the name of the enum constant to be returned.
    -
    Returns:
    +
    Returns:
    the enum constant with the specified name
    -
    Throws:
    -
    IllegalArgumentException - if this enum type has no constant with the specified name
    -
    NullPointerException - if the argument is null
    +
    Throws:
    +
    IllegalArgumentException - if this enum type has no constant with the specified name
    +
    NullPointerException - if the argument is null
    -
  • -
+ - - - + +
-
-
- -
- - - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/Index.StorageDataType.html b/docs/java/com/spotify/voyager/jni/Index.StorageDataType.html index 71682d01..655350c7 100644 --- a/docs/java/com/spotify/voyager/jni/Index.StorageDataType.html +++ b/docs/java/com/spotify/voyager/jni/Index.StorageDataType.html @@ -1,262 +1,167 @@ - - + - -Index.StorageDataType (voyager 2.0.9 API) + +Index.StorageDataType (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ - +
+
- -

Enum Index.StorageDataType

+ +

Enum Index.StorageDataType

-
- -
- -
-
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Enum Constant Summary

        - - - - - - - - - - - - - - - - - - -
        Enum Constants 
        Enum ConstantDescription
        E4M3 +
      • +
        +

        Enum Constant Summary

        +
        Enum Constants
        +
        +
        Enum Constant
        +
        Description
        + +
        A custom 8-bit floating point data type with range [-448, 448] and variable precision.
        -
      • Float32 + + +
        A 32-bit floating point ("Float") data type.
        -
        Float8 + + +
        An 8-bit floating point data type that expects all values to be on [-1, 1].
        -
        -
      • -
      +
+
+ -
- +
+
+ + +
+

Methods inherited from class java.lang.Enum

+clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+
+

Methods inherited from class java.lang.Object

+getClass, notify, notifyAll, wait, wait, wait
- -
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Enum Constant Detail

        - - - -
          -
        • -

          Float8

          -
          public static final Index.StorageDataType Float8
          +
        • +
          +

          Enum Constant Details

          +
            +
          • +
            +

            Float8

            +
            public static final Index.StorageDataType Float8
            An 8-bit floating point data type that expects all values to be on [-1, 1]. This data type provides adequate precision for many use cases, but cuts down memory usage bu a factor of 4x compared to Float32, while also increasing query speed. @@ -264,159 +169,78 @@

            Float8

            Float8 provides 8 bits of resolution; i.e.: the distance between successive values is 1/127, or 0.00787. For a variable-precision (i.e.: _actually_ floating point) representation, use E4M3.

            +
          • -
          - - - - - - - -
            -
          • -

            E4M3

            -
            public static final Index.StorageDataType E4M3
            +
          • +
            +

            E4M3

            +
            public static final Index.StorageDataType E4M3
            A custom 8-bit floating point data type with range [-448, 448] and variable precision. Use this data type to get 4x less memory usage compared to Float32, but when the values of vectors to be stored in an Index may exceed [-1, 1].

            E4M3 uses a 4-bit exponent and 3-bit mantissa field, and was inspired by the paper "FP8 - Formats for Deep Learning" by Micikevicus et al (arXiv:2209.05433). - -

            Note that using E4M3 with the Cosine space may result in negative distances at query time - due to reduced floating-point precision. While confusing, the query results are still - correctly ordered.

            -
          • -
          + Formats for Deep Learning" by Micikevicus et al (arXiv:2209.05433).
+ + -
-
    -
  • - - -

    Method Detail

    - - - -
      -
    • -

      values

      -
      public static Index.StorageDataType[] values()
      +
    • +
      +

      Method Details

      +
        +
      • +
        +

        values

        +
        public static Index.StorageDataType[] values()
        Returns an array containing the constants of this enum type, in -the order they are declared. This method may be used to iterate -over the constants as follows: -
        -for (Index.StorageDataType c : Index.StorageDataType.values())
        -    System.out.println(c);
        -
        -
        -
        Returns:
        +the order they are declared. +
        +
        Returns:
        an array containing the constants of this enum type, in the order they are declared
        +
      • -
      - - - -
        -
      • -

        valueOf

        -
        public static Index.StorageDataType valueOf​(String name)
        +
      • +
        +

        valueOf

        +
        public static Index.StorageDataType valueOf(String name)
        Returns the enum constant of this type with the specified name. The string must match exactly an identifier used to declare an enum constant in this type. (Extraneous whitespace characters are not permitted.)
        -
        -
        Parameters:
        +
        +
        Parameters:
        name - the name of the enum constant to be returned.
        -
        Returns:
        +
        Returns:
        the enum constant with the specified name
        -
        Throws:
        -
        IllegalArgumentException - if this enum type has no constant with the specified name
        -
        NullPointerException - if the argument is null
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        -
      • -
      +
- - - + +
-
-
- -
- - - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/Index.html b/docs/java/com/spotify/voyager/jni/Index.html index d4f14a29..85c1d62e 100644 --- a/docs/java/com/spotify/voyager/jni/Index.html +++ b/docs/java/com/spotify/voyager/jni/Index.html @@ -1,145 +1,89 @@ - - + - -Index (voyager 2.0.9 API) + +Index (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+
+ +
- +
+
- -

Class Index

+ +

Class Index

-
- -
-
    -
  • -
    +
    java.lang.Object +
    com.spotify.voyager.jni.Index
    +
    +
    +
    All Implemented Interfaces:
    -
    Closeable, AutoCloseable
    +
    Closeable, AutoCloseable

    -
    public class Index
    -extends Object
    -implements Closeable
    +
    public class Index +extends Object +implements Closeable
    A Voyager index, providing storage of floating-point vectors and the ability to efficiently search among those vectors. @@ -162,414 +106,302 @@

    Class Index

    // Serialize this index to use it again later: index.saveIndex("my_tiny_index.voy");
    -
  • -
-
-
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Nested Class Summary

        - - - - - - - - - - - - - - - - - - - - - - -
        Nested Classes 
        Modifier and TypeClassDescription
        static class Index.QueryResults +
      • +
        +

        Nested Class Summary

        +
        Nested Classes
        +
        +
        Modifier and Type
        +
        Class
        +
        Description
        +
        static class 
        + +
        A container for query results, returned by Index.
        -
      • static class Index.SpaceType + +
        static enum 
        + +
        The space, also known as distance metric, to use when searching.
        -
        static class Index.StorageDataType + +
        static enum 
        + +
        The datatype used to use when storing vectors on disk.
        -
        -
      • -
      +
+
+ -
-
    -
  • - - -

    Constructor Summary

    - - - - - - - - - - - - - - -
    Constructors 
    ConstructorDescription
    Index​(Index.SpaceType space, - int numDimensions) +
  • +
    +

    Constructor Summary

    +
    Constructors
    +
    +
    Constructor
    +
    Description
    +
    Index(Index.SpaceType space, + int numDimensions)
    +
    Create a new Index that uses the given Index.SpaceType to store and compare numDimensions-dimensional vectors.
    -
  • Index​(Index.SpaceType space, - int numDimensions, - long indexM, - long efConstruction, - long randomSeed, - long maxElements, - Index.StorageDataType storageDataType) + +
    Index(Index.SpaceType space, + int numDimensions, + long indexM, + long efConstruction, + long randomSeed, + long maxElements, + Index.StorageDataType storageDataType)
    +
    Create a new Index that uses the given Index.SpaceType to store and compare numDimensions-dimensional vectors.
    -
    -
  • -
+
+ + -
- + + + + +
+

Methods inherited from class java.lang.Object

+clone, equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
- -
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Constructor Detail

        - - - -
          -
        • -

          Index

          -
          public Index​(Index.SpaceType space,
          -             int numDimensions)
          +
        • +
          +

          Constructor Details

          +
            +
          • +
            +

            Index

            +
            public Index(Index.SpaceType space, + int numDimensions)
            Create a new Index that uses the given Index.SpaceType to store and compare numDimensions-dimensional vectors.
            -
            -
            Parameters:
            +
            +
            Parameters:
            space - the space type to use when storing and comparing vectors.
            numDimensions - the number of dimensions per vector.
            +
          • -
          - - - -
            -
          • -

            Index

            -
            public Index​(Index.SpaceType space,
            -             int numDimensions,
            -             long indexM,
            -             long efConstruction,
            -             long randomSeed,
            -             long maxElements,
            -             Index.StorageDataType storageDataType)
            +
          • +
            +

            Index

            +
            public Index(Index.SpaceType space, + int numDimensions, + long indexM, + long efConstruction, + long randomSeed, + long maxElements, + Index.StorageDataType storageDataType)
            Create a new Index that uses the given Index.SpaceType to store and compare numDimensions-dimensional vectors.
            -
            -
            Parameters:
            +
            +
            Parameters:
            space - The space type to use when storing and comparing vectors.
            numDimensions - The number of dimensions per vector.
            indexM - Controls the degree of interconnectedness between vectors. Higher values of @@ -584,364 +416,293 @@

            Index

            storageDataType - The datatype to use under-the-hood when storing vectors. Different data type options trade off precision for memory usage and query speed; see Index.StorageDataType for available data types.
            -
          • -
          +
      + -
      -
        -
      • - - -

        Method Detail

        - - - -
          -
        • -

          load

          -
          public static Index load​(String filename,
          -                         Index.SpaceType space,
          -                         int numDimensions,
          -                         Index.StorageDataType storageDataType)
          +
        • +
          +

          Method Details

          +
            +
          • +
            +

            load

            +
            public static Index load(String filename, + Index.SpaceType space, + int numDimensions, + Index.StorageDataType storageDataType)
            Load a Voyager index file and create a new Index initialized with the data in that file.
            -
            -
            Parameters:
            +
            +
            Parameters:
            filename - A filename to load.
            space - The Index.SpaceType to use when loading the index.
            numDimensions - The number of dimensions per vector.
            storageDataType - The Index.StorageDataType used by the index being loaded.
            -
            Returns:
            +
            Returns:
            An Index whose contents have been initialized with the data provided by the file.
            -
            Throws:
            -
            RuntimeException - if the index cannot be loaded from the file, or the file contains +
            Throws:
            +
            RuntimeException - if the index cannot be loaded from the file, or the file contains invalid data.
            +
          • -
          - - - -
            -
          • -

            load

            -
            public static Index load​(String filename)
            +
          • +
            +

            load

            +
            public static Index load(String filename)
            Load a Voyager index file and create a new Index initialized with the data in that file.
            -
            -
            Parameters:
            +
            +
            Parameters:
            filename - A filename to load.
            -
            Returns:
            +
            Returns:
            An Index whose contents have been initialized with the data provided by the file.
            -
            Throws:
            -
            RuntimeException - if the index cannot be loaded from the file, the file contains invalid +
            Throws:
            +
            RuntimeException - if the index cannot be loaded from the file, the file contains invalid data, or the file contains an older version of the Voyager file format that requires additional arguments to be provided.
            +
          • -
          - - - -
            -
          • -

            load

            -
            public static Index load​(InputStream inputStream,
            -                         Index.SpaceType space,
            -                         int numDimensions,
            -                         Index.StorageDataType storageDataType)
            +
          • +
            +

            load

            +
            public static Index load(InputStream inputStream, + Index.SpaceType space, + int numDimensions, + Index.StorageDataType storageDataType)
            Interpret the contents of a java.io.InputStream as the contents of a Voyager index file and create a new Index initialized with the data provided by that stream.
            -
            -
            Parameters:
            -
            inputStream - A InputStream that will provide the contents of a Voyager +
            +
            Parameters:
            +
            inputStream - A InputStream that will provide the contents of a Voyager index.
            space - The Index.SpaceType to use when loading the index.
            numDimensions - The number of dimensions per vector.
            storageDataType - The Index.StorageDataType used by the index being loaded.
            -
            Returns:
            +
            Returns:
            An Index whose contents have been initialized with the data provided by the input stream.
            -
            Throws:
            -
            RuntimeException - if the index cannot be loaded from the stream, or the stream contains +
            Throws:
            +
            RuntimeException - if the index cannot be loaded from the stream, or the stream contains invalid data.
            +
          • -
          - - - -
            -
          • -

            load

            -
            public static Index load​(InputStream inputStream)
            +
          • +
            +

            load

            +
            public static Index load(InputStream inputStream)
            Interpret the contents of a java.io.InputStream as the contents of a Voyager index file and create a new Index initialized with the data provided by that stream.
            -
            -
            Parameters:
            -
            inputStream - A InputStream that will provide the contents of a Voyager +
            +
            Parameters:
            +
            inputStream - A InputStream that will provide the contents of a Voyager index.
            -
            Returns:
            +
            Returns:
            An Index whose contents have been initialized with the data provided by the input stream.
            -
            Throws:
            -
            RuntimeException - if the index cannot be loaded from the stream, or the stream contains +
            Throws:
            +
            RuntimeException - if the index cannot be loaded from the stream, or the stream contains invalid data, or the file contains an older version of the Voyager file format that requires additional arguments to be provided.
            +
          • -
          - - - -
            -
          • -

            close

            -
            public void close()
            -           throws IOException
            +
          • +
            +

            close

            +
            public void close() + throws IOException
            Close this Index and release any memory held by it. Note that this method must be called to release the memory backing this Index; failing to do so may cause a memory leak.

            Any calls to methods after close() is called will fail, as the underlying native C++ object will have been deallocated.

            -
            -
            Specified by:
            -
            close in interface AutoCloseable
            -
            Specified by:
            -
            close in interface Closeable
            -
            Throws:
            -
            IOException - if the C++ destructor fails.
            +
            +
            Specified by:
            +
            close in interface AutoCloseable
            +
            Specified by:
            +
            close in interface Closeable
            +
            Throws:
            +
            IOException - if the C++ destructor fails.
            +
          • -
          - - - - - - - -
            -
          • -

            setEf

            -
            public void setEf​(long ef)
            +
          • +
            +

            setEf

            +
            public void setEf(long ef)
            Set the default EF ("query search depth") to use when query(float[], int) is called.
            -
            -
            Parameters:
            +
            +
            Parameters:
            ef - The new default EF value to use. This value can be overridden on a per-query basis at query time.
            +
          • -
          - - - -
            -
          • -

            getEf

            -
            public int getEf()
            +
          • +
            +

            getEf

            +
            public int getEf()
            Get the default EF ("query search depth") that will be uses when query(float[], int) is called.
            -
            -
            Returns:
            +
            +
            Returns:
            The current default EF value, used by the Index if no value is provided at query time.
            +
          • -
          - - - - - - - -
            -
          • -

            getNumDimensions

            -
            public int getNumDimensions()
            +
          • +
            +

            getNumDimensions

            +
            public int getNumDimensions()
            Get the number of dimensions used in this Index.
            -
            -
            Returns:
            +
            +
            Returns:
            The number of dimensions used by this Index, and which all vectors within this Index must have.
            +
          • -
          - - - -
            -
          • -

            setNumThreads

            -
            public void setNumThreads​(int numThreads)
            +
          • +
            +

            setNumThreads

            +
            public void setNumThreads(int numThreads)
            Set the default number of threads to use when adding multiple vectors in bulk, or when querying for multiple vectors simultaneously.
            -
            -
            Parameters:
            +
            +
            Parameters:
            numThreads - The default number of threads used for bulk-add or bulk-query methods if not overridden in each method call. Note that this affects the number of threads started for each method call - Voyager keeps no long-lived thread pool. For maximum efficiency, pass as much data as possible to each bulk-add or bulk-query method call to minimize overhead.
            +
          • -
          - - - -
            -
          • -

            getNumThreads

            -
            public int getNumThreads()
            +
          • +
            +

            getNumThreads

            +
            public int getNumThreads()
            Get the default number of threads used when adding multiple vectors in bulk oor when querying for multiple vectors simultaneously.
            -
            -
            Returns:
            +
            +
            Returns:
            The default number of threads used for bulk-add or bulk-query methods if not overridden in each method call.
            +
          • -
          - - - -
            -
          • -

            saveIndex

            -
            public void saveIndex​(String pathToIndex)
            +
          • +
            +

            saveIndex

            +
            public void saveIndex(String pathToIndex)
            Save this Index to a file at the provided filename. This file can be reloaded by using Index.load(...).
            -
            -
            Parameters:
            +
            +
            Parameters:
            pathToIndex - The output filename to write to.
            +
          • -
          - - - -
            -
          • -

            saveIndex

            -
            public void saveIndex​(OutputStream outputStream)
            +
          • +
            +

            saveIndex

            +
            public void saveIndex(OutputStream outputStream)
            Save this Index to the provided output stream. The stream will not be closed automatically - be sure to close the stream after saveIndex has completed. The data written to the stream can be reloaded by using Index.load(...).
            -
            -
            Parameters:
            +
            +
            Parameters:
            outputStream - The output stream to write to. This stream will not be closed automatically.
            +
          • -
          - - - -
            -
          • -

            asBytes

            -
            public byte[] asBytes()
            -
            Returns the contents of this index as an array of bytes. The resulting bytes will contain the - same data as if this index was serialized to disk and then read back into memory again.
            -
            -
            Returns:
            -
            A byte array representing the contents of the index
            -
            -
          • -
          - - - -
            -
          • -

            addItem

            -
            public void addItem​(float[] vector)
            +
          • +
            +

            addItem

            +
            public void addItem(float[] vector)
            Add an item (a vector) to this Index. The item will automatically be given an identifier equal to the return value of getNumElements().
            -
            -
            Parameters:
            +
            +
            Parameters:
            vector - The vector to add to the index.
            -
            Throws:
            -
            RuntimeException - If the provided vector does not contain exactly getNumDimensions() dimensions.
            +
            Throws:
            +
            RuntimeException - If the provided vector does not contain exactly getNumDimensions() dimensions.
            +
          • -
          - - - -
            -
          • -

            addItem

            -
            public void addItem​(float[] vector,
            -                    long id)
            +
          • +
            +

            addItem

            +
            public void addItem(float[] vector, + long id)
            Add an item (a vector) to this Index with the provided identifier.
            -
            -
            Parameters:
            +
            +
            Parameters:
            vector - The vector to add to the index.
            id - The 64-bit integer denoting the identifier of this vector.
            -
            Throws:
            -
            RuntimeException - If the provided vector does not contain exactly getNumDimensions() dimensions.
            +
            Throws:
            +
            RuntimeException - If the provided vector does not contain exactly getNumDimensions() dimensions.
            +
          • -
          - - - -
            -
          • -

            addItems

            -
            public void addItems​(float[][] vectors,
            -                     int numThreads)
            +
          • +
            +

            addItems

            +
            public void addItems(float[][] vectors, + int numThreads)
            Add multiple items (vectors) to this Index.
            -
            -
            Parameters:
            +
            +
            Parameters:
            vectors - The vectors to add to the index.
            numThreads - The number of threads to use when adding the provided vectors. If -1 (the default), the number of CPUs available on the current machine will be used.
            -
            Throws:
            -
            RuntimeException - If any of the provided vectors do not contain exactly getNumDimensions() dimensions.
            +
            Throws:
            +
            RuntimeException - If any of the provided vectors do not contain exactly getNumDimensions() dimensions.
            +
          • -
          - - - -
            -
          • -

            addItems

            -
            public void addItems​(float[][] vectors,
            -                     long[] ids,
            -                     int numThreads)
            +
          • +
            +

            addItems

            +
            public void addItems(float[][] vectors, + long[] ids, + int numThreads)
            Add multiple items (vectors) to this Index.
            -
            -
            Parameters:
            +
            +
            Parameters:
            vectors - The vectors to add to the index.
            ids - The 64-bit identifiers that correspond with each of the provided vectors.
            numThreads - The number of threads to use when adding the provided vectors. If -1 (the @@ -949,149 +710,128 @@

            addItems

            causes a temporary C++ thread pool to be used. Instead of calling addItems(float[][],int) in a tight loop, consider passing more data to each addItems(float[][],int) call instead to reduce overhead.
            -
            Throws:
            -
            RuntimeException - If any of the provided vectors do not contain exactly getNumDimensions() dimensions.
            -
            RuntimeException - If the list of IDs does not have the same length as the list of +
            Throws:
            +
            RuntimeException - If any of the provided vectors do not contain exactly getNumDimensions() dimensions.
            +
            RuntimeException - If the list of IDs does not have the same length as the list of provided vectors.
            +
          • -
          - - - -
            -
          • -

            getVector

            -
            public float[] getVector​(long id)
            +
          • +
            +

            getVector

            +
            public float[] getVector(long id)
            Get the vector for the provided identifier.
            -
            -
            Parameters:
            +
            +
            Parameters:
            id - The identifier whose vector will be fetched.
            -
            Returns:
            +
            Returns:
            A float array representing the values of the vector.
            -
            Throws:
            -
            RuntimeException - If the provided identifier is not present in the Index.
            +
            Throws:
            +
            RuntimeException - If the provided identifier is not present in the Index.
            +
          • -
          - - - -
            -
          • -

            getVectors

            -
            public float[][] getVectors​(long[] ids)
            +
          • +
            +

            getVectors

            +
            public float[][] getVectors(long[] ids)
            Get the vectors for a provided array of identifiers.
            -
            -
            Parameters:
            +
            +
            Parameters:
            ids - The identifiers whose vectors will be fetched.
            -
            Returns:
            +
            Returns:
            A nested float array representing the values of the vectors corresponding with each ID.
            -
            Throws:
            -
            RuntimeException - If any of the provided identifiers are not present in the Index.
            +
            Throws:
            +
            RuntimeException - If any of the provided identifiers are not present in the Index.
            +
          • -
          - - - -
            -
          • -

            getIDs

            -
            public long[] getIDs()
            +
          • +
            +

            getIDs

            +
            public long[] getIDs()
            Get the list of identifiers currently stored by this index.
            -
            -
            Returns:
            +
            +
            Returns:
            a long array of identifiers.
            +
          • -
          - - - -
            -
          • -

            query

            -
            public Index.QueryResults query​(float[] queryVector,
            -                                int k)
            +
          • +
            +

            query

            +
            public Index.QueryResults query(float[] queryVector, + int k)
            Query this Index for approximate nearest neighbors of a single query vector.
            -
            -
            Parameters:
            +
            +
            Parameters:
            queryVector - A query vector to use for searching.
            k - The number of nearest neighbors to return.
            -
            Returns:
            +
            Returns:
            An Index.QueryResults object, containing the neighbors found that are (approximately) nearest to the query vector.
            -
            Throws:
            -
            RuntimeException - if fewer than k results can be found in the index.
            +
            Throws:
            +
            RuntimeException - if fewer than k results can be found in the index.
            +
          • -
          - - - -
            -
          • -

            query

            -
            public Index.QueryResults[] query​(float[][] queryVectors,
            -                                  int k,
            -                                  int numThreads)
            +
          • +
            +

            query

            +
            public Index.QueryResults[] query(float[][] queryVectors, + int k, + int numThreads)
            Query this Index for approximate nearest neighbors of multiple query vectors.
            -
            -
            Parameters:
            +
            +
            Parameters:
            queryVectors - The query vectors to use for searching.
            k - The number of nearest neighbors to return for each query vector
            numThreads - The number of threads to use when searching. If -1, all available CPU cores will be used. Note that passing a number of threads other than 1 will cause a temporary C++ thread pool to be used. Instead of calling query(float[],int) in a tight loop, consider passing more data to each call instead to reduce overhead.
            -
            Returns:
            +
            Returns:
            An array of Index.QueryResults objects, each containing the neighbors found that are (approximately) nearest to the corresponding query vector. The returned list of Index.QueryResults will contain the same number of elements as queryVectors.
            -
            Throws:
            -
            RuntimeException - if fewer than k results can be found in the index for one or +
            Throws:
            +
            RuntimeException - if fewer than k results can be found in the index for one or more queries.
            +
          • -
          - - - -
            -
          • -

            query

            -
            public Index.QueryResults query​(float[] queryVector,
            -                                int k,
            -                                long queryEf)
            +
          • +
            +

            query

            +
            public Index.QueryResults query(float[] queryVector, + int k, + long queryEf)
            Query this Index for approximate nearest neighbors of a single query vector.
            -
            -
            Parameters:
            +
            +
            Parameters:
            queryVector - A query vector to use for searching.
            k - The number of nearest neighbors to return.
            queryEf - The per-query "ef" value to use. Larger values produce more accurate results at the expense of query time.
            -
            Returns:
            +
            Returns:
            An Index.QueryResults object, containing the neighbors found that are (approximately) nearest to the query vector.
            -
            Throws:
            -
            RuntimeException - if fewer than k results can be found in the index.
            +
            Throws:
            +
            RuntimeException - if fewer than k results can be found in the index.
            +
          • -
          - - - -
            -
          • -

            query

            -
            public Index.QueryResults[] query​(float[][] queryVectors,
            -                                  int k,
            -                                  int numThreads,
            -                                  long queryEf)
            +
          • +
            +

            query

            +
            public Index.QueryResults[] query(float[][] queryVectors, + int k, + int numThreads, + long queryEf)
            Query this Index for approximate nearest neighbors of multiple query vectors.
            -
            -
            Parameters:
            +
            +
            Parameters:
            queryVectors - The query vectors to use for searching.
            k - The number of nearest neighbors to return for each query vector
            numThreads - The number of threads to use when searching. If -1, all available CPU cores @@ -1100,194 +840,115 @@

            query

            more data to each call instead to reduce overhead.
            queryEf - The per-query "ef" value to use. Larger values produce more accurate results at the expense of query time.
            -
            Returns:
            +
            Returns:
            An array of Index.QueryResults objects, each containing the neighbors found that are (approximately) nearest to the corresponding query vector. The returned list of Index.QueryResults will contain the same number of elements as queryVectors.
            -
            Throws:
            -
            RuntimeException - if fewer than k results can be found in the index for one or +
            Throws:
            +
            RuntimeException - if fewer than k results can be found in the index for one or more queries.
            +
          • -
          - - - -
            -
          • -

            markDeleted

            -
            public void markDeleted​(long label)
            +
          • +
            +

            markDeleted

            +
            public void markDeleted(long label)
            Mark an element of the index as deleted. Deleted elements will be skipped when querying, but will still be present in the index.
            -
            -
            Parameters:
            +
            +
            Parameters:
            label - The ID of the element to mark as deleted.
            -
            Throws:
            -
            RuntimeException - If the provided identifier is not present in the Index.
            +
            Throws:
            +
            RuntimeException - If the provided identifier is not present in the Index.
            +
          • -
          - - - -
            -
          • -

            unmarkDeleted

            -
            public void unmarkDeleted​(long label)
            +
          • +
            +

            unmarkDeleted

            +
            public void unmarkDeleted(long label)
            Un-mark an element of the index as deleted, making it available again.
            -
            -
            Parameters:
            +
            +
            Parameters:
            label - The ID of the element to unmark as deleted.
            -
            Throws:
            -
            RuntimeException - If the provided identifier is not present in the Index.
            +
            Throws:
            +
            RuntimeException - If the provided identifier is not present in the Index.
            +
          • -
          - - - -
            -
          • -

            resizeIndex

            -
            public void resizeIndex​(long newSize)
            +
          • +
            +

            resizeIndex

            +
            public void resizeIndex(long newSize)
            Change the maximum number of elements currently storable by this Index. This operation reallocates the memory used by the index and can be quite slow, so it may be useful to set the maximum number of elements in advance if that number is known.
            -
            -
            Parameters:
            +
            +
            Parameters:
            newSize - The new number of maximum elements to resize this Index to.
            +
          • -
          - - - -
            -
          • -

            getMaxElements

            -
            public long getMaxElements()
            +
          • +
            +

            getMaxElements

            +
            public long getMaxElements()
            Get the maximum number of elements currently storable by this Index. If more elements are added than getMaxElements(), the index will be automatically (but slowly) resized.
            -
            -
            Returns:
            +
            +
            Returns:
            The number of elements (vectors) that are currently storable in this Index.
            +
          • -
          - - - -
            -
          • -

            getNumElements

            -
            public long getNumElements()
            +
          • +
            +

            getNumElements

            +
            public long getNumElements()
            Get the number of elements currently in this Index.
            -
            -
            Returns:
            +
            +
            Returns:
            The number of elements (vectors) in this Index. This count includes any deleted elements.
            +
          • -
          - - - -
            -
          • -

            getEfConstruction

            -
            public long getEfConstruction()
            +
          • +
            +

            getEfConstruction

            +
            public long getEfConstruction()
            Get the EF Construction value used when adding new elements to this Index.
            -
            -
            Returns:
            +
            +
            Returns:
            The current EF Construction value (i.e.: the number of neighbors to search for when adding new elements).
            +
          • -
          - - - -
            -
          • -

            getM

            -
            public long getM()
            +
          • +
            +

            getM

            +
            public long getM()
            Get the M value used when adding new elements to this Index.
            -
            -
            Returns:
            +
            +
            Returns:
            The current M value (i.e.: the number of links between adjacent vectors to create when adding elements).
            -
          • -
          +
    -
- - + +
-
-
- -
- - - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/StringIndex.QueryResults.html b/docs/java/com/spotify/voyager/jni/StringIndex.QueryResults.html index 6cc79d66..b1ca05b8 100644 --- a/docs/java/com/spotify/voyager/jni/StringIndex.QueryResults.html +++ b/docs/java/com/spotify/voyager/jni/StringIndex.QueryResults.html @@ -1,389 +1,219 @@ - - + - -StringIndex.QueryResults (voyager 2.0.9 API) + +StringIndex.QueryResults (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+
+ +
- +
+
- -

Class StringIndex.QueryResults

+ +

Class StringIndex.QueryResults

-
- -
-
    -
  • -
    +
    java.lang.Object +
    com.spotify.voyager.jni.StringIndex.QueryResults
    +
    +
    +
    Enclosing class:
    StringIndex

    -
    public static class StringIndex.QueryResults
    -extends Object
    +
    public static class StringIndex.QueryResults +extends Object
    A wrapper class for nearest neighbor query results.
    -
  • -
-
-
-
-
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Constructor Detail

        - - - -
          -
        • -

          QueryResults

          -
          public QueryResults​(String[] names,
          -                    float[] distances)
          -
        • -
        +
      • +
        +

        Constructor Details

        +
          +
        • +
          +

          QueryResults

          +
          public QueryResults(String[] names, + float[] distances)
          +
        +
      • -
        -
          -
        • - - -

          Method Detail

          - - - -
            -
          • -

            getNames

            -
            public String[] getNames()
            +
          • +
            +

            Method Details

            +
              +
            • +
              +

              getNames

              +
              public String[] getNames()
              +
            • -
            - - - -
              -
            • -

              getDistances

              -
              public float[] getDistances()
              +
            • +
              +

              getDistances

              +
              public float[] getDistances()
              +
            • -
            - - - -
              -
            • -

              getName

              -
              public String getName​(int index)
              +
            • +
              +

              getName

              +
              public String getName(int index)
              +
            • -
            - - - -
              -
            • -

              getDistance

              -
              public float getDistance​(int index)
              +
            • +
              +

              getDistance

              +
              public float getDistance(int index)
              +
            • -
            - - - -
              -
            • -

              getNumResults

              -
              public int getNumResults()
              +
            • +
              +

              getNumResults

              +
              public int getNumResults()
              +
            • -
            - - - - +
      -
-
-
+ +
-
-
- -
- - - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/StringIndex.html b/docs/java/com/spotify/voyager/jni/StringIndex.html index 91f3b81c..48446e66 100644 --- a/docs/java/com/spotify/voyager/jni/StringIndex.html +++ b/docs/java/com/spotify/voyager/jni/StringIndex.html @@ -1,413 +1,270 @@ - - + - -StringIndex (voyager 2.0.9 API) + +StringIndex (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+
+ +
- +
+
- -

Class StringIndex

+ +

Class StringIndex

-
- -
-
    -
  • -
    +
    java.lang.Object +
    com.spotify.voyager.jni.StringIndex
    +
    +
    +
    All Implemented Interfaces:
    -
    Closeable, AutoCloseable
    +
    Closeable, AutoCloseable

    -
    public class StringIndex
    -extends Object
    -implements Closeable
    +
    public class StringIndex +extends Object +implements Closeable
    Wrapper around com.spotify.voyager.jni.Index with a simplified interface which maps the index ID to a provided String.

    StringIndex can only accommodate up to 2^31 - 1 (2.1B) items, despite typical Voyager indices allowing up to 2^63 - 1 (9e18) items.

    -
  • -
-
-
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Nested Class Summary

        - - - - - - - - - - - - -
        Nested Classes 
        Modifier and TypeClassDescription
        static class StringIndex.QueryResults +
      • +
        +

        Nested Class Summary

        +
        Nested Classes
        +
        +
        Modifier and Type
        +
        Class
        +
        Description
        +
        static class 
        + +
        A wrapper class for nearest neighbor query results.
        -
      • -
      • -
      +
+
+ -
-
    -
  • - - -

    Constructor Summary

    - - - - - - - - - - - - - - -
    Constructors 
    ConstructorDescription
    StringIndex​(Index.SpaceType spaceType, - int numDimensions) +
  • +
    +

    Constructor Summary

    +
    Constructors
    +
    +
    Constructor
    +
    Description
    +
    StringIndex(Index.SpaceType spaceType, + int numDimensions)
    +
    Instantiate a new empty index with the specified space type and dimensionality
    -
  • StringIndex​(Index.SpaceType spaceType, - int numDimensions, - long indexM, - long efConstruction, - long randomSeed, - long maxElements, - Index.StorageDataType storageDataType) + +
    StringIndex(Index.SpaceType spaceType, + int numDimensions, + long indexM, + long efConstruction, + long randomSeed, + long maxElements, + Index.StorageDataType storageDataType)
    +
    Instantiate an empty index with the specified index parameters
    -
    -
  • -
+
+ + -
- + +
void
+
saveIndex(String outputDirectory, + String indexFilename, + String nameListFilename)
+
 
+ + + +
+

Methods inherited from class java.lang.Object

+clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
- -
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Constructor Detail

        - - - -
          -
        • -

          StringIndex

          -
          public StringIndex​(Index.SpaceType spaceType,
          -                   int numDimensions)
          +
        • +
          +

          Constructor Details

          +
            +
          • +
            +

            StringIndex

            +
            public StringIndex(Index.SpaceType spaceType, + int numDimensions)
            Instantiate a new empty index with the specified space type and dimensionality
            -
            -
            Parameters:
            +
            +
            Parameters:
            spaceType - Type of space and distance calculation used when determining distance between embeddings in the index, @see com.spotify.voyager.jni.Index.SpaceType
            numDimensions - Number of dimensions of each embedding stored in the underlying HNSW index
            +
          • -
          - - - -
            -
          • -

            StringIndex

            -
            public StringIndex​(Index.SpaceType spaceType,
            -                   int numDimensions,
            -                   long indexM,
            -                   long efConstruction,
            -                   long randomSeed,
            -                   long maxElements,
            -                   Index.StorageDataType storageDataType)
            +
          • +
            +

            StringIndex

            +
            public StringIndex(Index.SpaceType spaceType, + int numDimensions, + long indexM, + long efConstruction, + long randomSeed, + long maxElements, + Index.StorageDataType storageDataType)
            Instantiate an empty index with the specified index parameters
            -
            -
            Parameters:
            +
            +
            Parameters:
            spaceType - Type of space and distance calculation used when determining distance between embeddings in the index, @see com.spotify.voyager.jni.Index.SpaceType
            numDimensions - Number of dimensions of each embedding stored in the underlying HNSW index
            @@ -420,225 +277,180 @@

            StringIndex

            storageDataType - Type to store the embedding values as, @see com.spotify.voyager.jni.StorageDataType
            -
          • -
          +
      + -
      -
        -
      • - - -

        Method Detail

        - - - -
          -
        • -

          load

          -
          public static StringIndex load​(String indexFilename,
          -                               String nameListFilename,
          -                               Index.SpaceType spaceType,
          -                               int numDimensions,
          -                               Index.StorageDataType storageDataType)
          +
        • +
          +

          Method Details

          +
            +
          • +
            +

            load

            +
            public static StringIndex load(String indexFilename, + String nameListFilename, + Index.SpaceType spaceType, + int numDimensions, + Index.StorageDataType storageDataType)
            Load a previously constructed index from the provided file location. It is important that the dimensions, space type, and storage data type provided are the same as those the index was constructed with.
            -
            -
            Parameters:
            +
            +
            Parameters:
            indexFilename - Filename of the underlying HNSW index
            nameListFilename - Filename of the JSON encoded names list
            spaceType -
            numDimensions - Number of dimensions of each embedding stored in the underlying HNSW index
            storageDataType -
            -
            Returns:
            +
            Returns:
            reference to the loaded StringIndex
            -
            See Also:
            -
            Index.SpaceType, -Index.StorageDataType
            +
            See Also:
            +
            + +
            +
          • -
          - - - -
            -
          • -

            load

            -
            public static StringIndex load​(InputStream indexInputStream,
            -                               InputStream nameListInputStream,
            -                               Index.SpaceType spaceType,
            -                               int numDimensions,
            -                               Index.StorageDataType storageDataType)
            +
          • +
            +

            load

            +
            public static StringIndex load(InputStream indexInputStream, + InputStream nameListInputStream, + Index.SpaceType spaceType, + int numDimensions, + Index.StorageDataType storageDataType)
            Load a previously constructed index from the provided input streams. It is important that the dimensions, space type, and storage data type provided are the same as those the index was constructed with.
            -
            -
            Parameters:
            +
            +
            Parameters:
            indexInputStream - input stream pointing to the underlying HNSW index
            nameListInputStream - input stream pointing to the JSON encoded names list
            spaceType -
            numDimensions - Number of dimensions of each embedding stored in the underlying HNSW index
            storageDataType -
            -
            Returns:
            +
            Returns:
            reference to the loaded StringIndex
            -
            See Also:
            -
            Index.SpaceType, -Index.StorageDataType
            +
            See Also:
            +
            + +
            +
          • -
          - - - -
            -
          • -

            load

            -
            public static StringIndex load​(String indexFilename,
            -                               String nameListFilename)
            +
          • +
            +

            load

            +
            public static StringIndex load(String indexFilename, + String nameListFilename)
            Load a previously constructed index from the provided file location. The space type, dimensions, and storage data type are read from the file metadata.
            -
            -
            Parameters:
            +
            +
            Parameters:
            indexFilename - Filename of the underlying HNSW index
            nameListFilename - Filename of the JSON encoded names list
            -
            Returns:
            +
            Returns:
            reference to the loaded StringIndex
            +
          • -
          - - - -
            -
          • -

            load

            -
            public static StringIndex load​(InputStream indexInputStream,
            -                               InputStream nameListInputStream)
            +
          • +
            +

            load

            +
            public static StringIndex load(InputStream indexInputStream, + InputStream nameListInputStream)
            Load a previously constructed index from the provided input stream. The space type, dimensions, and storage data type are read from the file metadata.
            -
            -
            Parameters:
            +
            +
            Parameters:
            indexInputStream - input stream pointing to the underlying HNSW index
            nameListInputStream - input stream pointing to the JSON encoded names list
            -
            Returns:
            +
            Returns:
            reference to the loaded StringIndex
            +
          • -
          - - - -
            -
          • -

            saveIndex

            -
            public void saveIndex​(String outputDirectory)
            -               throws IOException
            +
          • +
            +

            saveIndex

            +
            public void saveIndex(String outputDirectory) + throws IOException
            Save the underlying index and JSON encoded name list to the provided output directory
            -
            -
            Parameters:
            +
            +
            Parameters:
            outputDirectory - directory to output files to
            -
            Throws:
            -
            IOException - when there is an error writing to JSON or saving to disk
            +
            Throws:
            +
            IOException - when there is an error writing to JSON or saving to disk
            +
          • -
          - - - - - - - -
            -
          • -

            saveIndex

            -
            public void saveIndex​(OutputStream indexOutputStream,
            -                      OutputStream namesListOutputStream)
            -               throws IOException
            +
          • +
            +

            saveIndex

            +
            public void saveIndex(OutputStream indexOutputStream, + OutputStream namesListOutputStream) + throws IOException
            Save the underlying HNSW index and JSON encoded names list to the provided output streams
            -
            -
            Parameters:
            +
            +
            Parameters:
            indexOutputStream - output stream pointing to the location to save the HNSW index
            namesListOutputStream - output stream pointing to the location to save the JSON names list
            -
            Throws:
            -
            IOException - when there is an error writing to JSON or the output streams
            +
            Throws:
            +
            IOException - when there is an error writing to JSON or the output streams
            +
          • -
          - - - -
            -
          • -

            addItem

            -
            public void addItem​(String name,
            -                    float[] vector)
            -
          • -
          - - - -
            -
          • -

            addItem

            -
            public void addItem​(String name,
            -                    List<Float> vector)
            -
          • -
          - - - -
            -
          • -

            addItems

            -
            public void addItems​(Map<String,​List<Float>> vectors)
            +
          • +
            +

            addItem

            +
            public void addItem(String name, + float[] vector)
            +
          • -
          - - - -
            -
          • -

            getNumElements

            -
            public long getNumElements()
            +
          • +
            +

            addItem

            +
            public void addItem(String name, + List<Float> vector)
            +
          • -
          - - - -
            -
          • -

            getVector

            -
            public float[] getVector​(String name)
            +
          • +
            +

            addItems

            +
            public void addItems(Map<String,List<Float>> vectors)
            +
          • -
          - - - -
            -
          • -

            query

            -
            public StringIndex.QueryResults query​(float[] queryVector,
            -                                      int numNeighbors,
            -                                      int ef)
            +
          • +
            +

            query

            +
            public StringIndex.QueryResults query(float[] queryVector, + int numNeighbors, + int ef)
            Find the nearest neighbors of the provided embedding.
            -
            -
            Parameters:
            +
            +
            Parameters:
            queryVector - The vector to center the search around.
            numNeighbors - The number of neighbors to return. The number of results returned may be smaller than this value if the index does not contain enough items.
            @@ -646,154 +458,59 @@

            query

            Increasing this value can improve recall (up to a point) at the cost of increased search latency. The minimum value of this parameter is the requested number of neighbors, and the maximum value is the number of items in the index. -
            Returns:
            +
            Returns:
            a QueryResults object, containing the names of the neighbors and each neighbor's distance from the query vector, sorted in ascending order of distance
            +
          • -
          - - - -
            -
          • -

            query

            -
            public StringIndex.QueryResults[] query​(float[][] queryVectors,
            -                                        int numNeighbors,
            -                                        int numThreads,
            -                                        int ef)
            +
          • +
            +

            query

            +
            public StringIndex.QueryResults[] query(float[][] queryVectors, + int numNeighbors, + int numThreads, + int ef)
            Query for multiple target vectors in parallel.
            -
            -
            Parameters:
            +
            +
            Parameters:
            queryVectors - Array of query vectors to search around
            numNeighbors - Number of neighbors to get for each target
            numThreads - Number of threads to use for the underlying index search. -1 uses all available CPU cores
            ef - Search depth in the graph
            -
            Returns:
            +
            Returns:
            Array of QueryResults, one for each target vector
            +
          • -
          - - - - - - - -
            -
          • -

            resizeIndex

            -
            public void resizeIndex​(long newSize)
            -
            Change the maximum number of elements currently storable by this Index. This operation - reallocates the memory used by the index and can be quite slow, so it may be useful to set the - maximum number of elements in advance if that number is known.
            -
            -
            Parameters:
            -
            newSize - The new number of maximum elements to resize this Index to.
            -
            -
          • -
          - - - -
            -
          • -

            getMaxElements

            -
            public long getMaxElements()
            -
            Get the maximum number of elements currently storable by this Index. If more elements - are added than getMaxElements(), the index will be automatically (but slowly) resized.
            -
            -
            Returns:
            -
            The number of elements (vectors) that are currently storable in this Index.
            +
          • +
            +

            close

            +
            public void close() + throws IOException
            +
            +
            Specified by:
            +
            close in interface AutoCloseable
            +
            Specified by:
            +
            close in interface Closeable
            +
            Throws:
            +
            IOException
            -
          • -
          +
    -
- - + + - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/class-use/Index.QueryResults.html b/docs/java/com/spotify/voyager/jni/class-use/Index.QueryResults.html index 014e9654..d5fd0ef3 100644 --- a/docs/java/com/spotify/voyager/jni/class-use/Index.QueryResults.html +++ b/docs/java/com/spotify/voyager/jni/class-use/Index.QueryResults.html @@ -1,227 +1,114 @@ - - + - -Uses of Class com.spotify.voyager.jni.Index.QueryResults (voyager 2.0.9 API) + +Uses of Class com.spotify.voyager.jni.Index.QueryResults (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-

Uses of Class
com.spotify.voyager.jni.Index.QueryResults

+

Uses of Class
com.spotify.voyager.jni.Index.QueryResults

-
-
+
- - -
+ - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/class-use/Index.SpaceType.html b/docs/java/com/spotify/voyager/jni/class-use/Index.SpaceType.html index df6c8a94..6dd69515 100644 --- a/docs/java/com/spotify/voyager/jni/class-use/Index.SpaceType.html +++ b/docs/java/com/spotify/voyager/jni/class-use/Index.SpaceType.html @@ -1,316 +1,186 @@ - - + - -Uses of Class com.spotify.voyager.jni.Index.SpaceType (voyager 2.0.9 API) + +Uses of Enum com.spotify.voyager.jni.Index.SpaceType (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-

Uses of Class
com.spotify.voyager.jni.Index.SpaceType

+

Uses of Enum
com.spotify.voyager.jni.Index.SpaceType

-
-
+
- - -
+ - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/class-use/Index.StorageDataType.html b/docs/java/com/spotify/voyager/jni/class-use/Index.StorageDataType.html index 8842bfe8..1d82d934 100644 --- a/docs/java/com/spotify/voyager/jni/class-use/Index.StorageDataType.html +++ b/docs/java/com/spotify/voyager/jni/class-use/Index.StorageDataType.html @@ -1,294 +1,168 @@ - - + - -Uses of Class com.spotify.voyager.jni.Index.StorageDataType (voyager 2.0.9 API) + +Uses of Enum com.spotify.voyager.jni.Index.StorageDataType (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-

Uses of Class
com.spotify.voyager.jni.Index.StorageDataType

+

Uses of Enum
com.spotify.voyager.jni.Index.StorageDataType

-
-
+
- - -
+ - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/class-use/Index.html b/docs/java/com/spotify/voyager/jni/class-use/Index.html index 5ff1d36f..33486a55 100644 --- a/docs/java/com/spotify/voyager/jni/class-use/Index.html +++ b/docs/java/com/spotify/voyager/jni/class-use/Index.html @@ -1,229 +1,116 @@ - - + - -Uses of Class com.spotify.voyager.jni.Index (voyager 2.0.9 API) + +Uses of Class com.spotify.voyager.jni.Index (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-

Uses of Class
com.spotify.voyager.jni.Index

+

Uses of Class
com.spotify.voyager.jni.Index

-
-
+
- - -
+ - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/class-use/StringIndex.QueryResults.html b/docs/java/com/spotify/voyager/jni/class-use/StringIndex.QueryResults.html index ee165f1a..85b1831b 100644 --- a/docs/java/com/spotify/voyager/jni/class-use/StringIndex.QueryResults.html +++ b/docs/java/com/spotify/voyager/jni/class-use/StringIndex.QueryResults.html @@ -1,210 +1,101 @@ - - + - -Uses of Class com.spotify.voyager.jni.StringIndex.QueryResults (voyager 2.0.9 API) + +Uses of Class com.spotify.voyager.jni.StringIndex.QueryResults (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-

Uses of Class
com.spotify.voyager.jni.StringIndex.QueryResults

+

Uses of Class
com.spotify.voyager.jni.StringIndex.QueryResults

-
-
+
- - -
+ - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/class-use/StringIndex.html b/docs/java/com/spotify/voyager/jni/class-use/StringIndex.html index e29c3576..bc3ff237 100644 --- a/docs/java/com/spotify/voyager/jni/class-use/StringIndex.html +++ b/docs/java/com/spotify/voyager/jni/class-use/StringIndex.html @@ -1,229 +1,116 @@ - - + - -Uses of Class com.spotify.voyager.jni.StringIndex (voyager 2.0.9 API) + +Uses of Class com.spotify.voyager.jni.StringIndex (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-

Uses of Class
com.spotify.voyager.jni.StringIndex

+

Uses of Class
com.spotify.voyager.jni.StringIndex

-
-
+
- - -
+ - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/package-summary.html b/docs/java/com/spotify/voyager/jni/package-summary.html index a420875e..6d05db85 100644 --- a/docs/java/com/spotify/voyager/jni/package-summary.html +++ b/docs/java/com/spotify/voyager/jni/package-summary.html @@ -1,218 +1,136 @@ - - + - -com.spotify.voyager.jni (voyager 2.0.9 API) + +com.spotify.voyager.jni (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-

Package com.spotify.voyager.jni

+

Package com.spotify.voyager.jni

-
-
- - +
+
package com.spotify.voyager.jni
+
Java Native Interface (JNI) bindings to expose functionality from Voyager's C++ code into Java.
-
    -
  • - - - - - - - - - - - - - - - - - - - - - - - - -
    Class Summary 
    ClassDescription
    Index +
    +
      +
    • + +
    • +
    • +
      +
      +
      +
      +
      Class
      +
      Description
      + +
      A Voyager index, providing storage of floating-point vectors and the ability to efficiently search among those vectors.
      -
    Index.QueryResults + + +
    A container for query results, returned by Index.
    -
    StringIndex + + +
    +
    The space, also known as distance metric, to use when searching.
    +
    + +
    +
    The datatype used to use when storing vectors on disk.
    +
    + +
    Wrapper around com.spotify.voyager.jni.Index with a simplified interface which maps the index ID to a provided String.
    -
    StringIndex.QueryResults + + +
    A wrapper class for nearest neighbor query results.
    -
    -
  • -
  • - - - - - - - - - - - - - - - - -
    Enum Summary 
    EnumDescription
    Index.SpaceType -
    The space, also known as distance metric, to use when searching.
    -
    Index.StorageDataType -
    The datatype used to use when storing vectors on disk.
    -
    +
+
+
+ - + - - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/package-tree.html b/docs/java/com/spotify/voyager/jni/package-tree.html index 3e6d292c..1c6e8aa7 100644 --- a/docs/java/com/spotify/voyager/jni/package-tree.html +++ b/docs/java/com/spotify/voyager/jni/package-tree.html @@ -1,183 +1,94 @@ - - + - -com.spotify.voyager.jni Class Hierarchy (voyager 2.0.9 API) + +com.spotify.voyager.jni Class Hierarchy (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +

Hierarchy For Package com.spotify.voyager.jni

-Package Hierarchies: +Package Hierarchies:
-
-
+

Class Hierarchy

-
+

Enum Hierarchy

-
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/package-use.html b/docs/java/com/spotify/voyager/jni/package-use.html index 0d826eb4..8950d0ed 100644 --- a/docs/java/com/spotify/voyager/jni/package-use.html +++ b/docs/java/com/spotify/voyager/jni/package-use.html @@ -1,221 +1,110 @@ - - + - -Uses of Package com.spotify.voyager.jni (voyager 2.0.9 API) + +Uses of Package com.spotify.voyager.jni (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +

Uses of Package
com.spotify.voyager.jni

-
-
+
+ -
+
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/utils/JniLibExtractor.html b/docs/java/com/spotify/voyager/jni/utils/JniLibExtractor.html index 30fd9d01..8ac7c8e5 100644 --- a/docs/java/com/spotify/voyager/jni/utils/JniLibExtractor.html +++ b/docs/java/com/spotify/voyager/jni/utils/JniLibExtractor.html @@ -1,308 +1,163 @@ - - + - -JniLibExtractor (voyager 2.0.9 API) + +JniLibExtractor (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ - +
+
- -

Class JniLibExtractor

+ +

Class JniLibExtractor

-
- -
-
    -
  • -
    -
    public class JniLibExtractor
    -extends Object
    -
  • -
+
java.lang.Object +
com.spotify.voyager.jni.utils.JniLibExtractor
-
-
-
-
    -
  • + +
    +
      -
      -
        -
      • - - -

        Constructor Detail

        - - - -
          -
        • -

          JniLibExtractor

          -
          public JniLibExtractor()
          -
        • -
        +
      • +
        +

        Constructor Details

        +
          +
        • +
          +

          JniLibExtractor

          +
          public JniLibExtractor()
          +
        - -
        -
          -
        • - - -

          Method Detail

          - - - -
            -
          • -

            extractBinaries

            -
            public static String extractBinaries​(String libname)
          • -
          + +
        • +
          +

          Method Details

          +
            +
          • +
            +

            extractBinaries

            +
            public static String extractBinaries(String libname)
            +
        -
-
-
+ +
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/utils/TinyJson.html b/docs/java/com/spotify/voyager/jni/utils/TinyJson.html index cc6b836a..836f88c5 100644 --- a/docs/java/com/spotify/voyager/jni/utils/TinyJson.html +++ b/docs/java/com/spotify/voyager/jni/utils/TinyJson.html @@ -1,331 +1,181 @@ - - + - -TinyJson (voyager 2.0.9 API) + +TinyJson (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ - +
+
- -

Class TinyJson

+ +

Class TinyJson

-
- -
-
    -
  • +
    java.lang.Object +
    com.spotify.voyager.jni.utils.TinyJson
    +
    +

    -
    public class TinyJson
    -extends Object
    +
    public class TinyJson +extends Object
    A dependency-free, super tiny JSON serde class that only supports reading and writing lists of strings.
    -
  • -
-
-
-
-
-
-
-
+ +
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/utils/class-use/JniLibExtractor.html b/docs/java/com/spotify/voyager/jni/utils/class-use/JniLibExtractor.html index c0e94a31..6d9be940 100644 --- a/docs/java/com/spotify/voyager/jni/utils/class-use/JniLibExtractor.html +++ b/docs/java/com/spotify/voyager/jni/utils/class-use/JniLibExtractor.html @@ -1,150 +1,62 @@ - - + - -Uses of Class com.spotify.voyager.jni.utils.JniLibExtractor (voyager 2.0.9 API) + +Uses of Class com.spotify.voyager.jni.utils.JniLibExtractor (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-

Uses of Class
com.spotify.voyager.jni.utils.JniLibExtractor

+

Uses of Class
com.spotify.voyager.jni.utils.JniLibExtractor

-
No usage of com.spotify.voyager.jni.utils.JniLibExtractor
-
+No usage of com.spotify.voyager.jni.utils.JniLibExtractor
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/utils/class-use/TinyJson.html b/docs/java/com/spotify/voyager/jni/utils/class-use/TinyJson.html index 1008ee1e..ef3cc403 100644 --- a/docs/java/com/spotify/voyager/jni/utils/class-use/TinyJson.html +++ b/docs/java/com/spotify/voyager/jni/utils/class-use/TinyJson.html @@ -1,150 +1,62 @@ - - + - -Uses of Class com.spotify.voyager.jni.utils.TinyJson (voyager 2.0.9 API) + +Uses of Class com.spotify.voyager.jni.utils.TinyJson (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-

Uses of Class
com.spotify.voyager.jni.utils.TinyJson

+

Uses of Class
com.spotify.voyager.jni.utils.TinyJson

-
No usage of com.spotify.voyager.jni.utils.TinyJson
-
+No usage of com.spotify.voyager.jni.utils.TinyJson
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/utils/package-summary.html b/docs/java/com/spotify/voyager/jni/utils/package-summary.html index 1a95c40c..feea2a05 100644 --- a/docs/java/com/spotify/voyager/jni/utils/package-summary.html +++ b/docs/java/com/spotify/voyager/jni/utils/package-summary.html @@ -1,175 +1,105 @@ - - + - -com.spotify.voyager.jni.utils (voyager 2.0.9 API) + +com.spotify.voyager.jni.utils (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+
+ +
+
-

Package com.spotify.voyager.jni.utils

+

Package com.spotify.voyager.jni.utils

+
+
+
package com.spotify.voyager.jni.utils
+
+
    +
  • + +
-
+
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/utils/package-tree.html b/docs/java/com/spotify/voyager/jni/utils/package-tree.html index d3de1d94..e0c78606 100644 --- a/docs/java/com/spotify/voyager/jni/utils/package-tree.html +++ b/docs/java/com/spotify/voyager/jni/utils/package-tree.html @@ -1,166 +1,77 @@ - - + - -com.spotify.voyager.jni.utils Class Hierarchy (voyager 2.0.9 API) + +com.spotify.voyager.jni.utils Class Hierarchy (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +

Hierarchy For Package com.spotify.voyager.jni.utils

-Package Hierarchies: +Package Hierarchies:
-
-
+

Class Hierarchy

-
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/jni/utils/package-use.html b/docs/java/com/spotify/voyager/jni/utils/package-use.html index a47b1e2d..482fd7e4 100644 --- a/docs/java/com/spotify/voyager/jni/utils/package-use.html +++ b/docs/java/com/spotify/voyager/jni/utils/package-use.html @@ -1,150 +1,62 @@ - - + - -Uses of Package com.spotify.voyager.jni.utils (voyager 2.0.9 API) + +Uses of Package com.spotify.voyager.jni.utils (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +

Uses of Package
com.spotify.voyager.jni.utils

-
No usage of com.spotify.voyager.jni.utils
-
+No usage of com.spotify.voyager.jni.utils
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/package-summary.html b/docs/java/com/spotify/voyager/package-summary.html index 3393d377..5f3bc85d 100644 --- a/docs/java/com/spotify/voyager/package-summary.html +++ b/docs/java/com/spotify/voyager/package-summary.html @@ -1,106 +1,67 @@ - - + - -com.spotify.voyager (voyager 2.0.9 API) + +com.spotify.voyager (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+
+ +
+
-

Package com.spotify.voyager

+

Package com.spotify.voyager

-
-
- - +
+
package com.spotify.voyager
+
Voyager is a Java and Python library that provides approximate nearest-neighbor search of vector data. For most use cases, Index will be the primary interface to Voyager's functionality. @@ -126,54 +87,29 @@

Package com.spotify.voyager

index.saveIndex("my_tiny_index.voy");
+
+
    +
  • + +
  • +
+
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/package-tree.html b/docs/java/com/spotify/voyager/package-tree.html index 632020bf..a196b9aa 100644 --- a/docs/java/com/spotify/voyager/package-tree.html +++ b/docs/java/com/spotify/voyager/package-tree.html @@ -1,153 +1,66 @@ - - + - -com.spotify.voyager Class Hierarchy (voyager 2.0.9 API) + +com.spotify.voyager Class Hierarchy (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +

Hierarchy For Package com.spotify.voyager

-Package Hierarchies: +Package Hierarchies:
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/com/spotify/voyager/package-use.html b/docs/java/com/spotify/voyager/package-use.html index b8e9a581..a4d7e226 100644 --- a/docs/java/com/spotify/voyager/package-use.html +++ b/docs/java/com/spotify/voyager/package-use.html @@ -1,150 +1,62 @@ - - + - -Uses of Package com.spotify.voyager (voyager 2.0.9 API) + +Uses of Package com.spotify.voyager (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +

Uses of Package
com.spotify.voyager

-
No usage of com.spotify.voyager
-
+No usage of com.spotify.voyager
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/help-doc.html b/docs/java/help-doc.html index 5b04da64..721c2ba0 100644 --- a/docs/java/help-doc.html +++ b/docs/java/help-doc.html @@ -1,116 +1,108 @@ - - + - -API Help (voyager 2.0.9 API) + +API Help (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+
+ +
+
-
-

How This API Document Is Organized

-
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+

JavaDoc Help

+ +
+
+

Navigation

+Starting from the Overview page, you can browse the documentation using the links in each page, and in the navigation bar at the top of each page. The Index and Search box allow you to navigate to specific declarations and summary pages, including: All Packages, All Classes and Interfaces +
-
-
    -
  • -
    -

    Overview

    +
    +
    +

    Kinds of Pages

    +The following sections describe the different kinds of pages in this collection. +
    +

    Overview

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    -
  • -
  • -
    -

    Package

    -

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. These pages may contain six categories:

    -
      +
      +

      Package

      +

      Each package has a page that contains a list of its classes and interfaces, with a summary for each. These pages may contain the following categories:

      +
      • Interfaces
      • Classes
      • Enums
      • @@ -119,12 +111,10 @@

        Package

      • Annotation Types
      - -
    • -
      -

      Class or Interface

      -

      Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

      -
        +
        +

        Class or Interface

        +

        Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a declaration and description, member summary tables, and detailed member descriptions. Entries in each of these sections are omitted if they are empty or not applicable.

        +
        • Class Inheritance Diagram
        • Direct Subclasses
        • All Known Subinterfaces
        • @@ -133,150 +123,64 @@

          Class or Interface

        • Class or Interface Description

        -
          +
          • Nested Class Summary
          • +
          • Enum Constant Summary
          • Field Summary
          • Property Summary
          • Constructor Summary
          • Method Summary
          • -
          -
          -
            -
          • Field Detail
          • -
          • Property Detail
          • -
          • Constructor Detail
          • -
          • Method Detail
          • -
          -

          Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

          -
        - -
      • -
        -

        Annotation Type

        -

        Each annotation type has its own separate page with the following sections:

        -
          -
        • Annotation Type Declaration
        • -
        • Annotation Type Description
        • Required Element Summary
        • Optional Element Summary
        • -
        • Element Detail
        -
        -
      • -
      • -
        -

        Enum

        -

        Each enum has its own separate page with the following sections:

        -
          -
        • Enum Declaration
        • -
        • Enum Description
        • -
        • Enum Constant Summary
        • -
        • Enum Constant Detail
        • +
          +
            +
          • Enum Constant Details
          • +
          • Field Details
          • +
          • Property Details
          • +
          • Constructor Details
          • +
          • Method Details
          • +
          • Element Details
          +

          Note: Annotation interfaces have required and optional elements, but not methods. Only enum classes have enum constants. The components of a record class are displayed as part of the declaration of the record class. Properties are a feature of JavaFX.

          +

          The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

        -
      • -
      • -
        -

        Use

        -

        Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its "Use" page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the "Use" link in the navigation bar.

        +
        +

        Other Files

        +

        Packages and modules may contain pages with additional information related to the declarations nearby.

        -
      • -
      • -
        -

        Tree (Class Hierarchy)

        +
        +

        Use

        +

        Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its Use page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the USE link in the navigation bar.

        +
        +
        +

        Tree (Class Hierarchy)

        There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. Classes are organized by inheritance structure starting with java.lang.Object. Interfaces do not inherit from java.lang.Object.

        -
          -
        • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
        • -
        • When viewing a particular package, class or interface page, clicking on "Tree" displays the hierarchy for only that package.
        • +
            +
          • When viewing the Overview page, clicking on TREE displays the hierarchy for all packages.
          • +
          • When viewing a particular package, class or interface page, clicking on TREE displays the hierarchy for only that package.
        -
      • -
      • -
        -

        Deprecated API

        -

        The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

        -
        -
      • -
      • -
        -

        Index

        -

        The Index contains an alphabetic index of all classes, interfaces, constructors, methods, and fields, as well as lists of all packages and all classes.

        -
        -
      • -
      • -
        -

        All Classes

        -

        The All Classes link shows all classes and interfaces except non-static nested types.

        -
        -
      • -
      • -
        -

        Serialized Form

        -

        Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

        +
        +

        All Packages

        +

        The All Packages page contains an alphabetic index of all packages contained in the documentation.

        -
      • -
      • -
        -

        Constant Field Values

        -

        The Constant Field Values page lists the static final fields and their values.

        +
        +

        All Classes and Interfaces

        +

        The All Classes and Interfaces page contains an alphabetic index of all classes and interfaces contained in the documentation, including annotation interfaces, enum classes, and record classes.

        -
      • -
      • -
        -

        Search

        -

        You can search for definitions of modules, packages, types, fields, methods and other terms defined in the API, using some or all of the name. "Camel-case" abbreviations are supported: for example, "InpStr" will find "InputStream" and "InputStreamReader".

        +
        +

        Index

        +

        The Index contains an alphabetic index of all classes, interfaces, constructors, methods, and fields in the documentation, as well as summary pages such as All Packages, All Classes and Interfaces.

        -
      • -
      +

-This help file applies to API documentation generated by the standard doclet.
-
+This help file applies to API documentation generated by the standard doclet.
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/index-all.html b/docs/java/index-all.html index 5fd89dc9..d5491f35 100644 --- a/docs/java/index-all.html +++ b/docs/java/index-all.html @@ -1,141 +1,89 @@ - - + - -Index (voyager 2.0.9 API) + +Index (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-
A C D E F G I J L M Q R S T U V W 
All Classes All Packages - - -

A

-
-
addItem(float[]) - Method in class com.spotify.voyager.jni.Index
+
+

Index

+
+A C D E F G I J L M Q R S T U V W 
All Classes and Interfaces|All Packages +

A

+
+
addItem(float[]) - Method in class com.spotify.voyager.jni.Index
Add an item (a vector) to this Index.
-
addItem(float[], long) - Method in class com.spotify.voyager.jni.Index
+
addItem(float[], long) - Method in class com.spotify.voyager.jni.Index
Add an item (a vector) to this Index with the provided identifier.
-
addItem(String, float[]) - Method in class com.spotify.voyager.jni.StringIndex
+
addItem(String, float[]) - Method in class com.spotify.voyager.jni.StringIndex
 
-
addItem(String, List<Float>) - Method in class com.spotify.voyager.jni.StringIndex
+
addItem(String, List<Float>) - Method in class com.spotify.voyager.jni.StringIndex
 
-
addItems(float[][], int) - Method in class com.spotify.voyager.jni.Index
+
addItems(float[][], int) - Method in class com.spotify.voyager.jni.Index
Add multiple items (vectors) to this Index.
-
addItems(float[][], long[], int) - Method in class com.spotify.voyager.jni.Index
+
addItems(float[][], long[], int) - Method in class com.spotify.voyager.jni.Index
Add multiple items (vectors) to this Index.
-
addItems(Map<String, List<Float>>) - Method in class com.spotify.voyager.jni.StringIndex
+
addItems(Map<String, List<Float>>) - Method in class com.spotify.voyager.jni.StringIndex
 
-
asBytes() - Method in class com.spotify.voyager.jni.Index
-
-
Returns the contents of this index as an array of bytes.
-
- - - -

C

-
-
close() - Method in class com.spotify.voyager.jni.Index
+

C

+
+
close() - Method in class com.spotify.voyager.jni.Index
Close this Index and release any memory held by it.
-
close() - Method in class com.spotify.voyager.jni.StringIndex
+
close() - Method in class com.spotify.voyager.jni.StringIndex
 
com.spotify.voyager - package com.spotify.voyager
@@ -148,444 +96,345 @@

C

com.spotify.voyager.jni.utils - package com.spotify.voyager.jni.utils
 
-
Cosine - com.spotify.voyager.jni.Index.SpaceType
+
Cosine - Enum constant in enum com.spotify.voyager.jni.Index.SpaceType
-
Cosine distance; i.e. normalized dot product.
+
Cosine distance; i.e. normalized dot product.
- - - -

D

-
-
distances - Variable in class com.spotify.voyager.jni.Index.QueryResults
+

D

+
+
distances - Variable in class com.spotify.voyager.jni.Index.QueryResults
A list of distances from each item ID to the query vector for this query.
- - - -

E

-
-
E4M3 - com.spotify.voyager.jni.Index.StorageDataType
+

E

+
+
E4M3 - Enum constant in enum com.spotify.voyager.jni.Index.StorageDataType
A custom 8-bit floating point data type with range [-448, 448] and variable precision.
-
Euclidean - com.spotify.voyager.jni.Index.SpaceType
+
Euclidean - Enum constant in enum com.spotify.voyager.jni.Index.SpaceType
Euclidean distance, also known as L2 distance.
-
extractBinaries(String) - Static method in class com.spotify.voyager.jni.utils.JniLibExtractor
+
extractBinaries(String) - Static method in class com.spotify.voyager.jni.utils.JniLibExtractor
 
- - - -

F

-
-
finalize() - Method in class com.spotify.voyager.jni.Index
+

F

+
+
finalize() - Method in class com.spotify.voyager.jni.Index
 
-
Float32 - com.spotify.voyager.jni.Index.StorageDataType
+
Float32 - Enum constant in enum com.spotify.voyager.jni.Index.StorageDataType
A 32-bit floating point ("Float") data type.
-
Float8 - com.spotify.voyager.jni.Index.StorageDataType
+
Float8 - Enum constant in enum com.spotify.voyager.jni.Index.StorageDataType
An 8-bit floating point data type that expects all values to be on [-1, 1].
- - - -

G

-
-
getDistance(int) - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
+

G

+
+
getDistance(int) - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
 
-
getDistances() - Method in class com.spotify.voyager.jni.Index.QueryResults
+
getDistances() - Method in class com.spotify.voyager.jni.Index.QueryResults
Retrieve the list of distances between query vectors and item vectors for the results of this query.
-
getDistances() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
+
getDistances() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
 
-
getEf() - Method in class com.spotify.voyager.jni.Index
+
getEf() - Method in class com.spotify.voyager.jni.Index
Get the default EF ("query search depth") that will be used when Index.query(float[], int) is called.
-
getEfConstruction() - Method in class com.spotify.voyager.jni.Index
+
getEfConstruction() - Method in class com.spotify.voyager.jni.Index
Get the EF Construction value used when adding new elements to this Index.
-
getIDs() - Method in class com.spotify.voyager.jni.Index
+
getIDs() - Method in class com.spotify.voyager.jni.Index
Get the list of identifiers currently stored by this index.
-
getLabels() - Method in class com.spotify.voyager.jni.Index.QueryResults
+
getLabels() - Method in class com.spotify.voyager.jni.Index.QueryResults
Retrieve the list of item IDs ("labels") returned by this query.
-
getM() - Method in class com.spotify.voyager.jni.Index
+
getM() - Method in class com.spotify.voyager.jni.Index
Get the M value used when adding new elements to this Index.
-
getMaxElements() - Method in class com.spotify.voyager.jni.Index
-
-
Get the maximum number of elements currently storable by this Index.
-
-
getMaxElements() - Method in class com.spotify.voyager.jni.StringIndex
+
getMaxElements() - Method in class com.spotify.voyager.jni.Index
Get the maximum number of elements currently storable by this Index.
-
getName(int) - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
+
getName(int) - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
 
-
getNames() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
+
getNames() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
 
-
getNumDimensions() - Method in class com.spotify.voyager.jni.Index
+
getNumDimensions() - Method in class com.spotify.voyager.jni.Index
Get the number of dimensions used in this Index.
-
getNumElements() - Method in class com.spotify.voyager.jni.Index
+
getNumElements() - Method in class com.spotify.voyager.jni.Index
Get the number of elements currently in this Index.
-
getNumElements() - Method in class com.spotify.voyager.jni.StringIndex
-
 
-
getNumResults() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
+
getNumResults() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
 
-
getNumThreads() - Method in class com.spotify.voyager.jni.Index
+
getNumThreads() - Method in class com.spotify.voyager.jni.Index
Get the default number of threads used when adding multiple vectors in bulk or when querying for multiple vectors simultaneously.
-
getSpace() - Method in class com.spotify.voyager.jni.Index
+
getSpace() - Method in class com.spotify.voyager.jni.Index
Get the Index.SpaceType that this Index uses to store and compare vectors.
-
getVector(long) - Method in class com.spotify.voyager.jni.Index
+
getVector(long) - Method in class com.spotify.voyager.jni.Index
Get the vector for the provided identifier.
-
getVector(String) - Method in class com.spotify.voyager.jni.StringIndex
-
 
-
getVectors(long[]) - Method in class com.spotify.voyager.jni.Index
+
getVectors(long[]) - Method in class com.spotify.voyager.jni.Index
Get the vectors for a provided array of identifiers.
- - - -

I

-
-
Index - Class in com.spotify.voyager.jni
+

I

+
+
Index - Class in com.spotify.voyager.jni
A Voyager index, providing storage of floating-point vectors and the ability to efficiently search among those vectors.
-
Index(Index.SpaceType, int) - Constructor for class com.spotify.voyager.jni.Index
+
Index(Index.SpaceType, int) - Constructor for class com.spotify.voyager.jni.Index
Create a new Index that uses the given Index.SpaceType to store and compare numDimensions-dimensional vectors.
-
Index(Index.SpaceType, int, long, long, long, long, Index.StorageDataType) - Constructor for class com.spotify.voyager.jni.Index
+
Index(Index.SpaceType, int, long, long, long, long, Index.StorageDataType) - Constructor for class com.spotify.voyager.jni.Index
Create a new Index that uses the given Index.SpaceType to store and compare numDimensions-dimensional vectors.
-
Index.QueryResults - Class in com.spotify.voyager.jni
+
Index.QueryResults - Class in com.spotify.voyager.jni
A container for query results, returned by Index.
-
Index.SpaceType - Enum in com.spotify.voyager.jni
+
Index.SpaceType - Enum in com.spotify.voyager.jni
The space, also known as distance metric, to use when searching.
-
Index.StorageDataType - Enum in com.spotify.voyager.jni
+
Index.StorageDataType - Enum in com.spotify.voyager.jni
The datatype to use when storing vectors on disk.
-
InnerProduct - com.spotify.voyager.jni.Index.SpaceType
+
InnerProduct - Enum constant in enum com.spotify.voyager.jni.Index.SpaceType
Inner (dot) product.
- - - -

J

-
-
JniLibExtractor - Class in com.spotify.voyager.jni.utils
+

J

+
+
JniLibExtractor - Class in com.spotify.voyager.jni.utils
 
-
JniLibExtractor() - Constructor for class com.spotify.voyager.jni.utils.JniLibExtractor
+
JniLibExtractor() - Constructor for class com.spotify.voyager.jni.utils.JniLibExtractor
 
- - - -

L

-
-
labels - Variable in class com.spotify.voyager.jni.Index.QueryResults
+

L

+
+
labels - Variable in class com.spotify.voyager.jni.Index.QueryResults
A list of item IDs ("labels").
-
load(InputStream) - Static method in class com.spotify.voyager.jni.Index
+
load(InputStream) - Static method in class com.spotify.voyager.jni.Index
Interpret the contents of a java.io.InputStream as the contents of a Voyager index file and create a new Index initialized with the data provided by that stream.
-
load(InputStream, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.Index
+
load(InputStream, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.Index
Interpret the contents of a java.io.InputStream as the contents of a Voyager index file and create a new Index initialized with the data provided by that stream.
-
load(InputStream, InputStream) - Static method in class com.spotify.voyager.jni.StringIndex
+
load(InputStream, InputStream) - Static method in class com.spotify.voyager.jni.StringIndex
Load a previously constructed index from the provided input stream.
-
load(InputStream, InputStream, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.StringIndex
+
load(InputStream, InputStream, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.StringIndex
Load a previously constructed index from the provided input streams.
-
load(String) - Static method in class com.spotify.voyager.jni.Index
+
load(String) - Static method in class com.spotify.voyager.jni.Index
Load a Voyager index file and create a new Index initialized with the data in that file.
-
load(String, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.Index
+
load(String, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.Index
Load a Voyager index file and create a new Index initialized with the data in that file.
-
load(String, String) - Static method in class com.spotify.voyager.jni.StringIndex
+
load(String, String) - Static method in class com.spotify.voyager.jni.StringIndex
Load a previously constructed index from the provided file location.
-
load(String, String, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.StringIndex
+
load(String, String, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.StringIndex
Load a previously constructed index from the provided file location.
- - - -

M

-
-
markDeleted(long) - Method in class com.spotify.voyager.jni.Index
+

M

+
+
markDeleted(long) - Method in class com.spotify.voyager.jni.Index
Mark an element of the index as deleted.
- - - -

Q

-
-
query(float[][], int, int) - Method in class com.spotify.voyager.jni.Index
+

Q

+
+
query(float[][], int, int) - Method in class com.spotify.voyager.jni.Index
Query this Index for approximate nearest neighbors of multiple query vectors.
-
query(float[][], int, int, int) - Method in class com.spotify.voyager.jni.StringIndex
+
query(float[][], int, int, int) - Method in class com.spotify.voyager.jni.StringIndex
Query for multiple target vectors in parallel.
-
query(float[][], int, int, long) - Method in class com.spotify.voyager.jni.Index
+
query(float[][], int, int, long) - Method in class com.spotify.voyager.jni.Index
Query this Index for approximate nearest neighbors of multiple query vectors.
-
query(float[], int) - Method in class com.spotify.voyager.jni.Index
+
query(float[], int) - Method in class com.spotify.voyager.jni.Index
Query this Index for approximate nearest neighbors of a single query vector.
-
query(float[], int, int) - Method in class com.spotify.voyager.jni.StringIndex
+
query(float[], int, int) - Method in class com.spotify.voyager.jni.StringIndex
Find the nearest neighbors of the provided embedding.
-
query(float[], int, long) - Method in class com.spotify.voyager.jni.Index
+
query(float[], int, long) - Method in class com.spotify.voyager.jni.Index
Query this Index for approximate nearest neighbors of a single query vector.
-
QueryResults(long[], float[]) - Constructor for class com.spotify.voyager.jni.Index.QueryResults
+
QueryResults(long[], float[]) - Constructor for class com.spotify.voyager.jni.Index.QueryResults
Instantiates a new QueryResults object, provided two identical-length arrays of labels and their corresponding distances.
-
QueryResults(String[], float[]) - Constructor for class com.spotify.voyager.jni.StringIndex.QueryResults
+
QueryResults(String[], float[]) - Constructor for class com.spotify.voyager.jni.StringIndex.QueryResults
 
- - - -

R

-
-
readStringList(InputStream) - Static method in class com.spotify.voyager.jni.utils.TinyJson
+

R

+
+
readStringList(InputStream) - Static method in class com.spotify.voyager.jni.utils.TinyJson
 
-
resizeIndex(long) - Method in class com.spotify.voyager.jni.Index
-
-
Change the maximum number of elements currently storable by this Index.
-
-
resizeIndex(long) - Method in class com.spotify.voyager.jni.StringIndex
+
resizeIndex(long) - Method in class com.spotify.voyager.jni.Index
Change the maximum number of elements currently storable by this Index.
- - - -

S

-
-
saveIndex(OutputStream) - Method in class com.spotify.voyager.jni.Index
+

S

+
+
saveIndex(OutputStream) - Method in class com.spotify.voyager.jni.Index
Save this Index to the provided output stream.
-
saveIndex(OutputStream, OutputStream) - Method in class com.spotify.voyager.jni.StringIndex
+
saveIndex(OutputStream, OutputStream) - Method in class com.spotify.voyager.jni.StringIndex
Save the underlying HNSW index and JSON encoded names list to the provided output streams
-
saveIndex(String) - Method in class com.spotify.voyager.jni.Index
+
saveIndex(String) - Method in class com.spotify.voyager.jni.Index
Save this Index to a file at the provided filename.
-
saveIndex(String) - Method in class com.spotify.voyager.jni.StringIndex
+
saveIndex(String) - Method in class com.spotify.voyager.jni.StringIndex
Save the underlying index and JSON encoded name list to the provided output directory
-
saveIndex(String, String, String) - Method in class com.spotify.voyager.jni.StringIndex
+
saveIndex(String, String, String) - Method in class com.spotify.voyager.jni.StringIndex
 
-
setEf(long) - Method in class com.spotify.voyager.jni.Index
+
setEf(long) - Method in class com.spotify.voyager.jni.Index
Set the default EF ("query search depth") to use when Index.query(float[], int) is called.
-
setNumThreads(int) - Method in class com.spotify.voyager.jni.Index
+
setNumThreads(int) - Method in class com.spotify.voyager.jni.Index
Set the default number of threads to use when adding multiple vectors in bulk, or when querying for multiple vectors simultaneously.
-
StringIndex - Class in com.spotify.voyager.jni
+
StringIndex - Class in com.spotify.voyager.jni
Wrapper around com.spotify.voyager.jni.Index with a simplified interface which maps the index ID to a provided String.
-
StringIndex(Index.SpaceType, int) - Constructor for class com.spotify.voyager.jni.StringIndex
+
StringIndex(Index.SpaceType, int) - Constructor for class com.spotify.voyager.jni.StringIndex
Instantiate a new empty index with the specified space type and dimensionality
-
StringIndex(Index.SpaceType, int, long, long, long, long, Index.StorageDataType) - Constructor for class com.spotify.voyager.jni.StringIndex
+
StringIndex(Index.SpaceType, int, long, long, long, long, Index.StorageDataType) - Constructor for class com.spotify.voyager.jni.StringIndex
Instantiate an empty index with the specified index parameters
-
StringIndex.QueryResults - Class in com.spotify.voyager.jni
+
StringIndex.QueryResults - Class in com.spotify.voyager.jni
A wrapper class for nearest neighbor query results.
- - - -

T

-
-
TinyJson - Class in com.spotify.voyager.jni.utils
+

T

+
+
TinyJson - Class in com.spotify.voyager.jni.utils
A dependency-free, super tiny JSON serde class that only supports reading and writing lists of strings.
-
TinyJson() - Constructor for class com.spotify.voyager.jni.utils.TinyJson
+
TinyJson() - Constructor for class com.spotify.voyager.jni.utils.TinyJson
 
-
toString() - Method in class com.spotify.voyager.jni.Index.QueryResults
+
toString() - Method in class com.spotify.voyager.jni.Index.QueryResults
 
-
toString() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
+
toString() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
 
- - - -

U

-
-
unmarkDeleted(long) - Method in class com.spotify.voyager.jni.Index
+

U

+
+
unmarkDeleted(long) - Method in class com.spotify.voyager.jni.Index
Un-mark an element of the index as deleted, making it available again.
- - - -

V

-
-
valueOf(String) - Static method in enum com.spotify.voyager.jni.Index.SpaceType
+

V

+
+
valueOf(String) - Static method in enum com.spotify.voyager.jni.Index.SpaceType
Returns the enum constant of this type with the specified name.
-
valueOf(String) - Static method in enum com.spotify.voyager.jni.Index.StorageDataType
+
valueOf(String) - Static method in enum com.spotify.voyager.jni.Index.StorageDataType
Returns the enum constant of this type with the specified name.
-
values() - Static method in enum com.spotify.voyager.jni.Index.SpaceType
+
values() - Static method in enum com.spotify.voyager.jni.Index.SpaceType
Returns an array containing the constants of this enum type, in the order they are declared.
-
values() - Static method in enum com.spotify.voyager.jni.Index.StorageDataType
+
values() - Static method in enum com.spotify.voyager.jni.Index.StorageDataType
Returns an array containing the constants of this enum type, in the order they are declared.
- - - -

W

-
-
writeStringList(Iterable<String>, OutputStream) - Static method in class com.spotify.voyager.jni.utils.TinyJson
+

W

+
+
writeStringList(Iterable<String>, OutputStream) - Static method in class com.spotify.voyager.jni.utils.TinyJson
 
-A C D E F G I J L M Q R S T U V W 
All Classes All Packages
-
+A C D E F G I J L M Q R S T U V W 
All Classes and Interfaces|All Packages
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/index.html b/docs/java/index.html index dc66bcd5..cff4bde0 100644 --- a/docs/java/index.html +++ b/docs/java/index.html @@ -1,177 +1,80 @@ - - + - -Overview (voyager 2.0.9 API) + +Overview (voyager 2.0.6 API) + - + + + - + - - - - - + + - - -
+
+ +
-

voyager 2.0.9 API

+

voyager 2.0.6 API

-
- - - - - - - - - - - - - - - - - - - - -
Packages 
PackageDescription
com.spotify.voyager +
+
Packages
+
+
Package
+
Description
+ +
Voyager is a Java and Python library that provides approximate nearest-neighbor search of vector data.
-
com.spotify.voyager.jni + + +
Java Native Interface (JNI) bindings to expose functionality from Voyager's C++ code into Java.
-
com.spotify.voyager.jni.utils 
+
+ +
 
+
- - - -

Copyright © 2024. All rights reserved.

- diff --git a/docs/java/jquery-ui.overrides.css b/docs/java/jquery-ui.overrides.css index facf852c..f89acb63 100644 --- a/docs/java/jquery-ui.overrides.css +++ b/docs/java/jquery-ui.overrides.css @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,5 +31,4 @@ a.ui-button:active, .ui-button.ui-state-active:hover { /* Overrides the color of selection used in jQuery UI */ background: #F8981D; - border: 1px solid #F8981D; } diff --git a/docs/java/legal/jquery.md b/docs/java/legal/jquery.md index d468b318..8054a34c 100644 --- a/docs/java/legal/jquery.md +++ b/docs/java/legal/jquery.md @@ -1,9 +1,9 @@ -## jQuery v3.6.1 +## jQuery v3.5.1 ### jQuery License ``` -jQuery v 3.6.1 -Copyright OpenJS Foundation and other contributors, https://openjsf.org/ +jQuery v 3.5.1 +Copyright JS Foundation and other contributors, https://js.foundation/ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -26,7 +26,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
****************************************** -The jQuery JavaScript Library v3.6.1 also includes Sizzle.js +The jQuery JavaScript Library v3.5.1 also includes Sizzle.js Sizzle.js includes the following license: diff --git a/docs/java/legal/jqueryUI.md b/docs/java/legal/jqueryUI.md index 8bda9d7a..8031bdb5 100644 --- a/docs/java/legal/jqueryUI.md +++ b/docs/java/legal/jqueryUI.md @@ -1,4 +1,4 @@ -## jQuery UI v1.13.2 +## jQuery UI v1.12.1 ### jQuery UI License ``` diff --git a/docs/java/overview-summary.html b/docs/java/overview-summary.html index 929e37b5..ab7c8109 100644 --- a/docs/java/overview-summary.html +++ b/docs/java/overview-summary.html @@ -1,18 +1,21 @@ - - + - -voyager 2.0.9 API + +voyager 2.0.6 API + + + + + + - - - +
+ Formats for Deep Learning" by Micikevicius et al (arXiv:2209.05433). + +

Note that using E4M3 with the Cosine space may result in negative distances at query time + due to reduced floating-point precision. While confusing, the query results are still + correctly ordered.

diff --git a/docs/java/com/spotify/voyager/jni/Index.html b/docs/java/com/spotify/voyager/jni/Index.html index 85c1d62e..6d804697 100644 --- a/docs/java/com/spotify/voyager/jni/Index.html +++ b/docs/java/com/spotify/voyager/jni/Index.html @@ -1,26 +1,21 @@ - + - -Index (voyager 2.0.6 API) + +Index (voyager 2.0.9 API) - + - - + -
+
-
- -
+ +

Copyright © 2024. All rights reserved.

+ diff --git a/docs/java/com/spotify/voyager/jni/utils/class-use/JniLibExtractor.html b/docs/java/com/spotify/voyager/jni/utils/class-use/JniLibExtractor.html index 522d7795..c0e94a31 100644 --- a/docs/java/com/spotify/voyager/jni/utils/class-use/JniLibExtractor.html +++ b/docs/java/com/spotify/voyager/jni/utils/class-use/JniLibExtractor.html @@ -1,62 +1,150 @@ + - + Uses of Class com.spotify.voyager.jni.utils.JniLibExtractor (voyager 2.0.9 API) - - - - + - + + - - + + + + + - - -
-
+ + +
-
-

Uses of Class
com.spotify.voyager.jni.utils.JniLibExtractor

+

Uses of Class
com.spotify.voyager.jni.utils.JniLibExtractor

-No usage of com.spotify.voyager.jni.utils.JniLibExtractor
+
No usage of com.spotify.voyager.jni.utils.JniLibExtractor
+
-
- -
+ +

Copyright © 2024. All rights reserved.

+ diff --git a/docs/java/com/spotify/voyager/jni/utils/class-use/TinyJson.html b/docs/java/com/spotify/voyager/jni/utils/class-use/TinyJson.html index 38b25f05..1008ee1e 100644 --- a/docs/java/com/spotify/voyager/jni/utils/class-use/TinyJson.html +++ b/docs/java/com/spotify/voyager/jni/utils/class-use/TinyJson.html @@ -1,62 +1,150 @@ + - + Uses of Class com.spotify.voyager.jni.utils.TinyJson (voyager 2.0.9 API) - - - - + - + + - - + + + + + - - -
-
+ + +
-
-

Uses of Class
com.spotify.voyager.jni.utils.TinyJson

+

Uses of Class
com.spotify.voyager.jni.utils.TinyJson

-No usage of com.spotify.voyager.jni.utils.TinyJson
+
No usage of com.spotify.voyager.jni.utils.TinyJson
+
-
- -
+ +

Copyright © 2024. All rights reserved.

+ diff --git a/docs/java/com/spotify/voyager/jni/utils/package-summary.html b/docs/java/com/spotify/voyager/jni/utils/package-summary.html index a554a34f..1a95c40c 100644 --- a/docs/java/com/spotify/voyager/jni/utils/package-summary.html +++ b/docs/java/com/spotify/voyager/jni/utils/package-summary.html @@ -1,114 +1,175 @@ + - + com.spotify.voyager.jni.utils (voyager 2.0.9 API) - - - - + - + + - - + + + + + - - -
-
+ + +
-
-

Package com.spotify.voyager.jni.utils

-
-
-
package com.spotify.voyager.jni.utils
-
-
    -
  • - -
  • -
  • -
    -
    Classes
    -
    -
    Class
    -
    Description
    - -
     
    - -
    +
    +
      +
    • + + + + + + + + + + + + + + + + +
      Class Summary 
      ClassDescription
      JniLibExtractor 
      TinyJson
      A dependency-free, super tiny JSON serde class that only supports reading and writing lists of strings.
      - - - +
    -
+
-
- -
+ +

Copyright © 2024. All rights reserved.

+ diff --git a/docs/java/com/spotify/voyager/jni/utils/package-tree.html b/docs/java/com/spotify/voyager/jni/utils/package-tree.html index c9f9fb2b..d3de1d94 100644 --- a/docs/java/com/spotify/voyager/jni/utils/package-tree.html +++ b/docs/java/com/spotify/voyager/jni/utils/package-tree.html @@ -1,77 +1,166 @@ + - + com.spotify.voyager.jni.utils Class Hierarchy (voyager 2.0.9 API) - - - - + - + + - - + + + + + - - -
-
+ + +
-

Hierarchy For Package com.spotify.voyager.jni.utils

-
-Package Hierarchies: -
+
+

Class Hierarchy

+
-
- -
+ +

Copyright © 2024. All rights reserved.

+ diff --git a/docs/java/com/spotify/voyager/jni/utils/package-use.html b/docs/java/com/spotify/voyager/jni/utils/package-use.html index 7c2714f9..a47b1e2d 100644 --- a/docs/java/com/spotify/voyager/jni/utils/package-use.html +++ b/docs/java/com/spotify/voyager/jni/utils/package-use.html @@ -1,62 +1,150 @@ + - + Uses of Package com.spotify.voyager.jni.utils (voyager 2.0.9 API) - - - - + - + + - - + + + + + - - -
-
+ + +
-

Uses of Package
com.spotify.voyager.jni.utils

-No usage of com.spotify.voyager.jni.utils
+
No usage of com.spotify.voyager.jni.utils
+
-
- -
+ +

Copyright © 2024. All rights reserved.

+ diff --git a/docs/java/com/spotify/voyager/package-summary.html b/docs/java/com/spotify/voyager/package-summary.html index 75faaa54..3393d377 100644 --- a/docs/java/com/spotify/voyager/package-summary.html +++ b/docs/java/com/spotify/voyager/package-summary.html @@ -1,76 +1,106 @@ + - + com.spotify.voyager (voyager 2.0.9 API) - - - - + - + + - - + + + + + - - -
-
+ + +
-
-

Package com.spotify.voyager

+

Package com.spotify.voyager

-
-
package com.spotify.voyager
-
+
+
+ +
Voyager is a Java and Python library that provides approximate nearest-neighbor search of vector data. For most use cases, Index will be the primary interface to Voyager's functionality. @@ -96,29 +126,54 @@

Package com.spotify.voyage index.saveIndex("my_tiny_index.voy");

-
-
    -
  • - -
  • -
-
-
- -
+ +

Copyright © 2024. All rights reserved.

+ diff --git a/docs/java/com/spotify/voyager/package-tree.html b/docs/java/com/spotify/voyager/package-tree.html index 08a2ba9e..632020bf 100644 --- a/docs/java/com/spotify/voyager/package-tree.html +++ b/docs/java/com/spotify/voyager/package-tree.html @@ -1,66 +1,153 @@ + - + com.spotify.voyager Class Hierarchy (voyager 2.0.9 API) - - - - + - + + - - + + + + + - - -
-
+ + +
-

Hierarchy For Package com.spotify.voyager

-
-Package Hierarchies: -
-
- -
+ +

Copyright © 2024. All rights reserved.

+ diff --git a/docs/java/com/spotify/voyager/package-use.html b/docs/java/com/spotify/voyager/package-use.html index 7c2c90af..b8e9a581 100644 --- a/docs/java/com/spotify/voyager/package-use.html +++ b/docs/java/com/spotify/voyager/package-use.html @@ -1,62 +1,150 @@ + - + Uses of Package com.spotify.voyager (voyager 2.0.9 API) - - - - + - + + - - + + + + + - - -
-
+ + +
-

Uses of Package
com.spotify.voyager

-No usage of com.spotify.voyager
+
No usage of com.spotify.voyager
+
-
- -
+ +

Copyright © 2024. All rights reserved.

+ diff --git a/docs/java/help-doc.html b/docs/java/help-doc.html index a8074758..5b04da64 100644 --- a/docs/java/help-doc.html +++ b/docs/java/help-doc.html @@ -1,127 +1,130 @@ + - + API Help (voyager 2.0.9 API) - - - - + - + + - - + + + + + - - -
-
+ + +
-
-

JavaDoc Help

- -
-
-

Navigation

-Starting from the Overview page, you can browse the documentation using the links in each page, and in the navigation bar at the top of each page. The Index and Search box allow you to navigate to specific declarations and summary pages, including: All Packages, All Classes and Interfaces - +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
-
-
-

Kinds of Pages

-The following sections describe the different kinds of pages in this collection. -
-

Overview

+
+
    +
  • +
    +

    Overview

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    -
    -

    Package

    -

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. These pages may contain the following categories:

    -
      + +
    • +
      +

      Package

      +

      Each package has a page that contains a list of its classes and interfaces, with a summary for each. These pages may contain six categories:

      +
      • Interfaces
      • Classes
      • Enums
      • -
      • Exception Classes
      • +
      • Exceptions
      • +
      • Errors
      • Annotation Types
      -
      -

      Class or Interface

      -

      Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a declaration and description, member summary tables, and detailed member descriptions. Entries in each of these sections are omitted if they are empty or not applicable.

      -
        + +
      • +
        +

        Class or Interface

        +

        Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

        +
        • Class Inheritance Diagram
        • Direct Subclasses
        • All Known Subinterfaces
        • @@ -130,64 +133,150 @@

          Class or Interface

        • Class or Interface Description

        -
          +
          • Nested Class Summary
          • -
          • Enum Constant Summary
          • Field Summary
          • Property Summary
          • Constructor Summary
          • Method Summary
          • -
          • Required Element Summary
          • -
          • Optional Element Summary

          -
            -
          • Enum Constant Details
          • -
          • Field Details
          • -
          • Property Details
          • -
          • Constructor Details
          • -
          • Method Details
          • -
          • Element Details
          • +
              +
            • Field Detail
            • +
            • Property Detail
            • +
            • Constructor Detail
            • +
            • Method Detail
            • +
            +

            Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

            +
        +
      • +
      • +
        +

        Annotation Type

        +

        Each annotation type has its own separate page with the following sections:

        +
          +
        • Annotation Type Declaration
        • +
        • Annotation Type Description
        • +
        • Required Element Summary
        • +
        • Optional Element Summary
        • +
        • Element Detail
        -

        Note: Annotation interfaces have required and optional elements, but not methods. Only enum classes have enum constants. The components of a record class are displayed as part of the declaration of the record class. Properties are a feature of JavaFX.

        -

        The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

        -
        -

        Other Files

        -

        Packages and modules may contain pages with additional information related to the declarations nearby.

        +
      • +
      • +
        +

        Enum

        +

        Each enum has its own separate page with the following sections:

        +
          +
        • Enum Declaration
        • +
        • Enum Description
        • +
        • Enum Constant Summary
        • +
        • Enum Constant Detail
        • +
        -
        -

        Use

        -

        Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its Use page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the USE link in the navigation bar.

        +
      • +
      • +
        +

        Use

        +

        Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its "Use" page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the "Use" link in the navigation bar.

        -
        -

        Tree (Class Hierarchy)

        +
      • +
      • +
        +

        Tree (Class Hierarchy)

        There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. Classes are organized by inheritance structure starting with java.lang.Object. Interfaces do not inherit from java.lang.Object.

        -
          -
        • When viewing the Overview page, clicking on TREE displays the hierarchy for all packages.
        • -
        • When viewing a particular package, class or interface page, clicking on TREE displays the hierarchy for only that package.
        • +
            +
          • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
          • +
          • When viewing a particular package, class or interface page, clicking on "Tree" displays the hierarchy for only that package.
        -
        -

        All Packages

        -

        The All Packages page contains an alphabetic index of all packages contained in the documentation.

        +
      • +
      • +
        +

        Deprecated API

        +

        The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

        -
        -

        All Classes and Interfaces

        -

        The All Classes and Interfaces page contains an alphabetic index of all classes and interfaces contained in the documentation, including annotation interfaces, enum classes, and record classes.

        +
      • +
      • +
        +

        Index

        +

        The Index contains an alphabetic index of all classes, interfaces, constructors, methods, and fields, as well as lists of all packages and all classes.

        -
        -

        Index

        -

        The Index contains an alphabetic index of all classes, interfaces, constructors, methods, and fields in the documentation, as well as summary pages such as All Packages, All Classes and Interfaces.

        +
      • +
      • +
        +

        All Classes

        +

        The All Classes link shows all classes and interfaces except non-static nested types.

        -
+ +
  • +
    +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
    +
  • +
  • +
    +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
    +
  • +
  • +
    +

    Search

    +

    You can search for definitions of modules, packages, types, fields, methods and other terms defined in the API, using some or all of the name. "Camel-case" abbreviations are supported: for example, "InpStr" will find "InputStream" and "InputStreamReader".

    +
    +
  • +
    -This help file applies to API documentation generated by the standard doclet.
    +This help file applies to API documentation generated by the standard doclet.
    +
    -
    - -
    + +

    Copyright © 2024. All rights reserved.

    + diff --git a/docs/java/index-all.html b/docs/java/index-all.html index 532051d6..5fd89dc9 100644 --- a/docs/java/index-all.html +++ b/docs/java/index-all.html @@ -1,93 +1,141 @@ + - + Index (voyager 2.0.9 API) - - - - + - + + - - + + + + + - - -
    -
    + + +
    -
    -
    -

    Index

    -
    -A C D E F G I J L M Q R S T U V W 
    All Classes and Interfaces|All Packages -

    A

    -
    -
    addItem(float[]) - Method in class com.spotify.voyager.jni.Index
    +
    A C D E F G I J L M Q R S T U V W 
    All Classes All Packages + + +

    A

    +
    +
    addItem(float[]) - Method in class com.spotify.voyager.jni.Index
    Add an item (a vector) to this Index.
    -
    addItem(float[], long) - Method in class com.spotify.voyager.jni.Index
    +
    addItem(float[], long) - Method in class com.spotify.voyager.jni.Index
    Add an item (a vector) to this Index with the provided identifier.
    -
    addItem(String, float[]) - Method in class com.spotify.voyager.jni.StringIndex
    +
    addItem(String, float[]) - Method in class com.spotify.voyager.jni.StringIndex
     
    -
    addItem(String, List<Float>) - Method in class com.spotify.voyager.jni.StringIndex
    +
    addItem(String, List<Float>) - Method in class com.spotify.voyager.jni.StringIndex
     
    -
    addItems(float[][], int) - Method in class com.spotify.voyager.jni.Index
    +
    addItems(float[][], int) - Method in class com.spotify.voyager.jni.Index
    Add multiple items (vectors) to this Index.
    -
    addItems(float[][], long[], int) - Method in class com.spotify.voyager.jni.Index
    +
    addItems(float[][], long[], int) - Method in class com.spotify.voyager.jni.Index
    Add multiple items (vectors) to this Index.
    -
    addItems(Map<String, List<Float>>) - Method in class com.spotify.voyager.jni.StringIndex
    +
    addItems(Map<String, List<Float>>) - Method in class com.spotify.voyager.jni.StringIndex
     
    -
    asBytes() - Method in class com.spotify.voyager.jni.Index
    +
    asBytes() - Method in class com.spotify.voyager.jni.Index
    Returns the contents of this index as an array of bytes.
    -

    C

    -
    -
    close() - Method in class com.spotify.voyager.jni.Index
    + + + +

    C

    +
    +
    close() - Method in class com.spotify.voyager.jni.Index
    Close this Index and release any memory held by it.
    -
    close() - Method in class com.spotify.voyager.jni.StringIndex
    +
    close() - Method in class com.spotify.voyager.jni.StringIndex
     
    com.spotify.voyager - package com.spotify.voyager
    @@ -100,357 +148,444 @@

    C

    com.spotify.voyager.jni.utils - package com.spotify.voyager.jni.utils
     
    -
    Cosine - Enum constant in enum com.spotify.voyager.jni.Index.SpaceType
    +
    Cosine - com.spotify.voyager.jni.Index.SpaceType
    Cosine distance; i.e. normalized dot product.
    -

    D

    -
    -
    distances - Variable in class com.spotify.voyager.jni.Index.QueryResults
    + + + +

    D

    +
    +
    distances - Variable in class com.spotify.voyager.jni.Index.QueryResults
    A list of distances from each item ID to the query vector for this query.
    -

    E

    -
    -
    E4M3 - Enum constant in enum com.spotify.voyager.jni.Index.StorageDataType
    + + + +

    E

    +
    +
    E4M3 - com.spotify.voyager.jni.Index.StorageDataType
    A custom 8-bit floating point data type with range [-448, 448] and variable precision.
    -
    Euclidean - Enum constant in enum com.spotify.voyager.jni.Index.SpaceType
    +
    Euclidean - com.spotify.voyager.jni.Index.SpaceType
    Euclidean distance, also known as L2 distance.
    -
    extractBinaries(String) - Static method in class com.spotify.voyager.jni.utils.JniLibExtractor
    +
    extractBinaries(String) - Static method in class com.spotify.voyager.jni.utils.JniLibExtractor
     
    -

    F

    -
    -
    finalize() - Method in class com.spotify.voyager.jni.Index
    + + + +

    F

    +
    +
    finalize() - Method in class com.spotify.voyager.jni.Index
     
    -
    Float32 - Enum constant in enum com.spotify.voyager.jni.Index.StorageDataType
    +
    Float32 - com.spotify.voyager.jni.Index.StorageDataType
    A 32-bit floating point ("Float") data type.
    -
    Float8 - Enum constant in enum com.spotify.voyager.jni.Index.StorageDataType
    +
    Float8 - com.spotify.voyager.jni.Index.StorageDataType
    An 8-bit floating point data type that expects all values to be on [-1, 1].
    -

    G

    -
    -
    getDistance(int) - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
    + + + +

    G

    +
    +
    getDistance(int) - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
     
    -
    getDistances() - Method in class com.spotify.voyager.jni.Index.QueryResults
    +
    getDistances() - Method in class com.spotify.voyager.jni.Index.QueryResults
    Retrieve the list of distances between query vectors and item vectors for the results of this query.
    -
    getDistances() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
    +
    getDistances() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
     
    -
    getEf() - Method in class com.spotify.voyager.jni.Index
    +
    getEf() - Method in class com.spotify.voyager.jni.Index
    Get the default EF ("query search depth") that will be uses when Index.query(float[], int) is called.
    -
    getEfConstruction() - Method in class com.spotify.voyager.jni.Index
    +
    getEfConstruction() - Method in class com.spotify.voyager.jni.Index
    Get the EF Construction value used when adding new elements to this Index.
    -
    getIDs() - Method in class com.spotify.voyager.jni.Index
    +
    getIDs() - Method in class com.spotify.voyager.jni.Index
    Get the list of identifiers currently stored by this index.
    -
    getLabels() - Method in class com.spotify.voyager.jni.Index.QueryResults
    +
    getLabels() - Method in class com.spotify.voyager.jni.Index.QueryResults
    Retrieve the list of item IDs ("labels") returned by this query.
    -
    getM() - Method in class com.spotify.voyager.jni.Index
    +
    getM() - Method in class com.spotify.voyager.jni.Index
    Get the M value used when adding new elements to this Index.
    -
    getMaxElements() - Method in class com.spotify.voyager.jni.Index
    +
    getMaxElements() - Method in class com.spotify.voyager.jni.Index
    Get the maximum number of elements currently storable by this Index.
    -
    getMaxElements() - Method in class com.spotify.voyager.jni.StringIndex
    +
    getMaxElements() - Method in class com.spotify.voyager.jni.StringIndex
    Get the maximum number of elements currently storable by this Index.
    -
    getName(int) - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
    +
    getName(int) - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
     
    -
    getNames() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
    +
    getNames() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
     
    -
    getNumDimensions() - Method in class com.spotify.voyager.jni.Index
    +
    getNumDimensions() - Method in class com.spotify.voyager.jni.Index
    Get the number of dimensions used in this Index.
    -
    getNumElements() - Method in class com.spotify.voyager.jni.Index
    +
    getNumElements() - Method in class com.spotify.voyager.jni.Index
    Get the number of elements currently in this Index.
    -
    getNumElements() - Method in class com.spotify.voyager.jni.StringIndex
    +
    getNumElements() - Method in class com.spotify.voyager.jni.StringIndex
     
    -
    getNumResults() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
    +
    getNumResults() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
     
    -
    getNumThreads() - Method in class com.spotify.voyager.jni.Index
    +
    getNumThreads() - Method in class com.spotify.voyager.jni.Index
    Get the default number of threads used when adding multiple vectors in bulk oor when querying for multiple vectors simultaneously.
    -
    getSpace() - Method in class com.spotify.voyager.jni.Index
    +
    getSpace() - Method in class com.spotify.voyager.jni.Index
    Get the Index.SpaceType that this Index uses to store and compare vectors.
    -
    getVector(long) - Method in class com.spotify.voyager.jni.Index
    +
    getVector(long) - Method in class com.spotify.voyager.jni.Index
    Get the vector for the provided identifier.
    -
    getVector(String) - Method in class com.spotify.voyager.jni.StringIndex
    +
    getVector(String) - Method in class com.spotify.voyager.jni.StringIndex
     
    -
    getVectors(long[]) - Method in class com.spotify.voyager.jni.Index
    +
    getVectors(long[]) - Method in class com.spotify.voyager.jni.Index
    Get the vectors for a provided array of identifiers.
    -

    I

    -
    -
    Index - Class in com.spotify.voyager.jni
    + + + +

    I

    +
    +
    Index - Class in com.spotify.voyager.jni
    A Voyager index, providing storage of floating-point vectors and the ability to efficiently search among those vectors.
    -
    Index(Index.SpaceType, int) - Constructor for class com.spotify.voyager.jni.Index
    +
    Index(Index.SpaceType, int) - Constructor for class com.spotify.voyager.jni.Index
    Create a new Index that uses the given Index.SpaceType to store and compare numDimensions-dimensional vectors.
    -
    Index(Index.SpaceType, int, long, long, long, long, Index.StorageDataType) - Constructor for class com.spotify.voyager.jni.Index
    +
    Index(Index.SpaceType, int, long, long, long, long, Index.StorageDataType) - Constructor for class com.spotify.voyager.jni.Index
    Create a new Index that uses the given Index.SpaceType to store and compare numDimensions-dimensional vectors.
    -
    Index.QueryResults - Class in com.spotify.voyager.jni
    +
    Index.QueryResults - Class in com.spotify.voyager.jni
    A container for query results, returned by Index.
    -
    Index.SpaceType - Enum in com.spotify.voyager.jni
    +
    Index.SpaceType - Enum in com.spotify.voyager.jni
    The space, also known as distance metric, to use when searching.
    -
    Index.StorageDataType - Enum in com.spotify.voyager.jni
    +
    Index.StorageDataType - Enum in com.spotify.voyager.jni
    The datatype used to use when storing vectors on disk.
    -
    InnerProduct - Enum constant in enum com.spotify.voyager.jni.Index.SpaceType
    +
    InnerProduct - com.spotify.voyager.jni.Index.SpaceType
    Inner (dot) product.
    -

    J

    -
    -
    JniLibExtractor - Class in com.spotify.voyager.jni.utils
    + + + +

    J

    +
    +
    JniLibExtractor - Class in com.spotify.voyager.jni.utils
     
    -
    JniLibExtractor() - Constructor for class com.spotify.voyager.jni.utils.JniLibExtractor
    +
    JniLibExtractor() - Constructor for class com.spotify.voyager.jni.utils.JniLibExtractor
     
    -

    L

    -
    -
    labels - Variable in class com.spotify.voyager.jni.Index.QueryResults
    + + + +

    L

    +
    +
    labels - Variable in class com.spotify.voyager.jni.Index.QueryResults
    A list of item IDs ("labels").
    -
    load(InputStream) - Static method in class com.spotify.voyager.jni.Index
    +
    load(InputStream) - Static method in class com.spotify.voyager.jni.Index
    Interpret the contents of a java.io.InputStream as the contents of a Voyager index file and create a new Index initialized with the data provided by that stream.
    -
    load(InputStream, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.Index
    +
    load(InputStream, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.Index
    Interpret the contents of a java.io.InputStream as the contents of a Voyager index file and create a new Index initialized with the data provided by that stream.
    -
    load(InputStream, InputStream) - Static method in class com.spotify.voyager.jni.StringIndex
    +
    load(InputStream, InputStream) - Static method in class com.spotify.voyager.jni.StringIndex
    Load a previously constructed index from the provided input stream.
    -
    load(InputStream, InputStream, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.StringIndex
    +
    load(InputStream, InputStream, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.StringIndex
    Load a previously constructed index from the provided input streams.
    -
    load(String) - Static method in class com.spotify.voyager.jni.Index
    +
    load(String) - Static method in class com.spotify.voyager.jni.Index
    Load a Voyager index file and create a new Index initialized with the data in that file.
    -
    load(String, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.Index
    +
    load(String, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.Index
    Load a Voyager index file and create a new Index initialized with the data in that file.
    -
    load(String, String) - Static method in class com.spotify.voyager.jni.StringIndex
    +
    load(String, String) - Static method in class com.spotify.voyager.jni.StringIndex
    Load a previously constructed index from the provided file location.
    -
    load(String, String, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.StringIndex
    +
    load(String, String, Index.SpaceType, int, Index.StorageDataType) - Static method in class com.spotify.voyager.jni.StringIndex
    Load a previously constructed index from the provided file location.
    -

    M

    -
    -
    markDeleted(long) - Method in class com.spotify.voyager.jni.Index
    + + + +

    M

    +
    +
    markDeleted(long) - Method in class com.spotify.voyager.jni.Index
    Mark an element of the index as deleted.
    -

    Q

    -
    -
    query(float[][], int, int) - Method in class com.spotify.voyager.jni.Index
    + + + +

    Q

    +
    +
    query(float[][], int, int) - Method in class com.spotify.voyager.jni.Index
    Query this Index for approximate nearest neighbors of multiple query vectors.
    -
    query(float[][], int, int, int) - Method in class com.spotify.voyager.jni.StringIndex
    +
    query(float[][], int, int, int) - Method in class com.spotify.voyager.jni.StringIndex
    Query for multiple target vectors in parallel.
    -
    query(float[][], int, int, long) - Method in class com.spotify.voyager.jni.Index
    +
    query(float[][], int, int, long) - Method in class com.spotify.voyager.jni.Index
    Query this Index for approximate nearest neighbors of multiple query vectors.
    -
    query(float[], int) - Method in class com.spotify.voyager.jni.Index
    +
    query(float[], int) - Method in class com.spotify.voyager.jni.Index
    Query this Index for approximate nearest neighbors of a single query vector.
    -
    query(float[], int, int) - Method in class com.spotify.voyager.jni.StringIndex
    +
    query(float[], int, int) - Method in class com.spotify.voyager.jni.StringIndex
    Find the nearest neighbors of the provided embedding.
    -
    query(float[], int, long) - Method in class com.spotify.voyager.jni.Index
    +
    query(float[], int, long) - Method in class com.spotify.voyager.jni.Index
    Query this Index for approximate nearest neighbors of a single query vector.
    -
    QueryResults(long[], float[]) - Constructor for class com.spotify.voyager.jni.Index.QueryResults
    +
    QueryResults(long[], float[]) - Constructor for class com.spotify.voyager.jni.Index.QueryResults
    Instantiates a new QueryResults object, provided two identical-length arrays of labels and their corresponding distances.
    -
    QueryResults(String[], float[]) - Constructor for class com.spotify.voyager.jni.StringIndex.QueryResults
    +
    QueryResults(String[], float[]) - Constructor for class com.spotify.voyager.jni.StringIndex.QueryResults
     
    -

    R

    -
    -
    readStringList(InputStream) - Static method in class com.spotify.voyager.jni.utils.TinyJson
    + + + +

    R

    +
    +
    readStringList(InputStream) - Static method in class com.spotify.voyager.jni.utils.TinyJson
     
    -
    resizeIndex(long) - Method in class com.spotify.voyager.jni.Index
    +
    resizeIndex(long) - Method in class com.spotify.voyager.jni.Index
    Change the maximum number of elements currently storable by this Index.
    -
    resizeIndex(long) - Method in class com.spotify.voyager.jni.StringIndex
    +
    resizeIndex(long) - Method in class com.spotify.voyager.jni.StringIndex
    Change the maximum number of elements currently storable by this Index.
    -

    S

    -
    -
    saveIndex(OutputStream) - Method in class com.spotify.voyager.jni.Index
    + + + +

    S

    +
    +
    saveIndex(OutputStream) - Method in class com.spotify.voyager.jni.Index
    Save this Index to the provided output stream.
    -
    saveIndex(OutputStream, OutputStream) - Method in class com.spotify.voyager.jni.StringIndex
    +
    saveIndex(OutputStream, OutputStream) - Method in class com.spotify.voyager.jni.StringIndex
    Save the underlying HNSW index and JSON encoded names list to the provided output streams
    -
    saveIndex(String) - Method in class com.spotify.voyager.jni.Index
    +
    saveIndex(String) - Method in class com.spotify.voyager.jni.Index
    Save this Index to a file at the provided filename.
    -
    saveIndex(String) - Method in class com.spotify.voyager.jni.StringIndex
    +
    saveIndex(String) - Method in class com.spotify.voyager.jni.StringIndex
    Save the underlying index and JSON encoded name list to the provided output directory
    -
    saveIndex(String, String, String) - Method in class com.spotify.voyager.jni.StringIndex
    +
    saveIndex(String, String, String) - Method in class com.spotify.voyager.jni.StringIndex
     
    -
    setEf(long) - Method in class com.spotify.voyager.jni.Index
    +
    setEf(long) - Method in class com.spotify.voyager.jni.Index
    Set the default EF ("query search depth") to use when Index.query(float[], int) is called.
    -
    setNumThreads(int) - Method in class com.spotify.voyager.jni.Index
    +
    setNumThreads(int) - Method in class com.spotify.voyager.jni.Index
    Set the default number of threads to use when adding multiple vectors in bulk, or when querying for multiple vectors simultaneously.
    -
    StringIndex - Class in com.spotify.voyager.jni
    +
    StringIndex - Class in com.spotify.voyager.jni
    Wrapper around com.spotify.voyager.jni.Index with a simplified interface which maps the index ID to a provided String.
    -
    StringIndex(Index.SpaceType, int) - Constructor for class com.spotify.voyager.jni.StringIndex
    +
    StringIndex(Index.SpaceType, int) - Constructor for class com.spotify.voyager.jni.StringIndex
    Instantiate a new empty index with the specified space type and dimensionality
    -
    StringIndex(Index.SpaceType, int, long, long, long, long, Index.StorageDataType) - Constructor for class com.spotify.voyager.jni.StringIndex
    +
    StringIndex(Index.SpaceType, int, long, long, long, long, Index.StorageDataType) - Constructor for class com.spotify.voyager.jni.StringIndex
    Instantiate an empty index with the specified index parameters
    -
    StringIndex.QueryResults - Class in com.spotify.voyager.jni
    +
    StringIndex.QueryResults - Class in com.spotify.voyager.jni
    A wrapper class for nearest neighbor query results.
    -

    T

    -
    -
    TinyJson - Class in com.spotify.voyager.jni.utils
    + + + +

    T

    +
    +
    TinyJson - Class in com.spotify.voyager.jni.utils
    A dependency-free, super tiny JSON serde class that only supports reading and writing lists of strings.
    -
    TinyJson() - Constructor for class com.spotify.voyager.jni.utils.TinyJson
    +
    TinyJson() - Constructor for class com.spotify.voyager.jni.utils.TinyJson
     
    -
    toString() - Method in class com.spotify.voyager.jni.Index.QueryResults
    +
    toString() - Method in class com.spotify.voyager.jni.Index.QueryResults
     
    -
    toString() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
    +
    toString() - Method in class com.spotify.voyager.jni.StringIndex.QueryResults
     
    -

    U

    -
    -
    unmarkDeleted(long) - Method in class com.spotify.voyager.jni.Index
    + + + +

    U

    +
    +
    unmarkDeleted(long) - Method in class com.spotify.voyager.jni.Index
    Un-mark an element of the index as deleted, making it available again.
    -

    V

    -
    -
    valueOf(String) - Static method in enum com.spotify.voyager.jni.Index.SpaceType
    + + + +

    V

    +
    +
    valueOf(String) - Static method in enum com.spotify.voyager.jni.Index.SpaceType
    Returns the enum constant of this type with the specified name.
    -
    valueOf(String) - Static method in enum com.spotify.voyager.jni.Index.StorageDataType
    +
    valueOf(String) - Static method in enum com.spotify.voyager.jni.Index.StorageDataType
    Returns the enum constant of this type with the specified name.
    -
    values() - Static method in enum com.spotify.voyager.jni.Index.SpaceType
    +
    values() - Static method in enum com.spotify.voyager.jni.Index.SpaceType
    Returns an array containing the constants of this enum type, in the order they are declared.
    -
    values() - Static method in enum com.spotify.voyager.jni.Index.StorageDataType
    +
    values() - Static method in enum com.spotify.voyager.jni.Index.StorageDataType
    Returns an array containing the constants of this enum type, in the order they are declared.
    -

    W

    -
    -
    writeStringList(Iterable<String>, OutputStream) - Static method in class com.spotify.voyager.jni.utils.TinyJson
    + + + +

    W

    +
    +
    writeStringList(Iterable<String>, OutputStream) - Static method in class com.spotify.voyager.jni.utils.TinyJson
     
    -A C D E F G I J L M Q R S T U V W 
    All Classes and Interfaces|All Packages
    +A C D E F G I J L M Q R S T U V W 
    All Classes All Packages
    +
    -
    - -
    + +

    Copyright © 2024. All rights reserved.

    + diff --git a/docs/java/index.html b/docs/java/index.html index d62cb982..dc66bcd5 100644 --- a/docs/java/index.html +++ b/docs/java/index.html @@ -1,80 +1,177 @@ + - + Overview (voyager 2.0.9 API) - - - - + - + + - - + + + + + - - -
    -
    + + +
    -

    voyager 2.0.9 API

    -
    -
    Packages
    -
    -
    Package
    -
    Description
    - -
    +
    + + + + + + + + + + + + + + + + + + + + +
    Packages 
    PackageDescription
    com.spotify.voyager
    Voyager is a Java and Python library that provides approximate nearest-neighbor search of vector data.
    - - -
    +
    com.spotify.voyager.jni
    Java Native Interface (JNI) bindings to expose functionality from Voyager's C++ code into Java.
    - - -
     
    - +
    com.spotify.voyager.jni.utils 
    -
    - -
    + +

    Copyright © 2024. All rights reserved.

    + diff --git a/docs/java/jquery-ui.overrides.css b/docs/java/jquery-ui.overrides.css index f89acb63..facf852c 100644 --- a/docs/java/jquery-ui.overrides.css +++ b/docs/java/jquery-ui.overrides.css @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,4 +31,5 @@ a.ui-button:active, .ui-button.ui-state-active:hover { /* Overrides the color of selection used in jQuery UI */ background: #F8981D; + border: 1px solid #F8981D; } diff --git a/docs/java/legal/ASSEMBLY_EXCEPTION b/docs/java/legal/ASSEMBLY_EXCEPTION index 42966666..065b8d90 100644 --- a/docs/java/legal/ASSEMBLY_EXCEPTION +++ b/docs/java/legal/ASSEMBLY_EXCEPTION @@ -2,8 +2,8 @@ OPENJDK ASSEMBLY EXCEPTION The OpenJDK source code made available by Oracle America, Inc. (Oracle) at -openjdk.org ("OpenJDK Code") is distributed under the terms of the GNU -General Public License version 2 +openjdk.java.net ("OpenJDK Code") is distributed under the terms of the GNU +General Public License version 2 only ("GPL2"), with the following clarification and special exception. Linking this OpenJDK Code statically or dynamically with other code @@ -12,7 +12,7 @@ only ("GPL2"), with the following clarification and special exception. 
As a special exception, Oracle gives you permission to link this OpenJDK Code with certain code licensed by Oracle as indicated at - https://openjdk.org/legal/exception-modules-2007-05-08.html + http://openjdk.java.net/legal/exception-modules-2007-05-08.html ("Designated Exception Modules") to produce an executable, regardless of the license terms of the Designated Exception Modules, and to copy and distribute the resulting executable under GPL2, diff --git a/docs/java/member-search-index.js b/docs/java/member-search-index.js index bf707f7a..63604fc7 100644 --- a/docs/java/member-search-index.js +++ b/docs/java/member-search-index.js @@ -1 +1 @@ -memberSearchIndex = [{"p":"com.spotify.voyager.jni","c":"Index","l":"addItem(float[])"},{"p":"com.spotify.voyager.jni","c":"Index","l":"addItem(float[], long)","u":"addItem(float[],long)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"addItem(String, float[])","u":"addItem(java.lang.String,float[])"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"addItem(String, List)","u":"addItem(java.lang.String,java.util.List)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"addItems(float[][], int)","u":"addItems(float[][],int)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"addItems(float[][], long[], 
int)","u":"addItems(float[][],long[],int)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"addItems(Map>)","u":"addItems(java.util.Map)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"asBytes()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"close()"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"close()"},{"p":"com.spotify.voyager.jni","c":"Index.SpaceType","l":"Cosine"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"distances"},{"p":"com.spotify.voyager.jni","c":"Index.StorageDataType","l":"E4M3"},{"p":"com.spotify.voyager.jni","c":"Index.SpaceType","l":"Euclidean"},{"p":"com.spotify.voyager.jni.utils","c":"JniLibExtractor","l":"extractBinaries(String)","u":"extractBinaries(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"finalize()"},{"p":"com.spotify.voyager.jni","c":"Index.StorageDataType","l":"Float32"},{"p":"com.spotify.voyager.jni","c":"Index.StorageDataType","l":"Float8"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"getDistance(int)"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"getDistances()"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"getDistances()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getEf()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getEfConstruction()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getIDs()"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"getLabels()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getM()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getMaxElements()"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"getMaxElements()"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"getName(int)"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"getNames()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getNumDimensions()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getNumElements()"},{"p":"com.spotify.voyager.jni","c":"StringIndex",
"l":"getNumElements()"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"getNumResults()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getNumThreads()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getSpace()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getVector(long)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"getVector(String)","u":"getVector(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getVectors(long[])"},{"p":"com.spotify.voyager.jni","c":"Index","l":"Index(Index.SpaceType, int)","u":"%3Cinit%3E(com.spotify.voyager.jni.Index.SpaceType,int)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"Index(Index.SpaceType, int, long, long, long, long, Index.StorageDataType)","u":"%3Cinit%3E(com.spotify.voyager.jni.Index.SpaceType,int,long,long,long,long,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni","c":"Index.SpaceType","l":"InnerProduct"},{"p":"com.spotify.voyager.jni.utils","c":"JniLibExtractor","l":"JniLibExtractor()","u":"%3Cinit%3E()"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"labels"},{"p":"com.spotify.voyager.jni","c":"Index","l":"load(InputStream)","u":"load(java.io.InputStream)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"load(InputStream, Index.SpaceType, int, Index.StorageDataType)","u":"load(java.io.InputStream,com.spotify.voyager.jni.Index.SpaceType,int,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"load(InputStream, InputStream)","u":"load(java.io.InputStream,java.io.InputStream)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"load(InputStream, InputStream, Index.SpaceType, int, 
Index.StorageDataType)","u":"load(java.io.InputStream,java.io.InputStream,com.spotify.voyager.jni.Index.SpaceType,int,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"load(String)","u":"load(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"load(String, Index.SpaceType, int, Index.StorageDataType)","u":"load(java.lang.String,com.spotify.voyager.jni.Index.SpaceType,int,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"load(String, String)","u":"load(java.lang.String,java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"load(String, String, Index.SpaceType, int, Index.StorageDataType)","u":"load(java.lang.String,java.lang.String,com.spotify.voyager.jni.Index.SpaceType,int,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"markDeleted(long)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"query(float[], int)","u":"query(float[],int)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"query(float[], int, int)","u":"query(float[],int,int)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"query(float[], int, long)","u":"query(float[],int,long)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"query(float[][], int, int)","u":"query(float[][],int,int)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"query(float[][], int, int, int)","u":"query(float[][],int,int,int)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"query(float[][], int, int, long)","u":"query(float[][],int,int,long)"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"QueryResults(long[], float[])","u":"%3Cinit%3E(long[],float[])"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"QueryResults(String[], 
float[])","u":"%3Cinit%3E(java.lang.String[],float[])"},{"p":"com.spotify.voyager.jni.utils","c":"TinyJson","l":"readStringList(InputStream)","u":"readStringList(java.io.InputStream)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"resizeIndex(long)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"resizeIndex(long)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"saveIndex(OutputStream)","u":"saveIndex(java.io.OutputStream)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"saveIndex(OutputStream, OutputStream)","u":"saveIndex(java.io.OutputStream,java.io.OutputStream)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"saveIndex(String)","u":"saveIndex(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"saveIndex(String)","u":"saveIndex(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"saveIndex(String, String, String)","u":"saveIndex(java.lang.String,java.lang.String,java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"setEf(long)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"setNumThreads(int)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"StringIndex(Index.SpaceType, int)","u":"%3Cinit%3E(com.spotify.voyager.jni.Index.SpaceType,int)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"StringIndex(Index.SpaceType, int, long, long, long, long, 
Index.StorageDataType)","u":"%3Cinit%3E(com.spotify.voyager.jni.Index.SpaceType,int,long,long,long,long,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni.utils","c":"TinyJson","l":"TinyJson()","u":"%3Cinit%3E()"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"toString()"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"toString()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"unmarkDeleted(long)"},{"p":"com.spotify.voyager.jni","c":"Index.SpaceType","l":"valueOf(String)","u":"valueOf(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index.StorageDataType","l":"valueOf(String)","u":"valueOf(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index.SpaceType","l":"values()"},{"p":"com.spotify.voyager.jni","c":"Index.StorageDataType","l":"values()"},{"p":"com.spotify.voyager.jni.utils","c":"TinyJson","l":"writeStringList(Iterable, OutputStream)","u":"writeStringList(java.lang.Iterable,java.io.OutputStream)"}];updateSearchResults(); \ No newline at end of file +memberSearchIndex = [{"p":"com.spotify.voyager.jni","c":"Index","l":"addItem(float[], long)","url":"addItem(float[],long)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"addItem(float[])"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"addItem(String, float[])","url":"addItem(java.lang.String,float[])"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"addItem(String, List)","url":"addItem(java.lang.String,java.util.List)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"addItems(float[][], int)","url":"addItems(float[][],int)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"addItems(float[][], long[], 
int)","url":"addItems(float[][],long[],int)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"addItems(Map>)","url":"addItems(java.util.Map)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"asBytes()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"close()"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"close()"},{"p":"com.spotify.voyager.jni","c":"Index.SpaceType","l":"Cosine"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"distances"},{"p":"com.spotify.voyager.jni","c":"Index.StorageDataType","l":"E4M3"},{"p":"com.spotify.voyager.jni","c":"Index.SpaceType","l":"Euclidean"},{"p":"com.spotify.voyager.jni.utils","c":"JniLibExtractor","l":"extractBinaries(String)","url":"extractBinaries(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"finalize()"},{"p":"com.spotify.voyager.jni","c":"Index.StorageDataType","l":"Float32"},{"p":"com.spotify.voyager.jni","c":"Index.StorageDataType","l":"Float8"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"getDistance(int)"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"getDistances()"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"getDistances()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getEf()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getEfConstruction()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getIDs()"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"getLabels()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getM()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getMaxElements()"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"getMaxElements()"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"getName(int)"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"getNames()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getNumDimensions()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getNumElements()"},{"p":"com.spotify.voyager.jni","c":"StringI
ndex","l":"getNumElements()"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"getNumResults()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getNumThreads()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getSpace()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getVector(long)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"getVector(String)","url":"getVector(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"getVectors(long[])"},{"p":"com.spotify.voyager.jni","c":"Index","l":"Index(Index.SpaceType, int, long, long, long, long, Index.StorageDataType)","url":"%3Cinit%3E(com.spotify.voyager.jni.Index.SpaceType,int,long,long,long,long,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"Index(Index.SpaceType, int)","url":"%3Cinit%3E(com.spotify.voyager.jni.Index.SpaceType,int)"},{"p":"com.spotify.voyager.jni","c":"Index.SpaceType","l":"InnerProduct"},{"p":"com.spotify.voyager.jni.utils","c":"JniLibExtractor","l":"JniLibExtractor()","url":"%3Cinit%3E()"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"labels"},{"p":"com.spotify.voyager.jni","c":"Index","l":"load(InputStream, Index.SpaceType, int, Index.StorageDataType)","url":"load(java.io.InputStream,com.spotify.voyager.jni.Index.SpaceType,int,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"load(InputStream, InputStream, Index.SpaceType, int, Index.StorageDataType)","url":"load(java.io.InputStream,java.io.InputStream,com.spotify.voyager.jni.Index.SpaceType,int,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"load(InputStream, InputStream)","url":"load(java.io.InputStream,java.io.InputStream)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"load(InputStream)","url":"load(java.io.InputStream)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"load(String, Index.SpaceType, int, 
Index.StorageDataType)","url":"load(java.lang.String,com.spotify.voyager.jni.Index.SpaceType,int,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"load(String, String, Index.SpaceType, int, Index.StorageDataType)","url":"load(java.lang.String,java.lang.String,com.spotify.voyager.jni.Index.SpaceType,int,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"load(String, String)","url":"load(java.lang.String,java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"load(String)","url":"load(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"markDeleted(long)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"query(float[], int, int)","url":"query(float[],int,int)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"query(float[], int, long)","url":"query(float[],int,long)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"query(float[], int)","url":"query(float[],int)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"query(float[][], int, int, int)","url":"query(float[][],int,int,int)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"query(float[][], int, int, long)","url":"query(float[][],int,int,long)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"query(float[][], int, int)","url":"query(float[][],int,int)"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"QueryResults(long[], float[])","url":"%3Cinit%3E(long[],float[])"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"QueryResults(String[], float[])","url":"%3Cinit%3E(java.lang.String[],float[])"},{"p":"com.spotify.voyager.jni.utils","c":"TinyJson","l":"readStringList(InputStream)","url":"readStringList(java.io.InputStream)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"resizeIndex(long)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"resizeIndex(long)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"saveIndex(OutputStream, 
OutputStream)","url":"saveIndex(java.io.OutputStream,java.io.OutputStream)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"saveIndex(OutputStream)","url":"saveIndex(java.io.OutputStream)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"saveIndex(String, String, String)","url":"saveIndex(java.lang.String,java.lang.String,java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"saveIndex(String)","url":"saveIndex(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"saveIndex(String)","url":"saveIndex(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"setEf(long)"},{"p":"com.spotify.voyager.jni","c":"Index","l":"setNumThreads(int)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"StringIndex(Index.SpaceType, int, long, long, long, long, Index.StorageDataType)","url":"%3Cinit%3E(com.spotify.voyager.jni.Index.SpaceType,int,long,long,long,long,com.spotify.voyager.jni.Index.StorageDataType)"},{"p":"com.spotify.voyager.jni","c":"StringIndex","l":"StringIndex(Index.SpaceType, int)","url":"%3Cinit%3E(com.spotify.voyager.jni.Index.SpaceType,int)"},{"p":"com.spotify.voyager.jni.utils","c":"TinyJson","l":"TinyJson()","url":"%3Cinit%3E()"},{"p":"com.spotify.voyager.jni","c":"Index.QueryResults","l":"toString()"},{"p":"com.spotify.voyager.jni","c":"StringIndex.QueryResults","l":"toString()"},{"p":"com.spotify.voyager.jni","c":"Index","l":"unmarkDeleted(long)"},{"p":"com.spotify.voyager.jni","c":"Index.SpaceType","l":"valueOf(String)","url":"valueOf(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index.StorageDataType","l":"valueOf(String)","url":"valueOf(java.lang.String)"},{"p":"com.spotify.voyager.jni","c":"Index.SpaceType","l":"values()"},{"p":"com.spotify.voyager.jni","c":"Index.StorageDataType","l":"values()"},{"p":"com.spotify.voyager.jni.utils","c":"TinyJson","l":"writeStringList(Iterable, OutputStream)","url":"writeStringList(java.lang.Iterable,java.io.OutputStream)"}] \ No newline at end of file diff 
--git a/docs/java/overview-summary.html b/docs/java/overview-summary.html index 40a97846..929e37b5 100644 --- a/docs/java/overview-summary.html +++ b/docs/java/overview-summary.html @@ -1,21 +1,18 @@ + - + voyager 2.0.9 API - - - - - - + + - +
    -

    Hierarchy For All Packages

    -
    -Package Hierarchies: -
    +
    +

    Class Hierarchy

    -
    +

    Enum Hierarchy

    +
    -
    - -
    + +

    Copyright © 2024. All rights reserved.

    + diff --git a/docs/java/package-search-index.js b/docs/java/package-search-index.js index 1cbb9a98..41002eac 100644 --- a/docs/java/package-search-index.js +++ b/docs/java/package-search-index.js @@ -1 +1 @@ -packageSearchIndex = [{"l":"All Packages","u":"allpackages-index.html"},{"l":"com.spotify.voyager"},{"l":"com.spotify.voyager.jni"},{"l":"com.spotify.voyager.jni.utils"}];updateSearchResults(); \ No newline at end of file +packageSearchIndex = [{"l":"All Packages","url":"allpackages-index.html"},{"l":"com.spotify.voyager"},{"l":"com.spotify.voyager.jni"},{"l":"com.spotify.voyager.jni.utils"}] \ No newline at end of file diff --git a/docs/java/script.js b/docs/java/script.js index 8b9691a1..7dc93c48 100644 --- a/docs/java/script.js +++ b/docs/java/script.js @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -28,22 +28,83 @@ var packageSearchIndex; var typeSearchIndex; var memberSearchIndex; var tagSearchIndex; - -var oddRowColor = "odd-row-color"; -var evenRowColor = "even-row-color"; -var sortAsc = "sort-asc"; -var sortDesc = "sort-desc"; -var tableTab = "table-tab"; -var activeTableTab = "active-table-tab"; - function loadScripts(doc, tag) { + createElem(doc, tag, 'jquery/jszip/dist/jszip.js'); + createElem(doc, tag, 'jquery/jszip-utils/dist/jszip-utils.js'); + if (window.navigator.userAgent.indexOf('MSIE ') > 0 || window.navigator.userAgent.indexOf('Trident/') > 0 || + window.navigator.userAgent.indexOf('Edge/') > 0) { + createElem(doc, tag, 'jquery/jszip-utils/dist/jszip-utils-ie.js'); + } createElem(doc, tag, 'search.js'); - createElem(doc, tag, 'module-search-index.js'); - createElem(doc, tag, 'package-search-index.js'); - createElem(doc, tag, 'type-search-index.js'); - createElem(doc, tag, 'member-search-index.js'); - createElem(doc, tag, 'tag-search-index.js'); + $.get(pathtoroot + "module-search-index.zip") + .done(function() { + JSZipUtils.getBinaryContent(pathtoroot + "module-search-index.zip", function(e, data) { + JSZip.loadAsync(data).then(function(zip){ + zip.file("module-search-index.json").async("text").then(function(content){ + moduleSearchIndex = JSON.parse(content); + }); + }); + }); + }); + $.get(pathtoroot + "package-search-index.zip") + .done(function() { + JSZipUtils.getBinaryContent(pathtoroot + "package-search-index.zip", function(e, data) { + JSZip.loadAsync(data).then(function(zip){ + zip.file("package-search-index.json").async("text").then(function(content){ + packageSearchIndex = JSON.parse(content); + }); + }); + }); + }); + $.get(pathtoroot + "type-search-index.zip") + .done(function() { + JSZipUtils.getBinaryContent(pathtoroot + "type-search-index.zip", function(e, data) { + JSZip.loadAsync(data).then(function(zip){ + 
zip.file("type-search-index.json").async("text").then(function(content){ + typeSearchIndex = JSON.parse(content); + }); + }); + }); + }); + $.get(pathtoroot + "member-search-index.zip") + .done(function() { + JSZipUtils.getBinaryContent(pathtoroot + "member-search-index.zip", function(e, data) { + JSZip.loadAsync(data).then(function(zip){ + zip.file("member-search-index.json").async("text").then(function(content){ + memberSearchIndex = JSON.parse(content); + }); + }); + }); + }); + $.get(pathtoroot + "tag-search-index.zip") + .done(function() { + JSZipUtils.getBinaryContent(pathtoroot + "tag-search-index.zip", function(e, data) { + JSZip.loadAsync(data).then(function(zip){ + zip.file("tag-search-index.json").async("text").then(function(content){ + tagSearchIndex = JSON.parse(content); + }); + }); + }); + }); + if (!moduleSearchIndex) { + createElem(doc, tag, 'module-search-index.js'); + } + if (!packageSearchIndex) { + createElem(doc, tag, 'package-search-index.js'); + } + if (!typeSearchIndex) { + createElem(doc, tag, 'type-search-index.js'); + } + if (!memberSearchIndex) { + createElem(doc, tag, 'member-search-index.js'); + } + if (!tagSearchIndex) { + createElem(doc, tag, 'tag-search-index.js'); + } + $(window).resize(function() { + $('.navPadding').css('padding-top', $('.fixedNav').css("height")); + }); } function createElem(doc, tag, path) { @@ -53,201 +114,36 @@ function createElem(doc, tag, path) { scriptElement.parentNode.insertBefore(script, scriptElement); } -// Helper for making content containing release names comparable lexicographically -function makeComparable(s) { - return s.toLowerCase().replace(/(\d+)/g, - function(n, m) { - return ("000" + m).slice(-4); - }); -} - -// Switches between two styles depending on a condition -function toggleStyle(classList, condition, trueStyle, falseStyle) { - if (condition) { - classList.remove(falseStyle); - classList.add(trueStyle); - } else { - classList.remove(trueStyle); - classList.add(falseStyle); - } -} - 
-// Sorts the rows in a table lexicographically by the content of a specific column -function sortTable(header, columnIndex, columns) { - var container = header.parentElement; - var descending = header.classList.contains(sortAsc); - container.querySelectorAll("div.table-header").forEach( - function(header) { - header.classList.remove(sortAsc); - header.classList.remove(sortDesc); +function show(type) { + count = 0; + for (var key in data) { + var row = document.getElementById(key); + if ((data[key] & type) !== 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; } - ) - var cells = container.children; - var rows = []; - for (var i = columns; i < cells.length; i += columns) { - rows.push(Array.prototype.slice.call(cells, i, i + columns)); + else + row.style.display = 'none'; } - var comparator = function(a, b) { - var ka = makeComparable(a[columnIndex].textContent); - var kb = makeComparable(b[columnIndex].textContent); - if (ka < kb) - return descending ? 1 : -1; - if (ka > kb) - return descending ? -1 : 1; - return 0; - }; - var sorted = rows.sort(comparator); - var visible = 0; - sorted.forEach(function(row) { - if (row[0].style.display !== 'none') { - var isEvenRow = visible++ % 2 === 0; - } - row.forEach(function(cell) { - toggleStyle(cell.classList, isEvenRow, evenRowColor, oddRowColor); - container.appendChild(cell); - }) - }); - toggleStyle(header.classList, descending, sortDesc, sortAsc); + updateTabs(type); } -// Toggles the visibility of a table category in all tables in a page -function toggleGlobal(checkbox, selected, columns) { - var display = checkbox.checked ? '' : 'none'; - document.querySelectorAll("div.table-tabs").forEach(function(t) { - var id = t.parentElement.getAttribute("id"); - var selectedClass = id + "-tab" + selected; - // if selected is empty string it selects all uncategorized entries - var selectUncategorized = !Boolean(selected); - var visible = 0; - document.querySelectorAll('div.' 
+ id) - .forEach(function(elem) { - if (selectUncategorized) { - if (elem.className.indexOf(selectedClass) === -1) { - elem.style.display = display; - } - } else if (elem.classList.contains(selectedClass)) { - elem.style.display = display; - } - if (elem.style.display === '') { - var isEvenRow = visible++ % (columns * 2) < columns; - toggleStyle(elem.classList, isEvenRow, evenRowColor, oddRowColor); - } - }); - var displaySection = visible === 0 ? 'none' : ''; - t.parentElement.style.display = displaySection; - document.querySelector("li#contents-" + id).style.display = displaySection; - }) -} - -// Shows the elements of a table belonging to a specific category -function show(tableId, selected, columns) { - if (tableId !== selected) { - document.querySelectorAll('div.' + tableId + ':not(.' + selected + ')') - .forEach(function(elem) { - elem.style.display = 'none'; - }); - } - document.querySelectorAll('div.' + selected) - .forEach(function(elem, index) { - elem.style.display = ''; - var isEvenRow = index % (columns * 2) < columns; - toggleStyle(elem.classList, isEvenRow, evenRowColor, oddRowColor); - }); - updateTabs(tableId, selected); -} - -function updateTabs(tableId, selected) { - document.querySelector('div#' + tableId +' .summary-table') - .setAttribute('aria-labelledby', selected); - document.querySelectorAll('button[id^="' + tableId + '"]') - .forEach(function(tab, index) { - if (selected === tab.id || (tableId === selected && index === 0)) { - tab.className = activeTableTab; - tab.setAttribute('aria-selected', true); - tab.setAttribute('tabindex',0); - } else { - tab.className = tableTab; - tab.setAttribute('aria-selected', false); - tab.setAttribute('tabindex',-1); - } - }); -} - -function switchTab(e) { - var selected = document.querySelector('[aria-selected=true]'); - if (selected) { - if ((e.keyCode === 37 || e.keyCode === 38) && selected.previousSibling) { - // left or up arrow key pressed: move focus to previous tab - 
selected.previousSibling.click(); - selected.previousSibling.focus(); - e.preventDefault(); - } else if ((e.keyCode === 39 || e.keyCode === 40) && selected.nextSibling) { - // right or down arrow key pressed: move focus to next tab - selected.nextSibling.click(); - selected.nextSibling.focus(); - e.preventDefault(); +function updateTabs(type) { + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; } } } -var updateSearchResults = function() {}; - -function indexFilesLoaded() { - return moduleSearchIndex - && packageSearchIndex - && typeSearchIndex - && memberSearchIndex - && tagSearchIndex; -} -// Copy the contents of the local snippet to the clipboard -function copySnippet(button) { - copyToClipboard(button.nextElementSibling.innerText); - switchCopyLabel(button, button.firstElementChild); -} -function copyToClipboard(content) { - var textarea = document.createElement("textarea"); - textarea.style.height = 0; - document.body.appendChild(textarea); - textarea.value = content; - textarea.select(); - document.execCommand("copy"); - document.body.removeChild(textarea); +function updateModuleFrame(pFrame, cFrame) { + top.packageFrame.location = pFrame; + top.classFrame.location = cFrame; } -function switchCopyLabel(button, span) { - var copied = span.getAttribute("data-copied"); - button.classList.add("visible"); - var initialLabel = span.innerHTML; - span.innerHTML = copied; - setTimeout(function() { - button.classList.remove("visible"); - setTimeout(function() { - if (initialLabel !== copied) { - span.innerHTML = initialLabel; - } - }, 100); - }, 1900); -} -// Workaround for scroll position not being included in browser history (8249133) -document.addEventListener("DOMContentLoaded", function(e) { - var contentDiv = 
document.querySelector("div.flex-content"); - window.addEventListener("popstate", function(e) { - if (e.state !== null) { - contentDiv.scrollTop = e.state; - } - }); - window.addEventListener("hashchange", function(e) { - history.replaceState(contentDiv.scrollTop, document.title); - }); - var timeoutId; - contentDiv.addEventListener("scroll", function(e) { - if (timeoutId) { - clearTimeout(timeoutId); - } - timeoutId = setTimeout(function() { - history.replaceState(contentDiv.scrollTop, document.title); - }, 100); - }); - if (!location.hash) { - history.replaceState(contentDiv.scrollTop, document.title); - } -}); diff --git a/docs/java/search.html b/docs/java/search.html index 80f17184..22515698 100644 --- a/docs/java/search.html +++ b/docs/java/search.html @@ -1,11 +1,11 @@ - + Search (voyager 2.0.9 API) - + diff --git a/docs/java/search.js b/docs/java/search.js index 7673397e..8492271e 100644 --- a/docs/java/search.js +++ b/docs/java/search.js @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -22,356 +22,120 @@ * or visit www.oracle.com if you need additional information or have any * questions. 
*/ -"use strict"; -const messages = { - enterTerm: "Enter a search term", - noResult: "No results found", - oneResult: "Found one result", - manyResults: "Found {0} results", - loading: "Loading search index...", - searching: "Searching...", - redirecting: "Redirecting to first result...", - linkIcon: "Link icon", - linkToSection: "Link to this section" -} -const categories = { - modules: "Modules", - packages: "Packages", - types: "Types", - members: "Members", - searchTags: "Search Tags" -}; -const highlight = "$&"; -const NO_MATCH = {}; -const MAX_RESULTS = 300; -function checkUnnamed(name, separator) { - return name === "" || !name ? "" : name + separator; -} -function escapeHtml(str) { - return str.replace(//g, ">"); -} -function getHighlightedText(str, boundaries, from, to) { - var start = from; - var text = ""; - for (var i = 0; i < boundaries.length; i += 2) { - var b0 = boundaries[i]; - var b1 = boundaries[i + 1]; - if (b0 >= to || b1 <= from) { - continue; - } - text += escapeHtml(str.slice(start, Math.max(start, b0))); - text += ""; - text += escapeHtml(str.slice(Math.max(start, b0), Math.min(to, b1))); - text += ""; - start = Math.min(to, b1); + +var noResult = {l: "No results found"}; +var catModules = "Modules"; +var catPackages = "Packages"; +var catTypes = "Types"; +var catMembers = "Members"; +var catSearchTags = "SearchTags"; +var highlight = "$&"; +var camelCaseRegexp = ""; +var secondaryMatcher = ""; +function getHighlightedText(item) { + var ccMatcher = new RegExp(camelCaseRegexp); + var label = item.replace(ccMatcher, highlight); + if (label === item) { + label = item.replace(secondaryMatcher, highlight); } - text += escapeHtml(str.slice(start, to)); - return text; + return label; } -function getURLPrefix(item, category) { - var urlPrefix = ""; - var slash = "/"; - if (category === "modules") { - return item.l + slash; - } else if (category === "packages" && item.m) { - return item.m + slash; - } else if (category === "types" || category === 
"members") { - if (item.m) { - urlPrefix = item.m + slash; - } else { - $.each(packageSearchIndex, function(index, it) { - if (it.m && item.p === it.l) { - urlPrefix = it.m + slash; +function getURLPrefix(ui) { + var urlPrefix=""; + if (useModuleDirectories) { + var slash = "/"; + if (ui.item.category === catModules) { + return ui.item.l + slash; + } else if (ui.item.category === catPackages && ui.item.m) { + return ui.item.m + slash; + } else if ((ui.item.category === catTypes && ui.item.p) || ui.item.category === catMembers) { + $.each(packageSearchIndex, function(index, item) { + if (item.m && ui.item.p == item.l) { + urlPrefix = item.m + slash; } }); - } - } - return urlPrefix; -} -function getURL(item, category) { - if (item.url) { - return item.url; - } - var url = getURLPrefix(item, category); - if (category === "modules") { - url += "module-summary.html"; - } else if (category === "packages") { - if (item.u) { - url = item.u; - } else { - url += item.l.replace(/\./g, '/') + "/package-summary.html"; - } - } else if (category === "types") { - if (item.u) { - url = item.u; + return urlPrefix; } else { - url += checkUnnamed(item.p, "/").replace(/\./g, '/') + item.l + ".html"; + return urlPrefix; } - } else if (category === "members") { - url += checkUnnamed(item.p, "/").replace(/\./g, '/') + item.c + ".html" + "#"; - if (item.u) { - url += item.u; - } else { - url += item.l; - } - } else if (category === "searchTags") { - url += item.u; } - item.url = url; - return url; + return urlPrefix; } -function createMatcher(term, camelCase) { - if (camelCase && !isUpperCase(term)) { - return null; // no need for camel-case matcher for lower case query - } - var pattern = ""; - var upperCase = []; - term.trim().split(/\s+/).forEach(function(w, index, array) { - var tokens = w.split(/(?=[A-Z,.()<>?[\/])/); - for (var i = 0; i < tokens.length; i++) { - var s = tokens[i]; - // ',' and '?' 
are the only delimiters commonly followed by space in java signatures - pattern += "(" + $.ui.autocomplete.escapeRegex(s).replace(/[,?]/g, "$&\\s*?") + ")"; - upperCase.push(false); - var isWordToken = /\w$/.test(s); - if (isWordToken) { - if (i === tokens.length - 1 && index < array.length - 1) { - // space in query string matches all delimiters - pattern += "(.*?)"; - upperCase.push(isUpperCase(s[0])); - } else { - if (!camelCase && isUpperCase(s) && s.length === 1) { - pattern += "()"; - } else { - pattern += "([a-z0-9$<>?[\\]]*?)"; - } - upperCase.push(isUpperCase(s[0])); - } - } else { - pattern += "()"; - upperCase.push(false); - } +var watermark = 'Search'; +$(function() { + $("#search").val(''); + $("#search").prop("disabled", false); + $("#reset").prop("disabled", false); + $("#search").val(watermark).addClass('watermark'); + $("#search").blur(function() { + if ($(this).val().length == 0) { + $(this).val(watermark).addClass('watermark'); } }); - var re = new RegExp(pattern, "gi"); - re.upperCase = upperCase; - return re; -} -function findMatch(matcher, input, startOfName, endOfName) { - var from = startOfName; - matcher.lastIndex = from; - var match = matcher.exec(input); - // Expand search area until we get a valid result or reach the beginning of the string - while (!match || match.index + match[0].length < startOfName || endOfName < match.index) { - if (from === 0) { - return NO_MATCH; - } - from = input.lastIndexOf(".", from - 2) + 1; - matcher.lastIndex = from; - match = matcher.exec(input); - } - var boundaries = []; - var matchEnd = match.index + match[0].length; - var score = 5; - var start = match.index; - var prevEnd = -1; - for (var i = 1; i < match.length; i += 2) { - var isUpper = isUpperCase(input[start]); - var isMatcherUpper = matcher.upperCase[i]; - // capturing groups come in pairs, match and non-match - boundaries.push(start, start + match[i].length); - // make sure groups are anchored on a left word boundary - var prevChar = input[start 
- 1] || ""; - var nextChar = input[start + 1] || ""; - if (start !== 0 && !/[\W_]/.test(prevChar) && !/[\W_]/.test(input[start])) { - if (isUpper && (isLowerCase(prevChar) || isLowerCase(nextChar))) { - score -= 0.1; - } else if (isMatcherUpper && start === prevEnd) { - score -= isUpper ? 0.1 : 1.0; - } else { - return NO_MATCH; - } - } - prevEnd = start + match[i].length; - start += match[i].length + match[i + 1].length; - - // lower score for parts of the name that are missing - if (match[i + 1] && prevEnd < endOfName) { - score -= rateNoise(match[i + 1]); - } - } - // lower score if a type name contains unmatched camel-case parts - if (input[matchEnd - 1] !== "." && endOfName > matchEnd) - score -= rateNoise(input.slice(matchEnd, endOfName)); - score -= rateNoise(input.slice(0, Math.max(startOfName, match.index))); - - if (score <= 0) { - return NO_MATCH; - } - return { - input: input, - score: score, - boundaries: boundaries - }; -} -function isUpperCase(s) { - return s !== s.toLowerCase(); -} -function isLowerCase(s) { - return s !== s.toUpperCase(); -} -function rateNoise(str) { - return (str.match(/([.(])/g) || []).length / 5 - + (str.match(/([A-Z]+)/g) || []).length / 10 - + str.length / 20; -} -function doSearch(request, response) { - var term = request.term.trim(); - var maxResults = request.maxResults || MAX_RESULTS; - if (term.length === 0) { - return this.close(); - } - var matcher = { - plainMatcher: createMatcher(term, false), - camelCaseMatcher: createMatcher(term, true) - } - var indexLoaded = indexFilesLoaded(); - - function getPrefix(item, category) { - switch (category) { - case "packages": - return checkUnnamed(item.m, "/"); - case "types": - return checkUnnamed(item.p, "."); - case "members": - return checkUnnamed(item.p, ".") + item.c + "."; - default: - return ""; - } - } - function useQualifiedName(category) { - switch (category) { - case "packages": - return /[\s/]/.test(term); - case "types": - case "members": - return /[\s.]/.test(term); 
- default: - return false; - } - } - function searchIndex(indexArray, category) { - var matches = []; - if (!indexArray) { - if (!indexLoaded) { - matches.push({ l: messages.loading, category: category }); - } - return matches; - } - $.each(indexArray, function (i, item) { - var prefix = getPrefix(item, category); - var simpleName = item.l; - var qualifiedName = prefix + simpleName; - var useQualified = useQualifiedName(category); - var input = useQualified ? qualifiedName : simpleName; - var startOfName = useQualified ? prefix.length : 0; - var endOfName = category === "members" && input.indexOf("(", startOfName) > -1 - ? input.indexOf("(", startOfName) : input.length; - var m = findMatch(matcher.plainMatcher, input, startOfName, endOfName); - if (m === NO_MATCH && matcher.camelCaseMatcher) { - m = findMatch(matcher.camelCaseMatcher, input, startOfName, endOfName); - } - if (m !== NO_MATCH) { - m.indexItem = item; - m.prefix = prefix; - m.category = category; - if (!useQualified) { - m.input = qualifiedName; - m.boundaries = m.boundaries.map(function(b) { - return b + prefix.length; - }); - } - matches.push(m); - } - return true; - }); - return matches.sort(function(e1, e2) { - return e2.score - e1.score; - }).slice(0, maxResults); - } - - var result = searchIndex(moduleSearchIndex, "modules") - .concat(searchIndex(packageSearchIndex, "packages")) - .concat(searchIndex(typeSearchIndex, "types")) - .concat(searchIndex(memberSearchIndex, "members")) - .concat(searchIndex(tagSearchIndex, "searchTags")); - - if (!indexLoaded) { - updateSearchResults = function() { - doSearch(request, response); + $("#search").on('click keydown', function() { + if ($(this).val() == watermark) { + $(this).val('').removeClass('watermark'); } - } else { - updateSearchResults = function() {}; - } - response(result); -} -// JQuery search menu implementation + }); + $("#reset").click(function() { + $("#search").val(''); + $("#search").focus(); + }); + $("#search").focus(); + 
$("#search")[0].setSelectionRange(0, 0); +}); $.widget("custom.catcomplete", $.ui.autocomplete, { _create: function() { this._super(); - this.widget().menu("option", "items", "> .result-item"); - // workaround for search result scrolling - this.menu._scrollIntoView = function _scrollIntoView( item ) { - var borderTop, paddingTop, offset, scroll, elementHeight, itemHeight; - if ( this._hasScroll() ) { - borderTop = parseFloat( $.css( this.activeMenu[ 0 ], "borderTopWidth" ) ) || 0; - paddingTop = parseFloat( $.css( this.activeMenu[ 0 ], "paddingTop" ) ) || 0; - offset = item.offset().top - this.activeMenu.offset().top - borderTop - paddingTop; - scroll = this.activeMenu.scrollTop(); - elementHeight = this.activeMenu.height() - 26; - itemHeight = item.outerHeight(); - - if ( offset < 0 ) { - this.activeMenu.scrollTop( scroll + offset ); - } else if ( offset + itemHeight > elementHeight ) { - this.activeMenu.scrollTop( scroll + offset - elementHeight + itemHeight ); - } - } - }; + this.widget().menu("option", "items", "> :not(.ui-autocomplete-category)"); }, _renderMenu: function(ul, items) { - var currentCategory = ""; - var widget = this; - widget.menu.bindings = $(); + var rMenu = this, + currentCategory = ""; + rMenu.menu.bindings = $(); $.each(items, function(index, item) { - if (item.category && item.category !== currentCategory) { - ul.append("
  • " + categories[item.category] + "
  • "); + var li; + if (item.l !== noResult.l && item.category !== currentCategory) { + ul.append("
  • " + item.category + "
  • "); currentCategory = item.category; } - var li = widget._renderItemData(ul, item); + li = rMenu._renderItemData(ul, item); if (item.category) { - li.attr("aria-label", categories[item.category] + " : " + item.l); + li.attr("aria-label", item.category + " : " + item.l); + li.attr("class", "resultItem"); } else { li.attr("aria-label", item.l); + li.attr("class", "resultItem"); } - li.attr("class", "result-item"); }); - ul.append(""); }, _renderItem: function(ul, item) { + var label = ""; + if (item.category === catModules) { + label = getHighlightedText(item.l); + } else if (item.category === catPackages) { + label = (item.m) + ? getHighlightedText(item.m + "/" + item.l) + : getHighlightedText(item.l); + } else if (item.category === catTypes) { + label = (item.p) + ? getHighlightedText(item.p + "." + item.l) + : getHighlightedText(item.l); + } else if (item.category === catMembers) { + label = getHighlightedText(item.p + "." + (item.c + "." + item.l)); + } else if (item.category === catSearchTags) { + label = getHighlightedText(item.l); + } else { + label = item.l; + } var li = $("
  • ").appendTo(ul); var div = $("
    ").appendTo(li); - var label = item.l - ? item.l - : getHighlightedText(item.input, item.boundaries, 0, item.input.length); - var idx = item.indexItem; - if (item.category === "searchTags" && idx && idx.h) { - if (idx.d) { - div.html(label + " (" + idx.h + ")
    " - + idx.d + "
    "); + if (item.category === catSearchTags) { + if (item.d) { + div.html(label + " (" + item.h + ")
    " + + item.d + "
    "); } else { - div.html(label + " (" + idx.h + ")"); + div.html(label + " (" + item.h + ")"); } } else { div.html(label); @@ -380,79 +144,183 @@ $.widget("custom.catcomplete", $.ui.autocomplete, { } }); $(function() { - var expanded = false; - var windowWidth; - function collapse() { - if (expanded) { - $("div#navbar-top").removeAttr("style"); - $("button#navbar-toggle-button") - .removeClass("expanded") - .attr("aria-expanded", "false"); - expanded = false; - } - } - $("button#navbar-toggle-button").click(function (e) { - if (expanded) { - collapse(); - } else { - var navbar = $("div#navbar-top"); - navbar.height(navbar.prop("scrollHeight")); - $("button#navbar-toggle-button") - .addClass("expanded") - .attr("aria-expanded", "true"); - expanded = true; - windowWidth = window.innerWidth; - } - }); - $("ul.sub-nav-list-small li a").click(collapse); - $("input#search-input").focus(collapse); - $("main").click(collapse); - $("section[id] > :header, :header[id], :header:has(a[id])").each(function(idx, el) { - // Create anchor links for headers with an associated id attribute - var hdr = $(el); - var id = hdr.attr("id") || hdr.parent("section").attr("id") || hdr.children("a").attr("id"); - if (id) { - hdr.append(" " + messages.linkIcon +""); - } - }); - $(window).on("orientationchange", collapse).on("resize", function(e) { - if (expanded && windowWidth !== window.innerWidth) collapse(); - }); - var search = $("#search-input"); - var reset = $("#reset-button"); - search.catcomplete({ + $("#search").catcomplete({ minLength: 1, - delay: 200, - source: doSearch, + delay: 100, + source: function(request, response) { + var result = new Array(); + var presult = new Array(); + var tresult = new Array(); + var mresult = new Array(); + var tgresult = new Array(); + var secondaryresult = new Array(); + var displayCount = 0; + var exactMatcher = new RegExp("^" + $.ui.autocomplete.escapeRegex(request.term) + "$", "i"); + camelCaseRegexp = 
($.ui.autocomplete.escapeRegex(request.term)).split(/(?=[A-Z])/).join("([a-z0-9_$]*?)"); + var camelCaseMatcher = new RegExp("^" + camelCaseRegexp); + secondaryMatcher = new RegExp($.ui.autocomplete.escapeRegex(request.term), "i"); + + // Return the nested innermost name from the specified object + function nestedName(e) { + return e.l.substring(e.l.lastIndexOf(".") + 1); + } + + function concatResults(a1, a2) { + a1 = a1.concat(a2); + a2.length = 0; + return a1; + } + + if (moduleSearchIndex) { + var mdleCount = 0; + $.each(moduleSearchIndex, function(index, item) { + item.category = catModules; + if (exactMatcher.test(item.l)) { + result.push(item); + mdleCount++; + } else if (camelCaseMatcher.test(item.l)) { + result.push(item); + } else if (secondaryMatcher.test(item.l)) { + secondaryresult.push(item); + } + }); + displayCount = mdleCount; + result = concatResults(result, secondaryresult); + } + if (packageSearchIndex) { + var pCount = 0; + var pkg = ""; + $.each(packageSearchIndex, function(index, item) { + item.category = catPackages; + pkg = (item.m) + ? (item.m + "/" + item.l) + : item.l; + if (exactMatcher.test(item.l)) { + presult.push(item); + pCount++; + } else if (camelCaseMatcher.test(pkg)) { + presult.push(item); + } else if (secondaryMatcher.test(pkg)) { + secondaryresult.push(item); + } + }); + result = result.concat(concatResults(presult, secondaryresult)); + displayCount = (pCount > displayCount) ? pCount : displayCount; + } + if (typeSearchIndex) { + var tCount = 0; + $.each(typeSearchIndex, function(index, item) { + item.category = catTypes; + var s = nestedName(item); + if (exactMatcher.test(s)) { + tresult.push(item); + tCount++; + } else if (camelCaseMatcher.test(s)) { + tresult.push(item); + } else if (secondaryMatcher.test(item.p + "." + item.l)) { + secondaryresult.push(item); + } + }); + result = result.concat(concatResults(tresult, secondaryresult)); + displayCount = (tCount > displayCount) ? 
tCount : displayCount; + } + if (memberSearchIndex) { + var mCount = 0; + $.each(memberSearchIndex, function(index, item) { + item.category = catMembers; + var s = nestedName(item); + if (exactMatcher.test(s)) { + mresult.push(item); + mCount++; + } else if (camelCaseMatcher.test(s)) { + mresult.push(item); + } else if (secondaryMatcher.test(item.c + "." + item.l)) { + secondaryresult.push(item); + } + }); + result = result.concat(concatResults(mresult, secondaryresult)); + displayCount = (mCount > displayCount) ? mCount : displayCount; + } + if (tagSearchIndex) { + var tgCount = 0; + $.each(tagSearchIndex, function(index, item) { + item.category = catSearchTags; + if (exactMatcher.test(item.l)) { + tgresult.push(item); + tgCount++; + } else if (secondaryMatcher.test(item.l)) { + secondaryresult.push(item); + } + }); + result = result.concat(concatResults(tgresult, secondaryresult)); + displayCount = (tgCount > displayCount) ? tgCount : displayCount; + } + displayCount = (displayCount > 500) ? 
displayCount : 500; + var counter = function() { + var count = {Modules: 0, Packages: 0, Types: 0, Members: 0, SearchTags: 0}; + var f = function(item) { + count[item.category] += 1; + return (count[item.category] <= displayCount); + }; + return f; + }(); + response(result.filter(counter)); + }, response: function(event, ui) { if (!ui.content.length) { - ui.content.push({ l: messages.noResult }); + ui.content.push(noResult); } else { - $("#search-input").empty(); + $("#search").empty(); } }, autoFocus: true, - focus: function(event, ui) { - return false; - }, position: { collision: "flip" }, select: function(event, ui) { - if (ui.item.indexItem) { - var url = getURL(ui.item.indexItem, ui.item.category); - window.location.href = pathtoroot + url; - $("#search-input").focus(); + if (ui.item.l !== noResult.l) { + var url = getURLPrefix(ui); + if (ui.item.category === catModules) { + if (useModuleDirectories) { + url += "module-summary.html"; + } else { + url = ui.item.l + "-summary.html"; + } + } else if (ui.item.category === catPackages) { + if (ui.item.url) { + url = ui.item.url; + } else { + url += ui.item.l.replace(/\./g, '/') + "/package-summary.html"; + } + } else if (ui.item.category === catTypes) { + if (ui.item.url) { + url = ui.item.url; + } else if (ui.item.p === "") { + url += ui.item.l + ".html"; + } else { + url += ui.item.p.replace(/\./g, '/') + "/" + ui.item.l + ".html"; + } + } else if (ui.item.category === catMembers) { + if (ui.item.p === "") { + url += ui.item.c + ".html" + "#"; + } else { + url += ui.item.p.replace(/\./g, '/') + "/" + ui.item.c + ".html" + "#"; + } + if (ui.item.url) { + url += ui.item.url; + } else { + url += ui.item.l; + } + } else if (ui.item.category === catSearchTags) { + url += ui.item.u; + } + if (top !== window) { + parent.classFrame.location = pathtoroot + url; + } else { + window.location.href = pathtoroot + url; + } + $("#search").focus(); } } }); - search.val(''); - search.prop("disabled", false); - 
reset.prop("disabled", false); - reset.click(function() { - search.val('').focus(); - }); - search.focus(); }); diff --git a/docs/java/stylesheet.css b/docs/java/stylesheet.css index f71489f8..de945eda 100644 --- a/docs/java/stylesheet.css +++ b/docs/java/stylesheet.css @@ -1,76 +1,21 @@ -/* +/* * Javadoc style sheet */ @import url('resources/fonts/dejavu.css'); -/* - * These CSS custom properties (variables) define the core color and font - * properties used in this stylesheet. - */ -:root { - /* body, block and code fonts */ - --body-font-family: 'DejaVu Sans', Arial, Helvetica, sans-serif; - --block-font-family: 'DejaVu Serif', Georgia, "Times New Roman", Times, serif; - --code-font-family: 'DejaVu Sans Mono', monospace; - /* Base font sizes for body and code elements */ - --body-font-size: 14px; - --code-font-size: 14px; - /* Text colors for body and block elements */ - --body-text-color: #353833; - --block-text-color: #474747; - /* Background colors for various structural elements */ - --body-background-color: #ffffff; - --section-background-color: #f8f8f8; - --detail-background-color: #ffffff; - /* Colors for navigation bar and table captions */ - --navbar-background-color: #4D7A97; - --navbar-text-color: #ffffff; - /* Background color for subnavigation and various headers */ - --subnav-background-color: #dee3e9; - /* Background and text colors for selected tabs and navigation items */ - --selected-background-color: #f8981d; - --selected-text-color: #253441; - --selected-link-color: #1f389c; - /* Background colors for generated tables */ - --even-row-color: #ffffff; - --odd-row-color: #eeeeef; - /* Text color for page title */ - --title-color: #2c4557; - /* Text colors for links */ - --link-color: #4A6782; - --link-color-active: #bb7a2a; - /* Snippet colors */ - --snippet-background-color: #ebecee; - --snippet-text-color: var(--block-text-color); - --snippet-highlight-color: #f7c590; - /* Border colors for structural elements and user defined tables */ - 
--border-color: #ededed; - --table-border-color: #000000; - /* Search input colors */ - --search-input-background-color: #ffffff; - --search-input-text-color: #000000; - --search-input-placeholder-color: #909090; - /* Highlight color for active search tag target */ - --search-tag-highlight-color: #ffff00; - /* Adjustments for icon and active background colors of copy-to-clipboard buttons */ - --copy-icon-brightness: 100%; - --copy-button-background-color-active: rgba(168, 168, 176, 0.3); - /* Colors for invalid tag notifications */ - --invalid-tag-background-color: #ffe6e6; - --invalid-tag-text-color: #000000; -} /* * Styles for individual HTML elements. * * These are styles that are specific to individual HTML elements. Changing them affects the style of a particular * HTML element throughout the page. */ + body { - background-color:var(--body-background-color); - color:var(--body-text-color); - font-family:var(--body-font-family); - font-size:var(--body-font-size); + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; margin:0; padding:0; height:100%; @@ -86,69 +31,70 @@ iframe { } a:link, a:visited { text-decoration:none; - color:var(--link-color); + color:#4A6782; } a[href]:hover, a[href]:focus { text-decoration:none; - color:var(--link-color-active); + color:#bb7a2a; +} +a[name] { + color:#353833; +} +a[name]:before, a[name]:target, a[id]:before, a[id]:target { + content:""; + display:inline-block; + position:relative; + padding-top:129px; + margin-top:-129px; } pre { - font-family:var(--code-font-family); - font-size:1em; + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; } h1 { - font-size:1.428em; + font-size:20px; } h2 { - font-size:1.285em; + font-size:18px; } h3 { - font-size:1.14em; + font-size:16px; + font-style:italic; } h4 { - font-size:1.072em; + font-size:13px; } h5 { - font-size:1.001em; + font-size:12px; } h6 { - font-size:0.93em; -} -/* Disable font boosting for 
selected elements */ -h1, h2, h3, h4, h5, h6, div.member-signature { - max-height: 1000em; + font-size:11px; } ul { list-style-type:disc; } code, tt { - font-family:var(--code-font-family); -} -:not(h1, h2, h3, h4, h5, h6) > code, -:not(h1, h2, h3, h4, h5, h6) > tt { - font-size:var(--code-font-size); + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; padding-top:4px; margin-top:8px; line-height:1.4em; } dt code { - font-family:var(--code-font-family); - font-size:1em; + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; padding-top:4px; } -.summary-table dt code { - font-family:var(--code-font-family); - font-size:1em; +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; vertical-align:top; padding-top:4px; } sup { font-size:8px; } -button { - font-family: var(--body-font-family); - font-size: 1em; -} + /* * Styles for HTML generated by javadoc. * @@ -158,187 +104,254 @@ button { /* * Styles for document title and copyright. */ -.about-language { +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { float:right; - padding:0 21px 8px 8px; - font-size:0.915em; + padding:0px 21px; + font-size:11px; + z-index:200; margin-top:-9px; - height:2.9em; } -.legal-copy { +.legalCopy { margin-left:.5em; } +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} /* * Styles for navigation bar. 
*/ -@media screen { - div.flex-box { - position:fixed; - display:flex; - flex-direction:column; - height: 100%; - width: 100%; - } - header.flex-header { - flex: 0 0 auto; - } - div.flex-content { - flex: 1 1 auto; - overflow-y: auto; - } -} -.top-nav { - background-color:var(--navbar-background-color); - color:var(--navbar-text-color); +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.navPadding { + padding-top: 107px; +} +.fixedNav { + position:fixed; + width:100%; + z-index:999; + background-color:#ffffff; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; float:left; + padding:0; width:100%; clear:right; - min-height:2.8em; - padding:10px 0 0 0; + height:2.8em; + padding-top:10px; overflow:hidden; - font-size:0.857em; -} -button#navbar-toggle-button { - display:none; + font-size:12px; } -ul.sub-nav-list-small { - display: none; +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; } -.sub-nav { - background-color:var(--subnav-background-color); +.subNav { + background-color:#dee3e9; float:left; width:100%; overflow:hidden; - font-size:0.857em; + font-size:12px; } -.sub-nav div { +.subNav div { clear:left; float:left; - padding:6px; + padding:0 0 5px 6px; text-transform:uppercase; } -.sub-nav .sub-nav-list { - padding-top:4px; -} -ul.nav-list { - display:block; - margin:0 25px 0 0; - padding:0; -} -ul.sub-nav-list { +ul.navList, ul.subNavList { float:left; margin:0 25px 0 0; padding:0; } -ul.nav-list li { +ul.navList li{ list-style:none; float:left; padding: 5px 6px; text-transform:uppercase; } -.sub-nav .nav-list-search { +ul.navListSearch { float:right; - margin:0; - padding:6px; - clear:none; - text-align:right; + margin:0 0 0 0; + padding:0; +} +ul.navListSearch li { + list-style:none; + float:right; 
+ padding: 5px 6px; + text-transform:uppercase; +} +ul.navListSearch li label { position:relative; + right:-16px; } -ul.sub-nav-list li { +ul.subNavList li { list-style:none; float:left; } -.top-nav a:link, .top-nav a:active, .top-nav a:visited { - color:var(--navbar-text-color); +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; text-decoration:none; text-transform:uppercase; } -.top-nav a:hover { - color:var(--link-color-active); +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; } -.nav-bar-cell1-rev { - background-color:var(--selected-background-color); - color:var(--selected-text-color); +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; margin: auto 5px; } -.skip-nav { +.skipNav { position:absolute; top:auto; left:-9999px; overflow:hidden; } /* - * Hide navigation links and search box in print layout + * Styles for page header and footer. */ -@media print { - ul.nav-list, div.sub-nav { - display:none; - } +.header, .footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexNav { + position:relative; + font-size:12px; + background-color:#dee3e9; +} +.indexNav ul { + margin-top:0; + padding:5px; +} +.indexNav ul li { + display:inline; + list-style-type:none; + padding-right:10px; + text-transform:uppercase; +} +.indexNav h1 { + font-size:13px; } -/* - * Styles for page header. - */ .title { - color:var(--title-color); + color:#2c4557; margin:10px 0; } -.sub-title { +.subTitle { margin:5px 0 0 0; } -ul.contents-list { - margin: 0 0 15px 0; - padding: 0; - list-style: none; +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; } -ul.contents-list li { - font-size:0.93em; +.header ul li, .footer ul li { + list-style:none; + font-size:13px; } /* * Styles for headings. 
*/ -body.class-declaration-page .summary h2, -body.class-declaration-page .details h2, -body.class-use-page h2, -body.module-declaration-page .block-list h2 { - font-style: italic; - padding:0; - margin:15px 0; +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; } -body.class-declaration-page .summary h3, -body.class-declaration-page .details h3, -body.class-declaration-page .summary .inherited-list h2 { - background-color:var(--subnav-background-color); - border:1px solid var(--border-color); +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; margin:0 0 6px -8px; padding:7px 5px; } +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} /* * Styles for page layout containers. 
*/ -main { +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer, +.allClassesContainer, .allPackagesContainer { clear:both; padding:10px 20px; position:relative; } -dl.notes > dt { - font-family: var(--body-font-family); - font-size:0.856em; +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; font-weight:bold; margin:10px 0 0 0; - color:var(--body-text-color); + color:#4E4E4E; } -dl.notes > dd { - margin:5px 10px 10px 0; - font-size:1em; - font-family:var(--block-font-family) +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; } -dl.name-value > dt { +.serializedFormContainer dl.nameValue dt { margin-left:1px; font-size:1.1em; display:inline; font-weight:bold; } -dl.name-value > dd { +.serializedFormContainer dl.nameValue dd { margin:0 0 0 1px; font-size:1.1em; display:inline; @@ -353,310 +366,307 @@ ul.horizontal li { display:inline; font-size:0.9em; } -div.inheritance { +ul.inheritance { margin:0; padding:0; } -div.inheritance div.inheritance { - margin-left:2em; +ul.inheritance li { + display:inline; + list-style:none; } -ul.block-list, -ul.details-list, -ul.member-list, -ul.summary-list { +ul.inheritance li ul.inheritance { + margin-left:15px; + padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { margin:10px 0 10px 0; padding:0; } -ul.block-list > li, -ul.details-list > li, -ul.member-list > li, -ul.summary-list > li { +ul.blockList li.blockList, ul.blockListLast li.blockList { list-style:none; 
margin-bottom:15px; line-height:1.4; } -ul.ref-list { - padding:0; - margin:0; +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; } -ul.ref-list > li { +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { list-style:none; + border-bottom:none; + padding-bottom:0; } -.summary-table dl, .summary-table dl dt, .summary-table dl dd { +table tr td dl, table tr td dl dt, table tr td dl dd { margin-top:0; margin-bottom:1px; } -ul.tag-list, ul.tag-list-long { - padding-left: 0; - list-style: none; -} -ul.tag-list li { - display: inline; -} -ul.tag-list li:not(:last-child):after, -ul.tag-list-long li:not(:last-child):after -{ - content: ", "; - white-space: pre-wrap; -} -ul.preview-feature-list { - list-style: none; - margin:0; - padding:0.1em; - line-height: 1.6em; -} /* * Styles for tables. 
*/ -.summary-table, .details-table { +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary, +.requiresSummary, .packagesSummary, .providesSummary, .usesSummary { width:100%; border-spacing:0; - border:1px solid var(--border-color); - border-top:0; - padding:0; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; } -.caption { +.overviewSummary, .memberSummary, .requiresSummary, .packagesSummary, .providesSummary, .usesSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption, +.requiresSummary caption, .packagesSummary caption, .providesSummary caption, .usesSummary caption { position:relative; text-align:left; background-repeat:no-repeat; - color:var(--selected-text-color); + color:#253441; + font-weight:bold; clear:none; overflow:hidden; - padding: 10px 0 0 1px; - margin:0; -} -.caption a:link, .caption a:visited { - color:var(--selected-link-color); + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.constantsSummary caption a:link, .constantsSummary caption a:visited, +.useSummary caption a:link, .useSummary caption a:visited { + color:#1f389c; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.deprecatedSummary caption a:link, +.requiresSummary caption a:link, .packagesSummary caption a:link, .providesSummary caption a:link, +.usesSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.requiresSummary caption a:hover, .packagesSummary caption a:hover, .providesSummary caption a:hover, +.usesSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption 
a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.requiresSummary caption a:active, .packagesSummary caption a:active, .providesSummary caption a:active, +.usesSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.deprecatedSummary caption a:visited, +.requiresSummary caption a:visited, .packagesSummary caption a:visited, .providesSummary caption a:visited, +.usesSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span, +.requiresSummary caption span, .packagesSummary caption span, .providesSummary caption span, +.usesSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; } -.caption a:hover, -.caption a:active { - color:var(--navbar-text-color); +.memberSummary caption span.activeTableTab span, .packagesSummary caption span.activeTableTab span, +.overviewSummary caption span.activeTableTab span, .typeSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; } -.caption span { - font-weight:bold; +.memberSummary caption span.tableTab span, .packagesSummary caption span.tableTab span, +.overviewSummary caption span.tableTab span, .typeSummary caption span.tableTab span { white-space:nowrap; - padding:5px 12px 7px 12px; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; display:inline-block; float:left; - background-color:var(--selected-background-color); - border: none; + 
background-color:#4D7A97; height:16px; } -div.table-tabs { - padding:10px 0 0 1px; - margin:10px 0 0 0; +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab, +.packagesSummary caption span.tableTab, .packagesSummary caption span.activeTableTab, +.overviewSummary caption span.tableTab, .overviewSummary caption span.activeTableTab, +.typeSummary caption span.tableTab, .typeSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; } -div.table-tabs > button { - border: none; - cursor: pointer; - padding: 5px 12px 7px 12px; - font-weight: bold; - margin-right: 8px; -} -div.table-tabs > .active-table-tab { - background: var(--selected-background-color); - color: var(--selected-text-color); -} -div.table-tabs > button.table-tab { - background: var(--navbar-background-color); - color: var(--navbar-text-color); -} -.two-column-search-results { - display: grid; - grid-template-columns: minmax(400px, max-content) minmax(400px, auto); -} -div.checkboxes { - line-height: 2em; -} -div.checkboxes > span { - margin-left: 10px; -} -div.checkboxes > label { - margin-left: 8px; - white-space: nowrap; -} -div.checkboxes > label > input { - margin: 0 2px; -} -.two-column-summary { - display: grid; - grid-template-columns: minmax(25%, max-content) minmax(25%, auto); -} -.three-column-summary { - display: grid; - grid-template-columns: minmax(15%, max-content) minmax(20%, max-content) minmax(20%, auto); -} -.three-column-release-summary { - display: grid; - grid-template-columns: minmax(40%, max-content) minmax(10%, max-content) minmax(40%, auto); -} -.four-column-summary { - display: grid; - grid-template-columns: minmax(10%, max-content) minmax(15%, max-content) minmax(15%, max-content) minmax(15%, auto); -} -@media screen and (max-width: 1000px) { - .four-column-summary { - display: grid; - grid-template-columns: minmax(15%, max-content) minmax(15%, auto); - } 
-} -@media screen and (max-width: 800px) { - .two-column-search-results { - display: grid; - grid-template-columns: minmax(40%, max-content) minmax(40%, auto); - } - .three-column-summary { - display: grid; - grid-template-columns: minmax(10%, max-content) minmax(25%, auto); - } - .three-column-release-summary { - display: grid; - grid-template-columns: minmax(70%, max-content) minmax(30%, max-content) - } - .three-column-summary .col-last, - .three-column-release-summary .col-last{ - grid-column-end: span 2; - } -} -@media screen and (max-width: 600px) { - .two-column-summary { - display: grid; - grid-template-columns: 1fr; - } -} -.summary-table > div, .details-table > div { +.overviewSummary .tabEnd, .memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd, +.requiresSummary .tabEnd, .packagesSummary .tabEnd, .providesSummary .tabEnd, .usesSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd, .packagesSummary .activeTableTab .tabEnd, +.overviewSummary .activeTableTab .tabEnd, .typeSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd, .packagesSummary .tableTab .tabEnd, +.overviewSummary .tableTab .tabEnd, .typeSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; +} +.rowColor th, .altColor th { + font-weight:normal; +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td, +.requiresSummary td, .packagesSummary td, .providesSummary td, .usesSummary td { text-align:left; - padding: 8px 3px 3px 7px; - overflow-x: auto; - scrollbar-width: thin; + padding:0px 0px 12px 10px; } -.col-first, .col-second, .col-last, 
.col-constructor-name, .col-summary-item-name { +th.colFirst, th.colSecond, th.colLast, th.colConstructorName, th.colDeprecatedItemName, .useSummary th, +.constantsSummary th, .packagesSummary th, td.colFirst, td.colSecond, td.colLast, .useSummary td, +.constantsSummary td { vertical-align:top; - padding-right:0; + padding-right:0px; padding-top:8px; padding-bottom:3px; } -.table-header { - background:var(--subnav-background-color); - font-weight: bold; -} -/* Sortable table columns */ -.table-header[onclick] { - cursor: pointer; -} -.table-header[onclick]::after { - content:""; - display:inline-block; - background-image:url('data:image/svg+xml; utf8, \ - \ - '); - background-size:100% 100%; - width:9px; - height:14px; - margin-left:4px; - margin-bottom:-3px; -} -.table-header[onclick].sort-asc::after { - background-image:url('data:image/svg+xml; utf8, \ - \ - \ - '); - +th.colFirst, th.colSecond, th.colLast, th.colConstructorName, th.colDeprecatedItemName, .constantsSummary th, +.packagesSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; } -.table-header[onclick].sort-desc::after { - background-image:url('data:image/svg+xml; utf8, \ - \ - \ - '); +td.colFirst, th.colFirst { + font-size:13px; } -.col-first, .col-first { - font-size:0.93em; +td.colSecond, th.colSecond, td.colLast, th.colConstructorName, th.colDeprecatedItemName, th.colLast { + font-size:13px; } -.col-second, .col-second, .col-last, .col-constructor-name, .col-summary-item-name, .col-last { - font-size:0.93em; +.constantsSummary th, .packagesSummary th { + font-size:13px; } -.col-first, .col-second, .col-constructor-name { +.providesSummary th.colFirst, .providesSummary th.colLast, .providesSummary td.colFirst, +.providesSummary td.colLast { + white-space:normal; + font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.requiresSummary td.colFirst, .requiresSummary th.colFirst, +.packagesSummary td.colFirst, .packagesSummary td.colSecond, 
.packagesSummary th.colFirst, .packagesSummary th, +.usesSummary td.colFirst, .usesSummary th.colFirst, +.providesSummary td.colFirst, .providesSummary th.colFirst, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colSecond, .memberSummary th.colSecond, .memberSummary th.colConstructorName, +.typeSummary td.colFirst, .typeSummary th.colFirst { vertical-align:top; - overflow: auto; } -.col-last { +.packagesSummary th.colLast, .packagesSummary td.colLast { white-space:normal; } -.col-first a:link, .col-first a:visited, -.col-second a:link, .col-second a:visited, -.col-first a:link, .col-first a:visited, -.col-second a:link, .col-second a:visited, -.col-constructor-name a:link, .col-constructor-name a:visited, -.col-summary-item-name a:link, .col-summary-item-name a:visited { +td.colFirst a:link, td.colFirst a:visited, +td.colSecond a:link, td.colSecond a:visited, +th.colFirst a:link, th.colFirst a:visited, +th.colSecond a:link, th.colSecond a:visited, +th.colConstructorName a:link, th.colConstructorName a:visited, +th.colDeprecatedItemName a:link, th.colDeprecatedItemName a:visited, +.constantValuesContainer td a:link, .constantValuesContainer td a:visited, +.allClassesContainer td a:link, .allClassesContainer td a:visited, +.allPackagesContainer td a:link, .allPackagesContainer td a:visited { font-weight:bold; } -.even-row-color, .even-row-color .table-header { - background-color:var(--even-row-color); +.tableSubHeadingColor { + background-color:#EEEEFF; } -.odd-row-color, .odd-row-color .table-header { - background-color:var(--odd-row-color); +.altColor, .altColor th { + background-color:#FFFFFF; +} +.rowColor, .rowColor th { + background-color:#EEEEEF; } /* * Styles for contents. 
*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} div.block { - font-size:var(--body-font-size); - font-family:var(--block-font-family); + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; } -.col-last div { - padding-top:0; +td.colLast div { + padding-top:0px; } -.col-last a { +td.colLast a { padding-bottom:3px; } -.module-signature, -.package-signature, -.type-signature, -.member-signature { - font-family:var(--code-font-family); - font-size:1em; - margin:14px 0; - white-space: pre-wrap; -} -.module-signature, -.package-signature, -.type-signature { - margin-top: 0; -} -.member-signature .type-parameters-long, -.member-signature .parameters, -.member-signature .exceptions { - display: inline-block; - vertical-align: top; - white-space: pre; -} -.member-signature .type-parameters { - white-space: normal; -} /* * Styles for formatting effect. 
*/ -.source-line-no { - /* Color of line numbers in source pages can be set via custom property below */ - color:var(--source-linenumber-color, green); +.sourceLineNo { + color:green; padding:0 30px 0 0; } +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} .block { display:block; - margin:0 10px 5px 0; - color:var(--block-text-color); + margin:3px 10px 2px 0px; + color:#474747; } -.deprecated-label, .description-from-type-label, .implementation-label, .member-name-link, -.module-label-in-package, .module-label-in-type, .package-label-in-type, -.package-hierarchy-label, .type-name-label, .type-name-link, .search-tag-link, .preview-label { +.deprecatedLabel, .descfrmTypeLabel, .implementationLabel, .memberNameLabel, .memberNameLink, +.moduleLabelInPackage, .moduleLabelInType, .overrideSpecifyLabel, .packageLabelInType, +.packageHierarchyLabel, .paramLabel, .returnLabel, .seeLabel, .simpleTagLabel, +.throwsLabel, .typeNameLabel, .typeNameLink, .searchTagLink { font-weight:bold; } -.deprecation-comment, .help-footnote, .preview-comment { +.deprecationComment, .emphasizedPhrase, .interfaceName { font-style:italic; } -.deprecation-block { - font-size:1em; - font-family:var(--block-font-family); +.deprecationBlock { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; border-style:solid; border-width:thin; border-radius:10px; @@ -665,32 +675,67 @@ div.block { margin-right:10px; display:inline-block; } -.preview-block { - font-size:1em; - font-family:var(--block-font-family); - border-style:solid; - border-width:thin; - border-radius:10px; - padding:10px; - margin-bottom:10px; - margin-right:10px; - display:inline-block; -} -div.block div.deprecation-comment { +div.block div.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { font-style:normal; } -details.invalid-tag, span.invalid-tag { - font-size:1em; - font-family:var(--block-font-family); - color: 
var(--invalid-tag-text-color); - background: var(--invalid-tag-background-color); - border: thin solid var(--table-border-color); - border-radius:2px; - padding: 2px 4px; - display:inline-block; +div.contentContainer ul.blockList li.blockList h2 { + padding-bottom:0px; +} +/* + * Styles for IFRAME. + */ +.mainContainer { + margin:0 auto; + padding:0; + height:100%; + width:100%; + position:fixed; + top:0; + left:0; } -details summary { - cursor: pointer; +.leftContainer { + height:100%; + position:fixed; + width:320px; +} +.leftTop { + position:relative; + float:left; + width:315px; + top:0; + left:0; + height:30%; + border-right:6px solid #ccc; + border-bottom:6px solid #ccc; +} +.leftBottom { + position:relative; + float:left; + width:315px; + bottom:0; + left:0; + height:70%; + border-right:6px solid #ccc; + border-top:1px solid #000; +} +.rightContainer { + position:absolute; + left:320px; + top:0; + bottom:0; + height:100%; + right:0; + border-left:1px solid #000; +} +.rightIframe { + margin:0; + padding:0; + height:100%; + right:30px; + width:100%; + overflow:visible; + margin-bottom:30px; } /* * Styles specific to HTML5 elements. @@ -701,321 +746,90 @@ main, nav, header, footer, section { /* * Styles for javadoc search. 
*/ -.ui-state-active { - /* Overrides the color of selection used in jQuery UI */ - background: var(--selected-background-color); - border: 1px solid var(--selected-background-color); - color: var(--selected-text-color); -} .ui-autocomplete-category { font-weight:bold; font-size:15px; padding:7px 0 7px 3px; - background-color:var(--navbar-background-color); - color:var(--navbar-text-color); + background-color:#4D7A97; + color:#FFFFFF; +} +.resultItem { + font-size:13px; } .ui-autocomplete { max-height:85%; max-width:65%; - overflow-y:auto; - overflow-x:auto; - scrollbar-width: thin; + overflow-y:scroll; + overflow-x:scroll; white-space:nowrap; box-shadow: 0 3px 6px rgba(0,0,0,0.16), 0 3px 6px rgba(0,0,0,0.23); } ul.ui-autocomplete { position:fixed; - z-index:1; - background-color: var(--body-background-color); + z-index:999999; + background-color: #FFFFFF; } -ul.ui-autocomplete li { +ul.ui-autocomplete li { float:left; clear:both; - min-width:100%; + width:100%; } -ul.ui-autocomplete li.ui-static-link { - position:sticky; - bottom:0; - left:0; - background: var(--subnav-background-color); - padding: 5px 0; - font-family: var(--body-font-family); - font-size: 0.93em; - font-weight: bolder; - z-index: 2; -} -li.ui-static-link a, li.ui-static-link a:visited { - text-decoration:none; - color:var(--link-color); - float:right; - margin-right:20px; +.resultHighlight { + font-weight:bold; } .ui-autocomplete .result-item { font-size: inherit; } -.ui-autocomplete .result-highlight { - font-weight:bold; -} -#search-input, #page-search-input { +#search { background-image:url('resources/glass.png'); background-size:13px; background-repeat:no-repeat; background-position:2px 3px; - background-color: var(--search-input-background-color); - color: var(--search-input-text-color); - border-color: var(--border-color); padding-left:20px; - width: 250px; - margin: 0; -} -#search-input { - margin-left: 4px; + position:relative; + right:-18px; } -#reset-button { - background-color: 
transparent; +#reset { + background-color: rgb(255,255,255); background-image:url('resources/x.png'); + background-position:center; background-repeat:no-repeat; - background-size:contain; - border:0; - border-radius:0; - width:12px; - height:12px; - position:absolute; - right:12px; - top:10px; - font-size:0; + background-size:12px; + border:0 none; + width:16px; + height:17px; + position:relative; + left:-4px; + top:-4px; + font-size:0px; } -::placeholder { - color:var(--search-input-placeholder-color); - opacity: 1; +.watermark { + color:#545454; } -.search-tag-desc-result { +.searchTagDescResult { font-style:italic; font-size:11px; } -.search-tag-holder-result { +.searchTagHolderResult { font-style:italic; font-size:12px; } -.search-tag-result:target { - background-color:var(--search-tag-highlight-color); -} -details.page-search-details { - display: inline-block; -} -div#result-container { - font-size: 1em; -} -div#result-container a.search-result-link { - padding: 0; - margin: 4px 0; - width: 100%; -} -#result-container .result-highlight { - font-weight:bolder; -} -.page-search-info { - background-color: var(--subnav-background-color); - border-radius: 3px; - border: 0 solid var(--border-color); - padding: 0 8px; - overflow: hidden; - height: 0; - transition: all 0.2s ease; -} -div.table-tabs > button.table-tab { - background: var(--navbar-background-color); - color: var(--navbar-text-color); +.searchTagResult:before, .searchTagResult:target { + color:red; } -.page-search-header { - padding: 5px 12px 7px 12px; - font-weight: bold; - margin-right: 3px; - background-color:var(--navbar-background-color); - color:var(--navbar-text-color); - display: inline-block; -} -button.page-search-header { - border: none; - cursor: pointer; -} -span#page-search-link { - text-decoration: underline; -} -.module-graph span, .sealed-graph span { +.moduleGraph span { display:none; position:absolute; } -.module-graph:hover span, .sealed-graph:hover span { +.moduleGraph:hover span { 
display:block; margin: -100px 0 0 100px; z-index: 1; } -.inherited-list { - margin: 10px 0 10px 0; -} -section.class-description { - line-height: 1.4; -} -.summary section[class$="-summary"], .details section[class$="-details"], -.class-uses .detail, .serialized-class-details { - padding: 0 20px 5px 10px; - border: 1px solid var(--border-color); - background-color: var(--section-background-color); -} -.inherited-list, section[class$="-details"] .detail { - padding:0 0 5px 8px; - background-color:var(--detail-background-color); - border:none; -} -.vertical-separator { - padding: 0 5px; -} -ul.help-section-list { - margin: 0; -} -ul.help-subtoc > li { - display: inline-block; - padding-right: 5px; - font-size: smaller; -} -ul.help-subtoc > li::before { - content: "\2022" ; - padding-right:2px; -} -.help-note { - font-style: italic; -} -/* - * Indicator icon for external links. - */ -main a[href*="://"]::after { - content:""; - display:inline-block; - background-image:url('data:image/svg+xml; utf8, \ - \ - \ - '); - background-size:100% 100%; - width:7px; - height:7px; - margin-left:2px; - margin-bottom:4px; -} -main a[href*="://"]:hover::after, -main a[href*="://"]:focus::after { - background-image:url('data:image/svg+xml; utf8, \ - \ - \ - '); -} -/* - * Styles for header/section anchor links - */ -a.anchor-link { - opacity: 0; - transition: opacity 0.1s; -} -:hover > a.anchor-link { - opacity: 80%; -} -a.anchor-link:hover, -a.anchor-link:focus-visible, -a.anchor-link.visible { - opacity: 100%; -} -a.anchor-link > img { - width: 0.9em; - height: 0.9em; -} -/* - * Styles for copy-to-clipboard buttons - */ -button.copy { - opacity: 70%; - border: none; - border-radius: 3px; - position: relative; - background:none; - transition: opacity 0.3s; - cursor: pointer; -} -:hover > button.copy { - opacity: 80%; -} -button.copy:hover, -button.copy:active, -button.copy:focus-visible, -button.copy.visible { - opacity: 100%; -} -button.copy img { - position: relative; - 
background: none; - filter: brightness(var(--copy-icon-brightness)); -} -button.copy:active { - background-color: var(--copy-button-background-color-active); -} -button.copy span { - color: var(--body-text-color); - position: relative; - top: -0.1em; - transition: all 0.1s; - font-size: 0.76rem; - line-height: 1.2em; - opacity: 0; -} -button.copy:hover span, -button.copy:focus-visible span, -button.copy.visible span { - opacity: 100%; -} -/* search page copy button */ -button#page-search-copy { - margin-left: 0.4em; - padding:0.3em; - top:0.13em; -} -button#page-search-copy img { - width: 1.2em; - height: 1.2em; - padding: 0.01em 0; - top: 0.15em; -} -button#page-search-copy span { - color: var(--body-text-color); - line-height: 1.2em; - padding: 0.2em; - top: -0.18em; -} -div.page-search-info:hover button#page-search-copy span { - opacity: 100%; -} -/* snippet copy button */ -button.snippet-copy { - position: absolute; - top: 6px; - right: 6px; - height: 1.7em; - padding: 2px; -} -button.snippet-copy img { - width: 18px; - height: 18px; - padding: 0.05em 0; -} -button.snippet-copy span { - line-height: 1.2em; - padding: 0.2em; - position: relative; - top: -0.5em; -} -div.snippet-container:hover button.snippet-copy span { - opacity: 100%; +.methodSignature { + white-space:normal; } + /* * Styles for user-provided tables. 
* @@ -1062,211 +876,35 @@ table.borderless > thead > tr, table.borderless > tbody > tr, table.borderless > } table.plain { border-collapse: collapse; - border: 1px solid var(--table-border-color); + border: 1px solid black; } table.plain > thead > tr, table.plain > tbody tr, table.plain > tr { background-color: transparent; } table.plain > thead > tr > th, table.plain > tbody > tr > th, table.plain > tr > th, table.plain > thead > tr > td, table.plain > tbody > tr > td, table.plain > tr > td { - border: 1px solid var(--table-border-color); + border: 1px solid black; } table.striped { border-collapse: collapse; - border: 1px solid var(--table-border-color); + border: 1px solid black; } table.striped > thead { - background-color: var(--subnav-background-color); + background-color: #E3E3E3; } table.striped > thead > tr > th, table.striped > thead > tr > td { - border: 1px solid var(--table-border-color); + border: 1px solid black; } table.striped > tbody > tr:nth-child(even) { - background-color: var(--odd-row-color) + background-color: #EEE } table.striped > tbody > tr:nth-child(odd) { - background-color: var(--even-row-color) + background-color: #FFF } table.striped > tbody > tr > th, table.striped > tbody > tr > td { - border-left: 1px solid var(--table-border-color); - border-right: 1px solid var(--table-border-color); + border-left: 1px solid black; + border-right: 1px solid black; } table.striped > tbody > tr > th { font-weight: normal; } -/** - * Tweak style for small screens. 
- */ -@media screen and (max-width: 920px) { - header.flex-header { - max-height: 100vh; - overflow-y: auto; - } - div#navbar-top { - height: 2.8em; - transition: height 0.35s ease; - } - ul.nav-list { - display: block; - width: 40%; - float:left; - clear: left; - margin: 10px 0 0 0; - padding: 0; - } - ul.nav-list li { - float: none; - padding: 6px; - margin-left: 10px; - margin-top: 2px; - } - ul.sub-nav-list-small { - display:block; - height: 100%; - width: 50%; - float: right; - clear: right; - background-color: var(--subnav-background-color); - color: var(--body-text-color); - margin: 6px 0 0 0; - padding: 0; - } - ul.sub-nav-list-small ul { - padding-left: 20px; - } - ul.sub-nav-list-small a:link, ul.sub-nav-list-small a:visited { - color:var(--link-color); - } - ul.sub-nav-list-small a:hover { - color:var(--link-color-active); - } - ul.sub-nav-list-small li { - list-style:none; - float:none; - padding: 6px; - margin-top: 1px; - text-transform:uppercase; - } - ul.sub-nav-list-small > li { - margin-left: 10px; - } - ul.sub-nav-list-small li p { - margin: 5px 0; - } - div#navbar-sub-list { - display: none; - } - .top-nav a:link, .top-nav a:active, .top-nav a:visited { - display: block; - } - button#navbar-toggle-button { - width: 3.4em; - height: 2.8em; - background-color: transparent; - display: block; - float: left; - border: 0; - margin: 0 10px; - cursor: pointer; - font-size: 10px; - } - button#navbar-toggle-button .nav-bar-toggle-icon { - display: block; - width: 24px; - height: 3px; - margin: 1px 0 4px 0; - border-radius: 2px; - transition: all 0.1s; - background-color: var(--navbar-text-color); - } - button#navbar-toggle-button.expanded span.nav-bar-toggle-icon:nth-child(1) { - transform: rotate(45deg); - transform-origin: 10% 10%; - width: 26px; - } - button#navbar-toggle-button.expanded span.nav-bar-toggle-icon:nth-child(2) { - opacity: 0; - } - button#navbar-toggle-button.expanded span.nav-bar-toggle-icon:nth-child(3) { - transform: rotate(-45deg); - 
transform-origin: 10% 90%; - width: 26px; - } -} -@media screen and (max-width: 800px) { - .about-language { - padding-right: 16px; - } - ul.nav-list li { - margin-left: 5px; - } - ul.sub-nav-list-small > li { - margin-left: 5px; - } - main { - padding: 10px; - } - .summary section[class$="-summary"], .details section[class$="-details"], - .class-uses .detail, .serialized-class-details { - padding: 0 8px 5px 8px; - } - body { - -webkit-text-size-adjust: none; - } -} -@media screen and (max-width: 400px) { - .about-language { - font-size: 10px; - padding-right: 12px; - } -} -@media screen and (max-width: 400px) { - .nav-list-search { - width: 94%; - } - #search-input, #page-search-input { - width: 70%; - } -} -@media screen and (max-width: 320px) { - .nav-list-search > label { - display: none; - } - .nav-list-search { - width: 90%; - } - #search-input, #page-search-input { - width: 80%; - } -} - -pre.snippet { - background-color: var(--snippet-background-color); - color: var(--snippet-text-color); - padding: 10px; - margin: 12px 0; - overflow: auto; - white-space: pre; -} -div.snippet-container { - position: relative; -} -@media screen and (max-width: 800px) { - pre.snippet { - padding-top: 26px; - } - button.snippet-copy { - top: 4px; - right: 4px; - } -} -pre.snippet .italic { - font-style: italic; -} -pre.snippet .bold { - font-weight: bold; -} -pre.snippet .highlighted { - background-color: var(--snippet-highlight-color); - border-radius: 10%; -} diff --git a/docs/java/type-search-index.js b/docs/java/type-search-index.js index c0a46319..373d08da 100644 --- a/docs/java/type-search-index.js +++ b/docs/java/type-search-index.js @@ -1 +1 @@ -typeSearchIndex = [{"l":"All Classes and 
Interfaces","u":"allclasses-index.html"},{"p":"com.spotify.voyager.jni","l":"Index"},{"p":"com.spotify.voyager.jni.utils","l":"JniLibExtractor"},{"p":"com.spotify.voyager.jni","l":"Index.QueryResults"},{"p":"com.spotify.voyager.jni","l":"StringIndex.QueryResults"},{"p":"com.spotify.voyager.jni","l":"Index.SpaceType"},{"p":"com.spotify.voyager.jni","l":"Index.StorageDataType"},{"p":"com.spotify.voyager.jni","l":"StringIndex"},{"p":"com.spotify.voyager.jni.utils","l":"TinyJson"}];updateSearchResults(); \ No newline at end of file +typeSearchIndex = [{"l":"All Classes","url":"allclasses-index.html"},{"p":"com.spotify.voyager.jni","l":"Index"},{"p":"com.spotify.voyager.jni.utils","l":"JniLibExtractor"},{"p":"com.spotify.voyager.jni","l":"Index.QueryResults"},{"p":"com.spotify.voyager.jni","l":"StringIndex.QueryResults"},{"p":"com.spotify.voyager.jni","l":"Index.SpaceType"},{"p":"com.spotify.voyager.jni","l":"Index.StorageDataType"},{"p":"com.spotify.voyager.jni","l":"StringIndex"},{"p":"com.spotify.voyager.jni.utils","l":"TinyJson"}] \ No newline at end of file From ba06fd2c73e7cae2aba2ebfc6b5d664a4cbfdbec Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Sun, 18 Aug 2024 20:05:56 -0600 Subject: [PATCH 20/25] Add EOF EOL --- cpp/.clang-format | 1 + cpp/src/Enums.h | 2 +- cpp/src/Metadata.h | 2 +- cpp/src/StreamUtils.h | 2 +- cpp/src/array_utils.h | 2 +- cpp/src/std_utils.h | 2 +- cpp/test/doctest_setup.cpp | 2 +- java/JavaInputStream.h | 2 +- java/JavaOutputStream.h | 2 +- python/src/PythonFileLike.h | 2 +- python/src/PythonInputStream.h | 2 +- python/src/PythonOutputStream.h | 2 +- 12 files changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/.clang-format b/cpp/.clang-format index 3cb6857e..56d41cd4 100644 --- a/cpp/.clang-format +++ b/cpp/.clang-format @@ -1,6 +1,7 @@ --- BasedOnStyle: LLVM IndentWidth: 2 +InsertNewlineAtEOF: true --- Language: Cpp # Use 120 columns since we have big screens now diff --git a/cpp/src/Enums.h b/cpp/src/Enums.h index 
c9bcaf7d..885dada4 100644 --- a/cpp/src/Enums.h +++ b/cpp/src/Enums.h @@ -57,4 +57,4 @@ std::ostream &operator<<(std::ostream &os, const SpaceType space) { std::ostream &operator<<(std::ostream &os, const StorageDataType sdt) { os << toString(sdt); return os; -} \ No newline at end of file +} diff --git a/cpp/src/Metadata.h b/cpp/src/Metadata.h index 7f460268..1cbab7da 100644 --- a/cpp/src/Metadata.h +++ b/cpp/src/Metadata.h @@ -129,4 +129,4 @@ static std::unique_ptr loadFromStream(std::shared_ptr }; } // namespace Metadata -}; // namespace voyager \ No newline at end of file +}; // namespace voyager diff --git a/cpp/src/StreamUtils.h b/cpp/src/StreamUtils.h index 5f0f68b1..d4d40d25 100644 --- a/cpp/src/StreamUtils.h +++ b/cpp/src/StreamUtils.h @@ -162,4 +162,4 @@ template static void readBinaryPOD(std::shared_ptr in, throw std::runtime_error("Failed to read " + std::to_string(sizeof(T)) + " bytes from stream! Got " + std::to_string(bytesRead) + "."); } -} \ No newline at end of file +} diff --git a/cpp/src/array_utils.h b/cpp/src/array_utils.h index cb8f95cf..0971fd88 100644 --- a/cpp/src/array_utils.h +++ b/cpp/src/array_utils.h @@ -277,4 +277,4 @@ std::string toFloatVectorString(data_t *vec, size_t size) { template > std::string toFloatVectorString(std::vector vec) { return toFloatVectorString(vec.data(), vec.size()); -} \ No newline at end of file +} diff --git a/cpp/src/std_utils.h b/cpp/src/std_utils.h index 54f2a8aa..7dc0110e 100644 --- a/cpp/src/std_utils.h +++ b/cpp/src/std_utils.h @@ -131,4 +131,4 @@ void mergePriorityQueues(std::priority_queue maxElements) dest.pop(); -} \ No newline at end of file +} diff --git a/cpp/test/doctest_setup.cpp b/cpp/test/doctest_setup.cpp index f62715f5..a3f832e4 100644 --- a/cpp/test/doctest_setup.cpp +++ b/cpp/test/doctest_setup.cpp @@ -1,2 +1,2 @@ #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN -#include "doctest.h" \ No newline at end of file +#include "doctest.h" diff --git a/java/JavaInputStream.h b/java/JavaInputStream.h 
index 2bb2b822..752c6d59 100644 --- a/java/JavaInputStream.h +++ b/java/JavaInputStream.h @@ -131,4 +131,4 @@ class JavaInputStream : public InputStream { jobject inputStream; std::vector peekValue; long long bytesRead = 0; -}; \ No newline at end of file +}; diff --git a/java/JavaOutputStream.h b/java/JavaOutputStream.h index d91db1a3..42b8975e 100644 --- a/java/JavaOutputStream.h +++ b/java/JavaOutputStream.h @@ -80,4 +80,4 @@ class JavaOutputStream : public OutputStream { private: JNIEnv *env; jobject outputStream; -}; \ No newline at end of file +}; diff --git a/python/src/PythonFileLike.h b/python/src/PythonFileLike.h index 613dcd35..7fd2fea6 100644 --- a/python/src/PythonFileLike.h +++ b/python/src/PythonFileLike.h @@ -87,4 +87,4 @@ class PythonFileLike { protected: py::object fileLike; -}; \ No newline at end of file +}; diff --git a/python/src/PythonInputStream.h b/python/src/PythonInputStream.h index 99391a2f..e3ff35ec 100644 --- a/python/src/PythonInputStream.h +++ b/python/src/PythonInputStream.h @@ -189,4 +189,4 @@ class PythonInputStream : public InputStream, PythonFileLike { long long totalLength = -1; std::vector peekValue; bool lastReadWasSmallerThanExpected = false; -}; \ No newline at end of file +}; diff --git a/python/src/PythonOutputStream.h b/python/src/PythonOutputStream.h index 379c0667..8b60256b 100644 --- a/python/src/PythonOutputStream.h +++ b/python/src/PythonOutputStream.h @@ -68,4 +68,4 @@ class PythonOutputStream : public OutputStream, public PythonFileLike { return true; } -}; \ No newline at end of file +}; From 0532250fa8eb1416257c04c2295762e362f69f07 Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Sun, 18 Aug 2024 20:07:34 -0600 Subject: [PATCH 21/25] Revert cpp spacing to 80 for cleaner PR --- cpp/.clang-format | 2 +- cpp/src/E4M3.h | 23 +- cpp/src/Index.h | 24 +- cpp/src/Metadata.h | 26 +- cpp/src/Spaces/Euclidean.h | 52 ++- cpp/src/Spaces/InnerProduct.h | 75 ++-- cpp/src/Spaces/Space.h | 3 +- cpp/src/StreamUtils.h | 37 +- 
cpp/src/TypedIndex.h | 285 +++++++++------ cpp/src/array_utils.h | 115 +++--- cpp/src/hnswalg.h | 467 ++++++++++++++++--------- cpp/src/hnswlib.h | 11 +- cpp/src/std_utils.h | 22 +- cpp/test/test_main.cpp | 16 +- java/JavaInputStream.h | 39 ++- java/JavaOutputStream.h | 19 +- java/com_spotify_voyager_jni_Index.cpp | 366 ++++++++++++------- java/com_spotify_voyager_jni_Index.h | 111 ++++-- java/thread_pool.h | 7 +- python/bindings.cpp | 287 +++++++++------ python/src/PythonInputStream.h | 40 ++- python/src/PythonOutputStream.h | 8 +- 22 files changed, 1332 insertions(+), 703 deletions(-) diff --git a/cpp/.clang-format b/cpp/.clang-format index 56d41cd4..94f0987d 100644 --- a/cpp/.clang-format +++ b/cpp/.clang-format @@ -5,4 +5,4 @@ InsertNewlineAtEOF: true --- Language: Cpp # Use 120 columns since we have big screens now -ColumnLimit: 120 +ColumnLimit: 80 diff --git a/cpp/src/E4M3.h b/cpp/src/E4M3.h index c4258e92..37f6b523 100644 --- a/cpp/src/E4M3.h +++ b/cpp/src/E4M3.h @@ -294,9 +294,11 @@ class E4M3 { E4M3() : E4M3(0, 0, 0) {} - E4M3(uint8_t sign, uint8_t exponent, uint8_t mantissa) : sign(sign), exponent(exponent), mantissa(mantissa) {} + E4M3(uint8_t sign, uint8_t exponent, uint8_t mantissa) + : sign(sign), exponent(exponent), mantissa(mantissa) {} - E4M3(uint8_t c) : sign(c >> 7), exponent((c >> 3) & 0b1111), mantissa(c & 0b111) {} + E4M3(uint8_t c) + : sign(c >> 7), exponent((c >> 3) & 0b1111), mantissa(c & 0b111) {} E4M3(float input) { if (std::isnan(input) || std::isinf(input)) { @@ -314,11 +316,15 @@ class E4M3 { // TODO: Don't hard-code these, and instead compute them based on the bit // widths above! 
if (input < -448 || input > 448) { - throw std::domain_error("E4M3 cannot represent values outside of [-448, 448]."); + throw std::domain_error( + "E4M3 cannot represent values outside of [-448, 448]."); } - int originalExponent = ((*((const unsigned int *)&input) & 0b01111111100000000000000000000000) >> 23); - int originalMantissa = (*((const unsigned int *)&input) & 0b00000000011111111111111111111111); + int originalExponent = ((*((const unsigned int *)&input) & + 0b01111111100000000000000000000000) >> + 23); + int originalMantissa = + (*((const unsigned int *)&input) & 0b00000000011111111111111111111111); sign = input < 0; @@ -380,9 +386,10 @@ class E4M3 { if (mantissa == 0b111) { if (exponent == 0b1111) { // Rounding up would push us just outside of the representable range! - throw std::domain_error("E4M3 cannot represent values outside of [-448, " - "448] - tried to convert " + - std::to_string(input) + "."); + throw std::domain_error( + "E4M3 cannot represent values outside of [-448, " + "448] - tried to convert " + + std::to_string(input) + "."); } else { exponent++; mantissa = 0; diff --git a/cpp/src/Index.h b/cpp/src/Index.h index 98c3e267..f6fe581c 100644 --- a/cpp/src/Index.h +++ b/cpp/src/Index.h @@ -62,27 +62,33 @@ class Index { virtual void saveIndex(const std::string &pathToIndex) = 0; virtual void saveIndex(std::shared_ptr outputStream) = 0; - virtual void loadIndex(const std::string &pathToIndex, bool searchOnly = false) = 0; - virtual void loadIndex(std::shared_ptr inputStream, bool searchOnly = false) = 0; + virtual void loadIndex(const std::string &pathToIndex, + bool searchOnly = false) = 0; + virtual void loadIndex(std::shared_ptr inputStream, + bool searchOnly = false) = 0; virtual float getDistance(std::vector a, std::vector b) = 0; - virtual hnswlib::labeltype addItem(std::vector vector, std::optional id) = 0; - virtual std::vector addItems(NDArray input, std::vector ids = {}, - int numThreads = -1) = 0; + virtual hnswlib::labeltype 
addItem(std::vector vector, + std::optional id) = 0; + virtual std::vector + addItems(NDArray input, std::vector ids = {}, + int numThreads = -1) = 0; virtual std::vector getVector(hnswlib::labeltype id) = 0; virtual NDArray getVectors(std::vector ids) = 0; virtual std::vector getIDs() const = 0; virtual long long getIDsCount() const = 0; - virtual const std::unordered_map &getIDsMap() const = 0; + virtual const std::unordered_map & + getIDsMap() const = 0; - virtual std::tuple, std::vector> query(std::vector queryVector, - int k = 1, long queryEf = -1) = 0; + virtual std::tuple, std::vector> + query(std::vector queryVector, int k = 1, long queryEf = -1) = 0; virtual std::tuple, NDArray> - query(NDArray queryVectors, int k = 1, int numThreads = -1, long queryEf = -1) = 0; + query(NDArray queryVectors, int k = 1, int numThreads = -1, + long queryEf = -1) = 0; virtual void markDeleted(hnswlib::labeltype label) = 0; virtual void unmarkDeleted(hnswlib::labeltype label) = 0; diff --git a/cpp/src/Metadata.h b/cpp/src/Metadata.h index 1cbab7da..feaced34 100644 --- a/cpp/src/Metadata.h +++ b/cpp/src/Metadata.h @@ -33,9 +33,10 @@ namespace Metadata { */ class V1 { public: - V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, float maxNorm, - bool useOrderPreservingTransform) - : numDimensions(numDimensions), spaceType(spaceType), storageDataType(storageDataType), maxNorm(maxNorm), + V1(int numDimensions, SpaceType spaceType, StorageDataType storageDataType, + float maxNorm, bool useOrderPreservingTransform) + : numDimensions(numDimensions), spaceType(spaceType), + storageDataType(storageDataType), maxNorm(maxNorm), useOrderPreservingTransform(useOrderPreservingTransform) {} V1() {} @@ -51,12 +52,20 @@ class V1 { float getMaxNorm() { return maxNorm; } - bool getUseOrderPreservingTransform() const { return useOrderPreservingTransform; } - void setUseOrderPreservingTransform(bool newValue) { useOrderPreservingTransform = newValue; } + bool 
getUseOrderPreservingTransform() const { + return useOrderPreservingTransform; + } + void setUseOrderPreservingTransform(bool newValue) { + useOrderPreservingTransform = newValue; + } - void setNumDimensions(int newNumDimensions) { numDimensions = newNumDimensions; } + void setNumDimensions(int newNumDimensions) { + numDimensions = newNumDimensions; + } - void setStorageDataType(StorageDataType newStorageDataType) { storageDataType = newStorageDataType; } + void setStorageDataType(StorageDataType newStorageDataType) { + storageDataType = newStorageDataType; + } void setSpaceType(SpaceType newSpaceType) { spaceType = newSpaceType; } @@ -89,7 +98,8 @@ class V1 { bool useOrderPreservingTransform; }; -static std::unique_ptr loadFromStream(std::shared_ptr inputStream) { +static std::unique_ptr +loadFromStream(std::shared_ptr inputStream) { uint32_t header = inputStream->peek(); if (header != 'AYOV') { return nullptr; diff --git a/cpp/src/Spaces/Euclidean.h b/cpp/src/Spaces/Euclidean.h index 380467ff..252e413d 100644 --- a/cpp/src/Spaces/Euclidean.h +++ b/cpp/src/Spaces/Euclidean.h @@ -32,8 +32,10 @@ namespace hnswlib { * should automatically do the loop unrolling for us here and vectorize as * appropriate. 
*/ -template > -static dist_t L2Sqr(const data_t *__restrict pVect1, const data_t *__restrict pVect2, const size_t qty) { +template > +static dist_t L2Sqr(const data_t *__restrict pVect1, + const data_t *__restrict pVect2, const size_t qty) { dist_t res = 0; for (size_t i = 0; i < qty / K; i++) { @@ -49,18 +51,22 @@ static dist_t L2Sqr(const data_t *__restrict pVect1, const data_t *__restrict pV return (res * scale * scale); } -template > -static dist_t L2SqrAtLeast(const data_t *__restrict pVect1, const data_t *__restrict pVect2, const size_t qty) { +template > +static dist_t L2SqrAtLeast(const data_t *__restrict pVect1, + const data_t *__restrict pVect2, const size_t qty) { size_t remainder = qty - K; return L2Sqr(pVect1, pVect2, K) + - L2Sqr(pVect1 + K, pVect2 + K, remainder); + L2Sqr(pVect1 + K, pVect2 + K, + remainder); } #if defined(USE_AVX512) // Favor using AVX512 if available. -static float L2SqrSIMD16Ext(const float *pVect1, const float *pVect2, const size_t qty) { +static float L2SqrSIMD16Ext(const float *pVect1, const float *pVect2, + const size_t qty) { float PORTABLE_ALIGN64 TmpRes[16]; size_t qty16 = qty >> 4; @@ -80,8 +86,10 @@ static float L2SqrSIMD16Ext(const float *pVect1, const float *pVect2, const size } _mm512_store_ps(TmpRes, sum); - float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + - TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15]; + float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + + TmpRes[15]; return (res); } @@ -89,7 +97,8 @@ static float L2SqrSIMD16Ext(const float *pVect1, const float *pVect2, const size #elif defined(USE_AVX) // Favor using AVX if available. 
-static float L2SqrSIMD16Ext(const float *pVect1, const float *pVect2, const size_t qty) { +static float L2SqrSIMD16Ext(const float *pVect1, const float *pVect2, + const size_t qty) { float PORTABLE_ALIGN32 TmpRes[8]; size_t qty16 = qty >> 4; @@ -115,12 +124,14 @@ static float L2SqrSIMD16Ext(const float *pVect1, const float *pVect2, const size } _mm256_store_ps(TmpRes, sum); - return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; + return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + + TmpRes[6] + TmpRes[7]; } #elif defined(USE_SSE) -static float L2SqrSIMD16Ext(const float *pVect1, const float *pVect2, const size_t qty) { +static float L2SqrSIMD16Ext(const float *pVect1, const float *pVect2, + const size_t qty) { float PORTABLE_ALIGN32 TmpRes[8]; size_t qty16 = qty >> 4; @@ -166,18 +177,21 @@ static float L2SqrSIMD16Ext(const float *pVect1, const float *pVect2, const size #endif #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512) -static float L2SqrSIMD16ExtResiduals(const float *pVect1, const float *pVect2, const size_t qty) { +static float L2SqrSIMD16ExtResiduals(const float *pVect1, const float *pVect2, + const size_t qty) { size_t qty16 = qty >> 4 << 4; float res = L2SqrSIMD16Ext(pVect1, pVect2, qty16); size_t qty_left = qty - qty16; - float res_tail = L2Sqr(pVect1 + qty16, pVect2 + qty16, qty_left); + float res_tail = + L2Sqr(pVect1 + qty16, pVect2 + qty16, qty_left); return (res + res_tail); } #endif #ifdef USE_SSE -static float L2SqrSIMD4Ext(const float *pVect1, const float *pVect2, const size_t qty) { +static float L2SqrSIMD4Ext(const float *pVect1, const float *pVect2, + const size_t qty) { float PORTABLE_ALIGN32 TmpRes[8]; size_t qty4 = qty >> 2; @@ -198,7 +212,8 @@ static float L2SqrSIMD4Ext(const float *pVect1, const float *pVect2, const size_ return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3]; } -static float L2SqrSIMD4ExtResiduals(const float *pVect1, const float 
*pVect2, const size_t qty) { +static float L2SqrSIMD4ExtResiduals(const float *pVect1, const float *pVect2, + const size_t qty) { size_t qty4 = qty >> 2 << 2; float res = L2SqrSIMD4Ext(pVect1, pVect2, qty4); @@ -210,7 +225,8 @@ static float L2SqrSIMD4ExtResiduals(const float *pVect1, const float *pVect2, co } #endif -template > +template > class EuclideanSpace : public Space { DISTFUNC fstdistfunc_; size_t data_size_; @@ -256,7 +272,9 @@ class EuclideanSpace : public Space { ~EuclideanSpace() {} }; -template <> EuclideanSpace::EuclideanSpace(size_t dim) : data_size_(dim * sizeof(float)), dim_(dim) { +template <> +EuclideanSpace::EuclideanSpace(size_t dim) + : data_size_(dim * sizeof(float)), dim_(dim) { fstdistfunc_ = L2Sqr; #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512) if (dim % 16 == 0) diff --git a/cpp/src/Spaces/InnerProduct.h b/cpp/src/Spaces/InnerProduct.h index 076c2ca7..5e671f6e 100644 --- a/cpp/src/Spaces/InnerProduct.h +++ b/cpp/src/Spaces/InnerProduct.h @@ -32,8 +32,10 @@ namespace hnswlib { * compiler should automatically do the loop unrolling for us here and vectorize * as appropriate. 
*/ -template > -static dist_t InnerProductWithoutScale(const data_t *pVect1, const data_t *pVect2, size_t qty) { +template > +static dist_t InnerProductWithoutScale(const data_t *pVect1, + const data_t *pVect2, size_t qty) { dist_t res = 0; qty = qty / K; @@ -49,20 +51,28 @@ static dist_t InnerProductWithoutScale(const data_t *pVect1, const data_t *pVect return res; } -template > -static dist_t InnerProduct(const data_t *pVect1, const data_t *pVect2, size_t qty) { - dist_t res = InnerProductWithoutScale(pVect1, pVect2, qty); +template > +static dist_t InnerProduct(const data_t *pVect1, const data_t *pVect2, + size_t qty) { + dist_t res = InnerProductWithoutScale( + pVect1, pVect2, qty); constexpr dist_t scale = (dist_t)scalefactor::num / (dist_t)scalefactor::den; res *= scale * scale; res = (static_cast(1.0f) - res); return res; } -template > -static dist_t InnerProductAtLeast(const data_t *__restrict pVect1, const data_t *__restrict pVect2, const size_t qty) { +template > +static dist_t InnerProductAtLeast(const data_t *__restrict pVect1, + const data_t *__restrict pVect2, + const size_t qty) { size_t remainder = qty - K; - dist_t res = InnerProductWithoutScale(pVect1, pVect2, K) + - InnerProductWithoutScale(pVect1 + K, pVect2 + K, remainder); + dist_t res = InnerProductWithoutScale( + pVect1, pVect2, K) + + InnerProductWithoutScale( + pVect1 + K, pVect2 + K, remainder); constexpr dist_t scale = (dist_t)scalefactor::num / (dist_t)scalefactor::den; res *= scale * scale; res = (static_cast(1.0f) - res); @@ -72,7 +82,8 @@ static dist_t InnerProductAtLeast(const data_t *__restrict pVect1, const data_t #if defined(USE_AVX) // Favor using AVX if available. 
-static float InnerProductSIMD4Ext(const float *pVect1, const float *pVect2, const size_t qty) { +static float InnerProductSIMD4Ext(const float *pVect1, const float *pVect2, + const size_t qty) { float PORTABLE_ALIGN32 TmpRes[8]; size_t qty16 = qty / 16; @@ -100,7 +111,8 @@ static float InnerProductSIMD4Ext(const float *pVect1, const float *pVect2, cons } __m128 v1, v2; - __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), _mm256_extractf128_ps(sum256, 1)); + __m128 sum_prod = _mm_add_ps(_mm256_extractf128_ps(sum256, 0), + _mm256_extractf128_ps(sum256, 1)); while (pVect1 < pEnd2) { v1 = _mm_loadu_ps(pVect1); @@ -118,7 +130,8 @@ static float InnerProductSIMD4Ext(const float *pVect1, const float *pVect2, cons #elif defined(USE_SSE) -static float InnerProductSIMD4Ext(const float *pVect1, const float *pVect2, const size_t qty) { +static float InnerProductSIMD4Ext(const float *pVect1, const float *pVect2, + const size_t qty) { float PORTABLE_ALIGN32 TmpRes[8]; size_t qty16 = qty / 16; @@ -174,7 +187,8 @@ static float InnerProductSIMD4Ext(const float *pVect1, const float *pVect2, cons #if defined(USE_AVX512) -static float InnerProductSIMD16Ext(const float *pVect1, const float *pVect2, const size_t qty) { +static float InnerProductSIMD16Ext(const float *pVect1, const float *pVect2, + const size_t qty) { float PORTABLE_ALIGN64 TmpRes[16]; size_t qty16 = qty / 16; @@ -194,15 +208,18 @@ static float InnerProductSIMD16Ext(const float *pVect1, const float *pVect2, con } _mm512_store_ps(TmpRes, sum512); - float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + - TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15]; + float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + + TmpRes[15]; return 1.0f - sum; } #elif 
defined(USE_AVX) -static float InnerProductSIMD16Ext(const float *pVect1, const float *pVect2, const size_t qty) { +static float InnerProductSIMD16Ext(const float *pVect1, const float *pVect2, + const size_t qty) { float PORTABLE_ALIGN32 TmpRes[8]; size_t qty16 = qty / 16; @@ -228,14 +245,16 @@ static float InnerProductSIMD16Ext(const float *pVect1, const float *pVect2, con } _mm256_store_ps(TmpRes, sum256); - float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7]; + float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + + TmpRes[5] + TmpRes[6] + TmpRes[7]; return 1.0f - sum; } #elif defined(USE_SSE) -static float InnerProductSIMD16Ext(const float *pVect1, const float *pVect2, const size_t qty) { +static float InnerProductSIMD16Ext(const float *pVect1, const float *pVect2, + const size_t qty) { float PORTABLE_ALIGN32 TmpRes[8]; size_t qty16 = qty / 16; @@ -278,28 +297,35 @@ static float InnerProductSIMD16Ext(const float *pVect1, const float *pVect2, con #endif #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512) -static float InnerProductSIMD16ExtResiduals(const float *pVect1, const float *pVect2, const size_t qty) { +static float InnerProductSIMD16ExtResiduals(const float *pVect1, + const float *pVect2, + const size_t qty) { size_t qty16 = qty >> 4 << 4; float res = InnerProductSIMD16Ext(pVect1, pVect2, qty16); size_t qty_left = qty - qty16; - float res_tail = InnerProduct(pVect1 + qty16, pVect2 + qty16, qty_left); + float res_tail = + InnerProduct(pVect1 + qty16, pVect2 + qty16, qty_left); return res + res_tail - 1.0f; } -static float InnerProductSIMD4ExtResiduals(const float *pVect1, const float *pVect2, const size_t qty) { +static float InnerProductSIMD4ExtResiduals(const float *pVect1, + const float *pVect2, + const size_t qty) { size_t qty4 = qty >> 2 << 2; float res = InnerProductSIMD4Ext(pVect1, pVect2, qty4); size_t qty_left = qty - qty4; - float res_tail = InnerProduct(pVect1 + 
qty4, pVect2 + qty4, qty_left); + float res_tail = + InnerProduct(pVect1 + qty4, pVect2 + qty4, qty_left); return res + res_tail - 1.0f; } #endif -template > +template > class InnerProductSpace : public Space { DISTFUNC fstdistfunc_; size_t data_size_; @@ -345,7 +371,8 @@ class InnerProductSpace : public Space { }; template <> -InnerProductSpace::InnerProductSpace(size_t dim) : data_size_(dim * sizeof(float)), dim_(dim) { +InnerProductSpace::InnerProductSpace(size_t dim) + : data_size_(dim * sizeof(float)), dim_(dim) { fstdistfunc_ = InnerProduct; #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512) if (dim % 16 == 0) diff --git a/cpp/src/Spaces/Space.h b/cpp/src/Spaces/Space.h index 05fd9d9f..672c21a5 100644 --- a/cpp/src/Spaces/Space.h +++ b/cpp/src/Spaces/Space.h @@ -26,7 +26,8 @@ namespace hnswlib { template -using DISTFUNC = std::function; +using DISTFUNC = + std::function; /** * An abstract class representing a type of space to search through, diff --git a/cpp/src/StreamUtils.h b/cpp/src/StreamUtils.h index d4d40d25..d76c2ec4 100644 --- a/cpp/src/StreamUtils.h +++ b/cpp/src/StreamUtils.h @@ -39,7 +39,9 @@ class InputStream { virtual bool isExhausted() = 0; virtual long long getPosition() = 0; virtual bool setPosition(long long position) = 0; - virtual bool advanceBy(long long numBytes) { return setPosition(getPosition() + numBytes); } + virtual bool advanceBy(long long numBytes) { + return setPosition(getPosition() + numBytes); + } virtual uint32_t peek() = 0; }; @@ -62,12 +64,18 @@ class FileInputStream : public InputStream { virtual bool isSeekable() { return isRegularFile; } virtual long long getTotalLength() { return sizeInBytes; } - virtual long long read(char *buffer, long long bytesToRead) { return fread(buffer, 1, bytesToRead, handle); } + virtual long long read(char *buffer, long long bytesToRead) { + return fread(buffer, 1, bytesToRead, handle); + } virtual bool isExhausted() { return feof(handle); } virtual long long getPosition() { 
return ftell(handle); } - virtual bool setPosition(long long position) { return fseek(handle, position, SEEK_SET) == 0; } - virtual bool advanceBy(long long bytes) { return fseek(handle, bytes, SEEK_CUR) == 0; } + virtual bool setPosition(long long position) { + return fseek(handle, position, SEEK_SET) == 0; + } + virtual bool advanceBy(long long bytes) { + return fseek(handle, bytes, SEEK_CUR) == 0; + } virtual uint32_t peek() { uint32_t result = 0; long long lastPosition = getPosition(); @@ -75,8 +83,10 @@ class FileInputStream : public InputStream { setPosition(lastPosition); return result; } else { - throw std::runtime_error("Failed to peek " + std::to_string(sizeof(result)) + " bytes from file \"" + filename + - "\" at index " + std::to_string(lastPosition) + "."); + throw std::runtime_error( + "Failed to peek " + std::to_string(sizeof(result)) + + " bytes from file \"" + filename + "\" at index " + + std::to_string(lastPosition) + "."); } } @@ -113,7 +123,8 @@ class FileOutputStream : public OutputStream { errno = 0; handle = fopen(filename.c_str(), "wb"); if (!handle) { - throw std::runtime_error("Failed to open file for writing (errno " + std::to_string(errno) + "): " + filename); + throw std::runtime_error("Failed to open file for writing (errno " + + std::to_string(errno) + "): " + filename); } } @@ -150,16 +161,20 @@ class MemoryOutputStream : public OutputStream { std::ostringstream outputStream; }; -template static void writeBinaryPOD(std::shared_ptr out, const T &podRef) { +template +static void writeBinaryPOD(std::shared_ptr out, const T &podRef) { if (!out->write((char *)&podRef, sizeof(T))) { - throw std::runtime_error("Failed to write " + std::to_string(sizeof(T)) + " bytes to stream!"); + throw std::runtime_error("Failed to write " + std::to_string(sizeof(T)) + + " bytes to stream!"); } } -template static void readBinaryPOD(std::shared_ptr in, T &podRef) { +template +static void readBinaryPOD(std::shared_ptr in, T &podRef) { long long bytesRead = 
in->read((char *)&podRef, sizeof(T)); if (bytesRead != sizeof(T)) { - throw std::runtime_error("Failed to read " + std::to_string(sizeof(T)) + " bytes from stream! Got " + + throw std::runtime_error("Failed to read " + std::to_string(sizeof(T)) + + " bytes from stream! Got " + std::to_string(bytesRead) + "."); } } diff --git a/cpp/src/TypedIndex.h b/cpp/src/TypedIndex.h index 7938f82b..c0f5debc 100644 --- a/cpp/src/TypedIndex.h +++ b/cpp/src/TypedIndex.h @@ -39,15 +39,22 @@ template inline const StorageDataType storageDataType(); template inline const std::string storageDataTypeName(); -template <> const StorageDataType storageDataType() { return StorageDataType::Float8; } -template <> const StorageDataType storageDataType() { return StorageDataType::Float32; } -template <> const StorageDataType storageDataType() { return StorageDataType::E4M3; } +template <> const StorageDataType storageDataType() { + return StorageDataType::Float8; +} +template <> const StorageDataType storageDataType() { + return StorageDataType::Float32; +} +template <> const StorageDataType storageDataType() { + return StorageDataType::E4M3; +} template <> const std::string storageDataTypeName() { return "Float8"; } template <> const std::string storageDataTypeName() { return "Float32"; } template <> const std::string storageDataTypeName() { return "E4M3"; } -template dist_t ensureNotNegative(dist_t distance, hnswlib::labeltype label) { +template +dist_t ensureNotNegative(dist_t distance, hnswlib::labeltype label) { if constexpr (std::is_same_v) { // Allow for a very slight negative distance if using E4M3 if (distance < 0 && distance >= -0.14) { @@ -60,8 +67,10 @@ template dist_t ensureNotNegative(dist_t dist return 0; } - throw std::runtime_error("Potential candidate (with label '" + std::to_string(label) + "') had negative distance " + - std::to_string(distance) + ". 
This may indicate a corrupted index file."); + throw std::runtime_error( + "Potential candidate (with label '" + std::to_string(label) + + "') had negative distance " + std::to_string(distance) + + ". This may indicate a corrupted index file."); } return distance; @@ -74,7 +83,8 @@ template dist_t ensureNotNegative(dist_t dist * so its interfaces should only include C++ or C datatypes, and * it should avoid unnecessary memory copies if possible. */ -template > +template > class TypedIndex : public Index { private: static const int ser_version = 1; // serialization version @@ -100,27 +110,33 @@ class TypedIndex : public Index { /** * Create an empty index with the given parameters. */ - TypedIndex(const SpaceType space, const int dimensions, const size_t M = 12, const size_t efConstruction = 200, - const size_t randomSeed = 1, const size_t maxElements = 1, + TypedIndex(const SpaceType space, const int dimensions, const size_t M = 12, + const size_t efConstruction = 200, const size_t randomSeed = 1, + const size_t maxElements = 1, const bool enableOrderPreservingTransform = true) : space(space), dimensions(dimensions), - metadata(std::make_unique(dimensions, space, getStorageDataType(), 0.0, - space == InnerProduct)) { + metadata(std::make_unique( + dimensions, space, getStorageDataType(), 0.0, + space == InnerProduct)) { switch (space) { case Euclidean: - spaceImpl = std::make_unique>(dimensions); + spaceImpl = std::make_unique< + hnswlib::EuclideanSpace>(dimensions); break; case InnerProduct: useOrderPreservingTransform = enableOrderPreservingTransform; - spaceImpl = std::make_unique>( + spaceImpl = std::make_unique< + hnswlib::InnerProductSpace>( dimensions + (useOrderPreservingTransform ? 
1 : 0)); break; case Cosine: - spaceImpl = std::make_unique>(dimensions); + spaceImpl = std::make_unique< + hnswlib::InnerProductSpace>(dimensions); normalize = true; break; default: - throw new std::runtime_error("Space must be one of Euclidean, InnerProduct, or Cosine."); + throw new std::runtime_error( + "Space must be one of Euclidean, InnerProduct, or Cosine."); } ep_added = true; @@ -129,8 +145,8 @@ class TypedIndex : public Index { defaultEF = 10; currentLabel = 0; - algorithmImpl = std::make_unique>(spaceImpl.get(), maxElements, M, - efConstruction, randomSeed); + algorithmImpl = std::make_unique>( + spaceImpl.get(), maxElements, M, efConstruction, randomSeed); ep_added = false; algorithmImpl->ef_ = defaultEF; @@ -146,12 +162,13 @@ class TypedIndex : public Index { * * This constructor is only used to load a V0-type index from file. */ - TypedIndex(const std::string &indexFilename, const SpaceType space, const int dimensions, bool searchOnly = false) + TypedIndex(const std::string &indexFilename, const SpaceType space, + const int dimensions, bool searchOnly = false) : TypedIndex(space, dimensions, /* M */ 12, /* efConstruction */ 200, /* randomSeed */ 1, /* maxElements */ 1, /* enableOrderPreservingTransform */ false) { - algorithmImpl = - std::make_unique>(spaceImpl.get(), indexFilename, 0, searchOnly); + algorithmImpl = std::make_unique>( + spaceImpl.get(), indexFilename, 0, searchOnly); currentLabel = algorithmImpl->cur_element_count; } @@ -161,13 +178,13 @@ class TypedIndex : public Index { * * This constructor is only used to load a V0-type index from a stream. 
*/ - TypedIndex(std::shared_ptr inputStream, const SpaceType space, const int dimensions, - bool searchOnly = false) + TypedIndex(std::shared_ptr inputStream, const SpaceType space, + const int dimensions, bool searchOnly = false) : TypedIndex(space, dimensions, /* M */ 12, /* efConstruction */ 200, /* randomSeed */ 1, /* maxElements */ 1, /* enableOrderPreservingTransform */ false) { - algorithmImpl = - std::make_unique>(spaceImpl.get(), inputStream, 0, searchOnly); + algorithmImpl = std::make_unique>( + spaceImpl.get(), inputStream, 0, searchOnly); currentLabel = algorithmImpl->cur_element_count; } @@ -175,15 +192,15 @@ class TypedIndex : public Index { * Load an index from the given input stream, interpreting * it as the given Space and number of dimensions. */ - TypedIndex(std::unique_ptr metadata, std::shared_ptr inputStream, - bool searchOnly = false) + TypedIndex(std::unique_ptr metadata, + std::shared_ptr inputStream, bool searchOnly = false) : TypedIndex(metadata->getSpaceType(), metadata->getNumDimensions(), /* M */ 12, /* efConstruction */ 200, /* randomSeed */ 1, /* maxElements */ 1, /* enableOrderPreservingTransform */ metadata->getUseOrderPreservingTransform()) { - algorithmImpl = - std::make_unique>(spaceImpl.get(), inputStream, 0, searchOnly); + algorithmImpl = std::make_unique>( + spaceImpl.get(), inputStream, 0, searchOnly); max_norm = metadata->getMaxNorm(); currentLabel = algorithmImpl->cur_element_count; } @@ -206,9 +223,13 @@ class TypedIndex : public Index { } } - StorageDataType getStorageDataType() const { return storageDataType(); } + StorageDataType getStorageDataType() const { + return storageDataType(); + } - std::string getStorageDataTypeName() const { return storageDataTypeName(); } + std::string getStorageDataTypeName() const { + return storageDataTypeName(); + } void setEF(size_t ef) { defaultEF = ef; @@ -222,7 +243,8 @@ class TypedIndex : public Index { throw std::runtime_error("Not implemented."); } - void 
loadIndex(std::shared_ptr inputStream, bool searchOnly = false) { + void loadIndex(std::shared_ptr inputStream, + bool searchOnly = false) { throw std::runtime_error("Not implemented."); } @@ -249,11 +271,13 @@ class TypedIndex : public Index { float getDistance(std::vector _a, std::vector _b) { if ((int)_a.size() != dimensions || (int)_b.size() != dimensions) { throw std::runtime_error("Index has " + std::to_string(dimensions) + - " dimensions, but received vectors of size: " + std::to_string(_a.size()) + " and " + + " dimensions, but received vectors of size: " + + std::to_string(_a.size()) + " and " + std::to_string(_b.size()) + "."); } - int actualDimensions = useOrderPreservingTransform ? dimensions + 1 : dimensions; + int actualDimensions = + useOrderPreservingTransform ? dimensions + 1 : dimensions; std::vector a(actualDimensions); std::vector b(actualDimensions); @@ -266,8 +290,10 @@ class TypedIndex : public Index { } if (normalize) { - normalizeVector(_a.data(), a.data(), a.size()); - normalizeVector(_b.data(), b.data(), b.size()); + normalizeVector(_a.data(), a.data(), + a.size()); + normalizeVector(_b.data(), b.data(), + b.size()); } else { floatToDataType(_a.data(), a.data(), a.size()); floatToDataType(_b.data(), b.data(), b.size()); @@ -276,7 +302,8 @@ class TypedIndex : public Index { return spaceImpl->get_dist_func()(a.data(), b.data(), actualDimensions); } - hnswlib::labeltype addItem(std::vector vector, std::optional id) { + hnswlib::labeltype addItem(std::vector vector, + std::optional id) { std::vector ids; if (id) { @@ -286,8 +313,9 @@ class TypedIndex : public Index { return addItems(NDArray(vector, {1, (int)vector.size()}), ids)[0]; } - std::vector addItems(NDArray floatInput, std::vector ids = {}, - int numThreads = -1) { + std::vector + addItems(NDArray floatInput, + std::vector ids = {}, int numThreads = -1) { if (numThreads <= 0) numThreads = numThreadsDefault; @@ -295,9 +323,10 @@ class TypedIndex : public Index { size_t features = 
std::get<1>(floatInput.shape); if (features != (size_t)dimensions) { - throw std::domain_error("The provided vector(s) have " + std::to_string(features) + - " dimensions, but this index expects vectors with " + std::to_string(dimensions) + - " dimensions."); + throw std::domain_error( + "The provided vector(s) have " + std::to_string(features) + + " dimensions, but this index expects vectors with " + + std::to_string(dimensions) + " dimensions."); } std::vector idsToReturn(rows); @@ -308,9 +337,11 @@ class TypedIndex : public Index { } if (!ids.empty() && (unsigned long)ids.size() != rows) { - throw std::runtime_error(std::to_string(rows) + " vectors were provided, but " + std::to_string(ids.size()) + - " IDs were provided. If providing IDs along with vectors, the number " - "of provided IDs must match the number of vectors."); + throw std::runtime_error( + std::to_string(rows) + " vectors were provided, but " + + std::to_string(ids.size()) + + " IDs were provided. If providing IDs along with vectors, the number " + "of provided IDs must match the number of vectors."); } // TODO: Should we always double the number of elements instead? Maybe use @@ -324,7 +355,8 @@ class TypedIndex : public Index { } } - int actualDimensions = useOrderPreservingTransform ? dimensions + 1 : dimensions; + int actualDimensions = + useOrderPreservingTransform ? 
dimensions + 1 : dimensions; int start = 0; if (!ep_added) { @@ -333,17 +365,19 @@ class TypedIndex : public Index { std::vector inputVector(actualDimensions); std::vector convertedVector(actualDimensions); - std::memcpy(inputVector.data(), floatInput[0], dimensions * sizeof(float)); + std::memcpy(inputVector.data(), floatInput[0], + dimensions * sizeof(float)); if (useOrderPreservingTransform) { inputVector[dimensions] = getDotFactorAndUpdateNorm(floatInput[0]); } if (normalize) { - normalizeVector(inputVector.data(), convertedVector.data(), - convertedVector.size()); + normalizeVector( + inputVector.data(), convertedVector.data(), convertedVector.size()); } else { - floatToDataType(inputVector.data(), convertedVector.data(), convertedVector.size()); + floatToDataType( + inputVector.data(), convertedVector.data(), convertedVector.size()); } algorithmImpl->addPoint(convertedVector.data(), (size_t)id); @@ -357,13 +391,17 @@ class TypedIndex : public Index { std::vector convertedArray(numThreads * actualDimensions); ParallelFor(start, rows, numThreads, [&](size_t row, size_t threadId) { size_t startIndex = threadId * actualDimensions; - std::memcpy(&inputArray[startIndex], floatInput[row], dimensions * sizeof(float)); + std::memcpy(&inputArray[startIndex], floatInput[row], + dimensions * sizeof(float)); if (useOrderPreservingTransform) { - inputArray[startIndex + dimensions] = getDotFactorAndUpdateNorm(floatInput[row]); + inputArray[startIndex + dimensions] = + getDotFactorAndUpdateNorm(floatInput[row]); } - floatToDataType(&inputArray[startIndex], &convertedArray[startIndex], actualDimensions); + floatToDataType(&inputArray[startIndex], + &convertedArray[startIndex], + actualDimensions); size_t id = ids.size() ? 
ids.at(row) : (currentLabel + row); try { algorithmImpl->addPoint(convertedArray.data() + startIndex, id); @@ -389,14 +427,17 @@ class TypedIndex : public Index { ParallelFor(start, rows, numThreads, [&](size_t row, size_t threadId) { size_t startIndex = threadId * actualDimensions; - std::memcpy(&inputArray[startIndex], floatInput[row], dimensions * sizeof(float)); + std::memcpy(&inputArray[startIndex], floatInput[row], + dimensions * sizeof(float)); if (useOrderPreservingTransform) { - inputArray[startIndex + dimensions] = getDotFactorAndUpdateNorm(floatInput[row]); + inputArray[startIndex + dimensions] = + getDotFactorAndUpdateNorm(floatInput[row]); } - normalizeVector(&inputArray[startIndex], &normalizedArray[startIndex], - actualDimensions); + normalizeVector( + &inputArray[startIndex], &normalizedArray[startIndex], + actualDimensions); size_t id = ids.size() ? ids.at(row) : (currentLabel + row); try { @@ -427,7 +468,8 @@ class TypedIndex : public Index { dist_t prevMaxNorm = max_norm; // atomically update max_norm when inserting from multiple threads - while (prevMaxNorm < norm && !max_norm.compare_exchange_weak(prevMaxNorm, norm)) { + while (prevMaxNorm < norm && + !max_norm.compare_exchange_weak(prevMaxNorm, norm)) { } return getDotFactor(norm); @@ -443,7 +485,9 @@ class TypedIndex : public Index { return sqrt((max_norm * max_norm) - (norm * norm)); } - std::vector getRawVector(hnswlib::labeltype id) { return algorithmImpl->getDataByLabel(id); } + std::vector getRawVector(hnswlib::labeltype id) { + return algorithmImpl->getDataByLabel(id); + } std::vector getVector(hnswlib::labeltype id) { std::vector rawData = getRawVector(id); @@ -456,7 +500,8 @@ class TypedIndex : public Index { for (unsigned long i = 0; i < ids.size(); i++) { std::vector vector = getVector(ids[i]); - std::copy(vector.begin(), vector.end(), output.data.data() + (i * dimensions)); + std::copy(vector.begin(), vector.end(), + output.data.data() + (i * dimensions)); } return output; @@ 
-475,12 +520,14 @@ class TypedIndex : public Index { long long getIDsCount() const { return algorithmImpl->label_lookup_.size(); } - const std::unordered_map &getIDsMap() const { + const std::unordered_map & + getIDsMap() const { return algorithmImpl->label_lookup_; } - std::tuple, NDArray> query(NDArray floatQueryVectors, int k = 1, - int numThreads = -1, long queryEf = -1) { + std::tuple, NDArray> + query(NDArray floatQueryVectors, int k = 1, int numThreads = -1, + long queryEf = -1) { if (queryEf > 0 && queryEf < k) { throw std::runtime_error("queryEf must be equal to or greater than the " "requested number of neighbors"); @@ -489,7 +536,8 @@ class TypedIndex : public Index { int numFeatures = std::get<1>(floatQueryVectors.shape); if (numFeatures != dimensions) { - throw std::runtime_error("Query vectors expected to share dimensionality with index."); + throw std::runtime_error( + "Query vectors expected to share dimensionality with index."); } NDArray labels({numRows, k}); @@ -508,7 +556,8 @@ class TypedIndex : public Index { numThreads = 1; } - int actualDimensions = useOrderPreservingTransform ? dimensions + 1 : dimensions; + int actualDimensions = + useOrderPreservingTransform ? dimensions + 1 : dimensions; if (normalize == false) { std::vector inputArray(numThreads * actualDimensions); @@ -519,17 +568,22 @@ class TypedIndex : public Index { // Only copy at most `dimensions` from the input; if we're using // the order-preserving transform, the remaining dimension will be 0 // anyways. 
- std::memcpy(&inputArray[start_idx], floatQueryVectors[row], dimensions * sizeof(float)); + std::memcpy(&inputArray[start_idx], floatQueryVectors[row], + dimensions * sizeof(float)); - floatToDataType(&inputArray[start_idx], &convertedArray[start_idx], actualDimensions); + floatToDataType(&inputArray[start_idx], + &convertedArray[start_idx], + actualDimensions); std::priority_queue> result = - algorithmImpl->searchKnn((convertedArray.data() + start_idx), k, nullptr, queryEf); + algorithmImpl->searchKnn((convertedArray.data() + start_idx), k, + nullptr, queryEf); if (result.size() != (unsigned long)k) { - throw std::runtime_error("Fewer than expected results were retrieved; only found " + - std::to_string(result.size()) + " of " + std::to_string(k) + - " requested neighbors."); + throw std::runtime_error( + "Fewer than expected results were retrieved; only found " + + std::to_string(result.size()) + " of " + std::to_string(k) + + " requested neighbors."); } for (int i = k - 1; i >= 0; i--) { @@ -552,17 +606,21 @@ class TypedIndex : public Index { // Only copy at most `dimensions` from the input; if we're using // the order-preserving transform, the remaining dimension will be 0 // anyways. 
- std::memcpy(&inputArray[start_idx], floatQueryVectors[row], dimensions * sizeof(float)); + std::memcpy(&inputArray[start_idx], floatQueryVectors[row], + dimensions * sizeof(float)); - normalizeVector(&inputArray[start_idx], &norm_array[start_idx], actualDimensions); + normalizeVector( + &inputArray[start_idx], &norm_array[start_idx], actualDimensions); std::priority_queue> result = - algorithmImpl->searchKnn(norm_array.data() + start_idx, k, nullptr, queryEf); + algorithmImpl->searchKnn(norm_array.data() + start_idx, k, nullptr, + queryEf); if (result.size() != (unsigned long)k) { - throw std::runtime_error("Fewer than expected results were retrieved; only found " + - std::to_string(result.size()) + " of " + std::to_string(k) + - " requested neighbors."); + throw std::runtime_error( + "Fewer than expected results were retrieved; only found " + + std::to_string(result.size()) + " of " + std::to_string(k) + + " requested neighbors."); } for (int i = k - 1; i >= 0; i--) { @@ -571,7 +629,8 @@ class TypedIndex : public Index { dist_t distance = result_tuple.first; hnswlib::labeltype label = result_tuple.second; - distancePointer[row * k + i] = ensureNotNegative(distance, label); + distancePointer[row * k + i] = + ensureNotNegative(distance, label); labelPointer[row * k + i] = label; result.pop(); } @@ -581,8 +640,8 @@ class TypedIndex : public Index { return {labels, distances}; } - std::tuple, std::vector> query(std::vector floatQueryVector, int k = 1, - long queryEf = -1) { + std::tuple, std::vector> + query(std::vector floatQueryVector, int k = 1, long queryEf = -1) { if (queryEf > 0 && queryEf < k) { throw std::runtime_error("queryEf must be equal to or greater than the " "requested number of neighbors"); @@ -591,7 +650,8 @@ class TypedIndex : public Index { int numFeatures = floatQueryVector.size(); if (numFeatures != dimensions) { - throw std::runtime_error("Query vector expected to share dimensionality with index."); + throw std::runtime_error( + "Query vector 
expected to share dimensionality with index."); } int actualDimensions = dimensions; @@ -607,14 +667,17 @@ class TypedIndex : public Index { dist_t *distancePointer = distances.data(); if (normalize == false) { - const std::vector queryVector = floatToDataType(floatQueryVector); + const std::vector queryVector = + floatToDataType(floatQueryVector); std::priority_queue> result = algorithmImpl->searchKnn(queryVector.data(), k, nullptr, queryEf); if (result.size() != (unsigned long)k) { - throw std::runtime_error("Fewer than expected results were retrieved; only found " + - std::to_string(result.size()) + " of " + std::to_string(k) + " requested neighbors."); + throw std::runtime_error( + "Fewer than expected results were retrieved; only found " + + std::to_string(result.size()) + " of " + std::to_string(k) + + " requested neighbors."); } for (int i = k - 1; i >= 0; i--) { @@ -625,20 +688,24 @@ class TypedIndex : public Index { } } else { std::vector norm_array(numFeatures); - normalizeVector(floatQueryVector.data(), norm_array.data(), actualDimensions); + normalizeVector( + floatQueryVector.data(), norm_array.data(), actualDimensions); std::priority_queue> result = algorithmImpl->searchKnn(norm_array.data(), k, nullptr, queryEf); if (result.size() != (unsigned long)k) { - throw std::runtime_error("Fewer than expected results were retrieved; only found " + - std::to_string(result.size()) + " of " + std::to_string(k) + " requested neighbors."); + throw std::runtime_error( + "Fewer than expected results were retrieved; only found " + + std::to_string(result.size()) + " of " + std::to_string(k) + + " requested neighbors."); } for (int i = k - 1; i >= 0; i--) { auto &result_tuple = result.top(); - distancePointer[i] = ensureNotNegative(result_tuple.first, result_tuple.second); + distancePointer[i] = ensureNotNegative( + result_tuple.first, result_tuple.second); labelPointer[i] = result_tuple.second; result.pop(); } @@ -647,9 +714,13 @@ class TypedIndex : public Index { 
return {labels, distances}; } - void markDeleted(hnswlib::labeltype label) { algorithmImpl->markDelete(label); } + void markDeleted(hnswlib::labeltype label) { + algorithmImpl->markDelete(label); + } - void unmarkDeleted(hnswlib::labeltype label) { algorithmImpl->unmarkDelete(label); } + void unmarkDeleted(hnswlib::labeltype label) { + algorithmImpl->unmarkDelete(label); + } void resizeIndex(size_t new_size) { algorithmImpl->resizeIndex(new_size); } @@ -671,35 +742,47 @@ class TypedIndex : public Index { size_t getM() const { return algorithmImpl->M_; } }; -std::unique_ptr loadTypedIndexFromMetadata(std::unique_ptr metadata, - std::shared_ptr inputStream) { +std::unique_ptr +loadTypedIndexFromMetadata(std::unique_ptr metadata, + std::shared_ptr inputStream) { if (!metadata) { - throw std::domain_error("The provided file contains no Voyager parameter metadata. Please " - "specify the number of dimensions, SpaceType, and StorageDataType that " - "this index contains."); - } else if (voyager::Metadata::V1 *v1 = dynamic_cast(metadata.get())) { + throw std::domain_error( + "The provided file contains no Voyager parameter metadata. Please " + "specify the number of dimensions, SpaceType, and StorageDataType that " + "this index contains."); + } else if (voyager::Metadata::V1 *v1 = + dynamic_cast(metadata.get())) { // We have enough information to create a TypedIndex! 
switch (v1->getStorageDataType()) { case StorageDataType::Float32: return std::make_unique>( - std::unique_ptr((voyager::Metadata::V1 *)metadata.release()), inputStream); + std::unique_ptr( + (voyager::Metadata::V1 *)metadata.release()), + inputStream); break; case StorageDataType::Float8: return std::make_unique>>( - std::unique_ptr((voyager::Metadata::V1 *)metadata.release()), inputStream); + std::unique_ptr( + (voyager::Metadata::V1 *)metadata.release()), + inputStream); break; case StorageDataType::E4M3: return std::make_unique>( - std::unique_ptr((voyager::Metadata::V1 *)metadata.release()), inputStream); + std::unique_ptr( + (voyager::Metadata::V1 *)metadata.release()), + inputStream); break; default: - throw std::domain_error("Unknown storage data type: " + std::to_string((int)v1->getStorageDataType())); + throw std::domain_error("Unknown storage data type: " + + std::to_string((int)v1->getStorageDataType())); } } else { throw std::domain_error("Unknown Voyager metadata format."); } } -std::unique_ptr loadTypedIndexFromStream(std::shared_ptr inputStream) { - return loadTypedIndexFromMetadata(voyager::Metadata::loadFromStream(inputStream), inputStream); +std::unique_ptr +loadTypedIndexFromStream(std::shared_ptr inputStream) { + return loadTypedIndexFromMetadata( + voyager::Metadata::loadFromStream(inputStream), inputStream); } diff --git a/cpp/src/array_utils.h b/cpp/src/array_utils.h index 0971fd88..7c2a7556 100644 --- a/cpp/src/array_utils.h +++ b/cpp/src/array_utils.h @@ -42,13 +42,16 @@ template class NDArray { const std::array strides; NDArray(std::array shape) - : data(std::accumulate(shape.begin(), shape.end(), 1, std::multiplies())), shape(shape), - strides(computeStrides()) {} + : data(std::accumulate(shape.begin(), shape.end(), 1, + std::multiplies())), + shape(shape), strides(computeStrides()) {} - NDArray(std::vector data, std::array shape) : data(data), shape(shape), strides(computeStrides()) {} + NDArray(std::vector data, std::array shape) + : 
data(data), shape(shape), strides(computeStrides()) {} NDArray(T *inputPointer, std::array shape) - : data(computeNumElements(shape)), shape(shape), strides(computeStrides()) { + : data(computeNumElements(shape)), shape(shape), + strides(computeStrides()) { std::memcpy(data.data(), inputPointer, data.size() * sizeof(T)); } @@ -81,8 +84,9 @@ NDArray floatToDataType(NDArray input) { // Handle rescaling to integer storage values if necessary: if constexpr (std::is_same_v) { if constexpr (scalefactor::num != scalefactor::den) { - throw std::runtime_error("Index has a non-unity scale factor set, but is using float32 data " - "storage. This combination is not yet implemented."); + throw std::runtime_error( + "Index has a non-unity scale factor set, but is using float32 data " + "storage. This combination is not yet implemented."); } return input; @@ -99,10 +103,12 @@ NDArray floatToDataType(NDArray input) { return output; } else { // Re-scale the input values by multiplying by `scalefactor`: - constexpr float lowerBound = - (float)std::numeric_limits::min() * (float)scalefactor::num / (float)scalefactor::den; - constexpr float upperBound = - (float)std::numeric_limits::max() * (float)scalefactor::num / (float)scalefactor::den; + constexpr float lowerBound = (float)std::numeric_limits::min() * + (float)scalefactor::num / + (float)scalefactor::den; + constexpr float upperBound = (float)std::numeric_limits::max() * + (float)scalefactor::num / + (float)scalefactor::den; NDArray output(input.shape); @@ -112,12 +118,15 @@ NDArray floatToDataType(NDArray input) { for (unsigned long i = 0; i < input.data.size(); i++) { if (inputPointer[i] > upperBound || inputPointer[i] < lowerBound) { - throw std::domain_error("One or more vectors contain values outside of [" + std::to_string(lowerBound) + ", " + - std::to_string(upperBound) + "]. 
Index: " + std::to_string(i) + - ", invalid value: " + std::to_string(inputPointer[i])); + throw std::domain_error( + "One or more vectors contain values outside of [" + + std::to_string(lowerBound) + ", " + std::to_string(upperBound) + + "]. Index: " + std::to_string(i) + + ", invalid value: " + std::to_string(inputPointer[i])); } - outputPointer[i] = (inputPointer[i] * (float)scalefactor::den) / (float)scalefactor::num; + outputPointer[i] = + (inputPointer[i] * (float)scalefactor::den) / (float)scalefactor::num; } return output; @@ -125,12 +134,14 @@ NDArray floatToDataType(NDArray input) { } template > -void floatToDataType(const float *inputPointer, data_t *outputPointer, int dimensions) { +void floatToDataType(const float *inputPointer, data_t *outputPointer, + int dimensions) { // Handle rescaling to integer storage values if necessary: if constexpr (std::is_same_v) { if constexpr (scalefactor::num != scalefactor::den) { - throw std::runtime_error("Index has a non-unity scale factor set, but is using float32 data " - "storage. This combination is not yet implemented."); + throw std::runtime_error( + "Index has a non-unity scale factor set, but is using float32 data " + "storage. 
This combination is not yet implemented."); } std::memcpy(outputPointer, inputPointer, sizeof(float) * dimensions); @@ -141,22 +152,27 @@ void floatToDataType(const float *inputPointer, data_t *outputPointer, int dimen } } else { // Re-scale the input values by multiplying by `scalefactor`: - constexpr float lowerBound = - (float)std::numeric_limits::min() * (float)scalefactor::num / (float)scalefactor::den; - constexpr float upperBound = - (float)std::numeric_limits::max() * (float)scalefactor::num / (float)scalefactor::den; + constexpr float lowerBound = (float)std::numeric_limits::min() * + (float)scalefactor::num / + (float)scalefactor::den; + constexpr float upperBound = (float)std::numeric_limits::max() * + (float)scalefactor::num / + (float)scalefactor::den; std::vector output(dimensions); // Re-scale the input values by multiplying by `scalefactor`: for (int i = 0; i < dimensions; i++) { if (inputPointer[i] > upperBound || inputPointer[i] < lowerBound) { - throw std::domain_error("One or more vectors contain values outside of [" + std::to_string(lowerBound) + ", " + - std::to_string(upperBound) + "]. Index: " + std::to_string(i) + - ", invalid value: " + std::to_string(inputPointer[i])); + throw std::domain_error( + "One or more vectors contain values outside of [" + + std::to_string(lowerBound) + ", " + std::to_string(upperBound) + + "]. Index: " + std::to_string(i) + + ", invalid value: " + std::to_string(inputPointer[i])); } - outputPointer[i] = (inputPointer[i] * (float)scalefactor::den) / (float)scalefactor::num; + outputPointer[i] = + (inputPointer[i] * (float)scalefactor::den) / (float)scalefactor::num; } } } @@ -165,15 +181,17 @@ template > std::vector floatToDataType(const std::vector input) { if constexpr (std::is_same_v) { if constexpr (scalefactor::num != scalefactor::den) { - throw std::runtime_error("Index has a non-unity scale factor set, but is using float32 data " - "storage. 
This combination is not yet implemented."); + throw std::runtime_error( + "Index has a non-unity scale factor set, but is using float32 data " + "storage. This combination is not yet implemented."); } return input; } std::vector output(input.size()); - floatToDataType(input.data(), output.data(), input.size()); + floatToDataType(input.data(), output.data(), + input.size()); return output; } @@ -182,8 +200,9 @@ NDArray dataTypeToFloat(NDArray input) { // Handle rescaling to integer storage values if necessary: if constexpr (std::is_same_v) { if constexpr (scalefactor::num != scalefactor::den) { - throw std::runtime_error("Index has a non-unity scale factor set, but is using float32 data " - "storage. This combination is not yet implemented."); + throw std::runtime_error( + "Index has a non-unity scale factor set, but is using float32 data " + "storage. This combination is not yet implemented."); } return input; @@ -195,19 +214,22 @@ NDArray dataTypeToFloat(NDArray input) { float *outputPointer = output.data.data(); for (unsigned long i = 0; i < input.data.size(); i++) { - outputPointer[i] = ((float)inputPointer[i] * (float)scalefactor::num) / (float)scalefactor::den; + outputPointer[i] = ((float)inputPointer[i] * (float)scalefactor::num) / + (float)scalefactor::den; } return output; } } -template > +template > void normalizeVector(const float *data, data_t *norm_array, int dimensions) { dist_t norm = 0.0; for (int i = 0; i < dimensions; i++) { if constexpr (scalefactor::num != scalefactor::den) { - dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; + dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / + (dist_t)scalefactor::den; norm += point * point; } else { norm += data[i] * data[i]; @@ -216,7 +238,8 @@ void normalizeVector(const float *data, data_t *norm_array, int dimensions) { norm = 1.0f / (sqrtf(norm) + 1e-30f); for (int i = 0; i < dimensions; i++) { if constexpr (scalefactor::num != scalefactor::den) { - dist_t 
element = (data[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; + dist_t element = + (data[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; dist_t normalizedElement = element * norm; norm_array[i] = (normalizedElement * scalefactor::den) / scalefactor::num; } else { @@ -226,12 +249,14 @@ void normalizeVector(const float *data, data_t *norm_array, int dimensions) { } } -template > +template > dist_t getNorm(const data_t *data, int dimensions) { dist_t norm = 0.0; for (int i = 0; i < dimensions; i++) { if constexpr (scalefactor::num != scalefactor::den) { - dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; + dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / + (dist_t)scalefactor::den; norm += point * point; } else { norm += data[i] * data[i]; @@ -240,12 +265,14 @@ dist_t getNorm(const data_t *data, int dimensions) { return sqrtf(norm); } -template > +template > bool isNormalized(const data_t *data, int dimensions, dist_t maxNorm) { dist_t norm = 0.0; for (int i = 0; i < dimensions; i++) { if constexpr (scalefactor::num != scalefactor::den) { - dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; + dist_t point = (dist_t)(data[i] * (dist_t)scalefactor::num) / + (dist_t)scalefactor::den; norm += point * point; } else { norm += data[i] * data[i]; @@ -254,13 +281,15 @@ bool isNormalized(const data_t *data, int dimensions, dist_t maxNorm) { return norm <= maxNorm; } -template > +template > std::string toFloatVectorString(data_t *vec, size_t size) { std::ostringstream ss; ss << "["; for (size_t i = 0; i < size; i++) { if constexpr (scalefactor::num != scalefactor::den) { - float point = (dist_t)(vec[i] * (dist_t)scalefactor::num) / (dist_t)scalefactor::den; + float point = (dist_t)(vec[i] * (dist_t)scalefactor::num) / + (dist_t)scalefactor::den; ss << ((float)point); } else { ss << ((float)vec[i]); @@ -274,7 +303,9 @@ std::string toFloatVectorString(data_t *vec, size_t 
size) { return ss.str(); } -template > +template > std::string toFloatVectorString(std::vector vec) { - return toFloatVectorString(vec.data(), vec.size()); + return toFloatVectorString(vec.data(), + vec.size()); } diff --git a/cpp/src/hnswalg.h b/cpp/src/hnswalg.h index 80d3606b..ab9b27b4 100644 --- a/cpp/src/hnswalg.h +++ b/cpp/src/hnswalg.h @@ -40,7 +40,8 @@ class IndexCannotBeShrunkError : public std::runtime_error { public: - IndexCannotBeShrunkError(const std::string &what) : std::runtime_error(what) {} + IndexCannotBeShrunkError(const std::string &what) + : std::runtime_error(what) {} }; class IndexFullError : public std::runtime_error { @@ -52,19 +53,22 @@ namespace hnswlib { typedef unsigned int tableint; typedef unsigned int linklistsizeint; -template class HierarchicalNSW : public AlgorithmInterface { +template +class HierarchicalNSW : public AlgorithmInterface { public: static const tableint max_update_element_locks = 65536; - HierarchicalNSW(Space *s, std::shared_ptr inputStream, size_t max_elements = 0, - bool search_only = false) + HierarchicalNSW(Space *s, + std::shared_ptr inputStream, + size_t max_elements = 0, bool search_only = false) : search_only_(search_only) { loadIndex(inputStream, s, max_elements); } - HierarchicalNSW(Space *s, size_t max_elements, size_t M = 16, size_t ef_construction = 200, - size_t random_seed = 100) - : link_list_locks_(max_elements), link_list_update_locks_(max_update_element_locks), + HierarchicalNSW(Space *s, size_t max_elements, size_t M = 16, + size_t ef_construction = 200, size_t random_seed = 100) + : link_list_locks_(max_elements), + link_list_update_locks_(max_update_element_locks), element_levels_(max_elements) { max_elements_ = max_elements; @@ -82,12 +86,14 @@ template class HierarchicalNSW : pub update_probability_generator_.seed(random_seed + 1); size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint); - size_data_per_element_ = size_links_level0_ + data_size_ + sizeof(labeltype); + 
size_data_per_element_ = + size_links_level0_ + data_size_ + sizeof(labeltype); offsetData_ = size_links_level0_; label_offset_ = size_links_level0_ + data_size_; offsetLevel0_ = 0; - data_level0_memory_ = (char *)malloc(max_elements_ * size_data_per_element_); + data_level0_memory_ = + (char *)malloc(max_elements_ * size_data_per_element_); if (data_level0_memory_ == nullptr) throw std::runtime_error("Not enough memory"); @@ -101,15 +107,18 @@ template class HierarchicalNSW : pub linkLists_ = (char **)malloc(sizeof(void *) * max_elements_); if (linkLists_ == nullptr) - throw std::runtime_error("Not enough memory: HierarchicalNSW failed to allocate linklists"); - size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); + throw std::runtime_error( + "Not enough memory: HierarchicalNSW failed to allocate linklists"); + size_links_per_element_ = + maxM_ * sizeof(tableint) + sizeof(linklistsizeint); mult_ = 1 / log(1.0 * M_); revSize_ = 1.0 / mult_; } struct CompareByFirst { - constexpr bool operator()(std::pair const &a, - std::pair const &b) const noexcept { + constexpr bool + operator()(std::pair const &a, + std::pair const &b) const noexcept { return a.first < b.first; } }; @@ -172,21 +181,28 @@ template class HierarchicalNSW : pub inline labeltype getExternalLabel(tableint internal_id) const { labeltype return_label; - memcpy(&return_label, (data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), + memcpy(&return_label, + (data_level0_memory_ + internal_id * size_data_per_element_ + + label_offset_), sizeof(labeltype)); return return_label; } inline void setExternalLabel(tableint internal_id, labeltype label) const { - memcpy((data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_), &label, sizeof(labeltype)); + memcpy((data_level0_memory_ + internal_id * size_data_per_element_ + + label_offset_), + &label, sizeof(labeltype)); } inline labeltype *getExternalLabeLp(tableint internal_id) const { - return 
(labeltype *)(data_level0_memory_ + internal_id * size_data_per_element_ + label_offset_); + return (labeltype *)(data_level0_memory_ + + internal_id * size_data_per_element_ + label_offset_); } inline data_t *getDataByInternalId(tableint internal_id) const { - return reinterpret_cast(data_level0_memory_ + internal_id * size_data_per_element_ + offsetData_); + return reinterpret_cast(data_level0_memory_ + + internal_id * size_data_per_element_ + + offsetData_); } int getRandomLevel(double reverse_size) { @@ -195,8 +211,10 @@ template class HierarchicalNSW : pub return (int)r; } - std::priority_queue, std::vector>, CompareByFirst> - searchBaseLayer(tableint ep_id, const data_t *data_point, int layer, VisitedList *vl = nullptr) { + std::priority_queue, + std::vector>, CompareByFirst> + searchBaseLayer(tableint ep_id, const data_t *data_point, int layer, + VisitedList *vl = nullptr) { bool wasPassedVisitedList = vl != nullptr; if (!wasPassedVisitedList) { vl = visited_list_pool_->getFreeVisitedList(); @@ -207,14 +225,19 @@ template class HierarchicalNSW : pub vl_type *visited_array = vl->mass; vl_type visited_array_tag = vl->curV; - std::priority_queue, std::vector>, CompareByFirst> + std::priority_queue, + std::vector>, + CompareByFirst> top_candidates; - std::priority_queue, std::vector>, CompareByFirst> + std::priority_queue, + std::vector>, + CompareByFirst> candidateSet; dist_t lowerBound; if (!isMarkedDeleted(ep_id)) { - dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), dist_func_param_); + dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), + dist_func_param_); top_candidates.emplace(dist, ep_id); lowerBound = dist; candidateSet.emplace(-dist, ep_id); @@ -226,7 +249,8 @@ template class HierarchicalNSW : pub while (!candidateSet.empty()) { std::pair curr_el_pair = candidateSet.top(); - if ((-curr_el_pair.first) > lowerBound && top_candidates.size() == ef_construction_) { + if ((-curr_el_pair.first) > lowerBound && + 
top_candidates.size() == ef_construction_) { break; } candidateSet.pop(); @@ -282,8 +306,10 @@ template class HierarchicalNSW : pub mutable std::atomic metric_hops; template - std::priority_queue, std::vector>, CompareByFirst> - searchBaseLayerST(tableint ep_id, const data_t *data_point, size_t ef, VisitedList *vl = nullptr) const { + std::priority_queue, + std::vector>, CompareByFirst> + searchBaseLayerST(tableint ep_id, const data_t *data_point, size_t ef, + VisitedList *vl = nullptr) const { bool wasPassedVisitedList = vl != nullptr; if (!wasPassedVisitedList) { vl = visited_list_pool_->getFreeVisitedList(); @@ -294,14 +320,19 @@ template class HierarchicalNSW : pub vl_type *visited_array = vl->mass; vl_type visited_array_tag = vl->curV; - std::priority_queue, std::vector>, CompareByFirst> + std::priority_queue, + std::vector>, + CompareByFirst> top_candidates; - std::priority_queue, std::vector>, CompareByFirst> + std::priority_queue, + std::vector>, + CompareByFirst> candidate_set; dist_t lowerBound; if (!has_deletions || !isMarkedDeleted(ep_id)) { - dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), dist_func_param_); + dist_t dist = fstdistfunc_(data_point, getDataByInternalId(ep_id), + dist_func_param_); lowerBound = dist; top_candidates.emplace(dist, ep_id); candidate_set.emplace(-dist, ep_id); @@ -316,7 +347,8 @@ template class HierarchicalNSW : pub std::pair current_node_pair = candidate_set.top(); - if ((-current_node_pair.first) > lowerBound && (top_candidates.size() == ef || has_deletions == false)) { + if ((-current_node_pair.first) > lowerBound && + (top_candidates.size() == ef || has_deletions == false)) { break; } candidate_set.pop(); @@ -363,8 +395,9 @@ template class HierarchicalNSW : pub } void getNeighborsByHeuristic2( - std::priority_queue, std::vector>, CompareByFirst> - &top_candidates, + std::priority_queue, + std::vector>, + CompareByFirst> &top_candidates, const size_t M) { if (top_candidates.size() < M) { return; @@ 
-373,7 +406,8 @@ template class HierarchicalNSW : pub std::priority_queue> queue_closest; std::vector> return_list; while (top_candidates.size() > 0) { - queue_closest.emplace(-top_candidates.top().first, top_candidates.top().second); + queue_closest.emplace(-top_candidates.top().first, + top_candidates.top().second); top_candidates.pop(); } @@ -386,7 +420,8 @@ template class HierarchicalNSW : pub bool good = true; for (std::pair second_pair : return_list) { - dist_t curdist = fstdistfunc_(getDataByInternalId(second_pair.second), getDataByInternalId(curent_pair.second), + dist_t curdist = fstdistfunc_(getDataByInternalId(second_pair.second), + getDataByInternalId(curent_pair.second), dist_func_param_); ; if (curdist < dist_to_query) { @@ -405,30 +440,40 @@ template class HierarchicalNSW : pub } linklistsizeint *get_linklist0(tableint internal_id) const { - return (linklistsizeint *)(data_level0_memory_ + internal_id * size_data_per_element_ + offsetLevel0_); + return (linklistsizeint *)(data_level0_memory_ + + internal_id * size_data_per_element_ + + offsetLevel0_); }; - linklistsizeint *get_linklist0(tableint internal_id, char *data_level0_memory_) const { - return (linklistsizeint *)(data_level0_memory_ + internal_id * size_data_per_element_ + offsetLevel0_); + linklistsizeint *get_linklist0(tableint internal_id, + char *data_level0_memory_) const { + return (linklistsizeint *)(data_level0_memory_ + + internal_id * size_data_per_element_ + + offsetLevel0_); }; linklistsizeint *get_linklist(tableint internal_id, int level) const { - return (linklistsizeint *)(linkLists_[internal_id] + (level - 1) * size_links_per_element_); + return (linklistsizeint *)(linkLists_[internal_id] + + (level - 1) * size_links_per_element_); }; - linklistsizeint *get_linklist_at_level(tableint internal_id, int level) const { - return level == 0 ? 
get_linklist0(internal_id) : get_linklist(internal_id, level); + linklistsizeint *get_linklist_at_level(tableint internal_id, + int level) const { + return level == 0 ? get_linklist0(internal_id) + : get_linklist(internal_id, level); }; tableint mutuallyConnectNewElement( const data_t *data_point, tableint cur_c, - std::priority_queue, std::vector>, CompareByFirst> - &top_candidates, + std::priority_queue, + std::vector>, + CompareByFirst> &top_candidates, int level, bool isUpdate) { size_t Mcurmax = level ? maxM_ : maxM0_; getNeighborsByHeuristic2(top_candidates, M_); if (top_candidates.size() > M_) - throw std::runtime_error("Should be not be more than M_ candidates returned by the heuristic"); + throw std::runtime_error( + "Should be not be more than M_ candidates returned by the heuristic"); std::vector selectedNeighbors; selectedNeighbors.reserve(M_); @@ -447,7 +492,8 @@ template class HierarchicalNSW : pub ll_cur = get_linklist(cur_c, level); if (*ll_cur && !isUpdate) { - throw std::runtime_error("The newly inserted element should have blank link list"); + throw std::runtime_error( + "The newly inserted element should have blank link list"); } setListCount(ll_cur, selectedNeighbors.size()); tableint *data = (tableint *)(ll_cur + 1); @@ -455,7 +501,8 @@ template class HierarchicalNSW : pub if (data[idx] && !isUpdate) throw std::runtime_error("Possible memory corruption"); if (level > element_levels_[selectedNeighbors[idx]]) - throw std::runtime_error("Trying to make a link on a non-existent level"); + throw std::runtime_error( + "Trying to make a link on a non-existent level"); data[idx] = selectedNeighbors[idx]; } @@ -463,7 +510,8 @@ template class HierarchicalNSW : pub for (size_t idx = 0; idx < selectedNeighbors.size(); idx++) { - std::unique_lock lock(link_list_locks_[selectedNeighbors[idx]]); + std::unique_lock lock( + link_list_locks_[selectedNeighbors[idx]]); linklistsizeint *ll_other; if (level == 0) @@ -478,7 +526,8 @@ template class HierarchicalNSW : 
pub if (selectedNeighbors[idx] == cur_c) throw std::runtime_error("Trying to connect an element to itself"); if (level > element_levels_[selectedNeighbors[idx]]) - throw std::runtime_error("Trying to make a link on a non-existent level"); + throw std::runtime_error( + "Trying to make a link on a non-existent level"); tableint *data = (tableint *)(ll_other + 1); @@ -501,17 +550,22 @@ template class HierarchicalNSW : pub setListCount(ll_other, sz_link_list_other + 1); } else { // finding the "weakest" element to replace it with the new one - dist_t d_max = - fstdistfunc_(getDataByInternalId(cur_c), getDataByInternalId(selectedNeighbors[idx]), dist_func_param_); + dist_t d_max = fstdistfunc_( + getDataByInternalId(cur_c), + getDataByInternalId(selectedNeighbors[idx]), dist_func_param_); // Heuristic: - std::priority_queue, std::vector>, CompareByFirst> + std::priority_queue, + std::vector>, + CompareByFirst> candidates; candidates.emplace(d_max, cur_c); for (size_t j = 0; j < sz_link_list_other; j++) { - candidates.emplace(fstdistfunc_(getDataByInternalId(data[j]), getDataByInternalId(selectedNeighbors[idx]), - dist_func_param_), - data[j]); + candidates.emplace( + fstdistfunc_(getDataByInternalId(data[j]), + getDataByInternalId(selectedNeighbors[idx]), + dist_func_param_), + data[j]); } getNeighborsByHeuristic2(candidates, Mcurmax); @@ -548,13 +602,14 @@ template class HierarchicalNSW : pub void setEf(size_t ef) { ef_ = ef; } - std::priority_queue> searchKnnInternal(data_t *query_data, int k, - VisitedList *vl = nullptr) { + std::priority_queue> + searchKnnInternal(data_t *query_data, int k, VisitedList *vl = nullptr) { std::priority_queue> top_candidates; if (cur_element_count == 0) return top_candidates; tableint currObj = enterpoint_node_; - dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); + dist_t curdist = fstdistfunc_( + query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); for (size_t level = 
maxlevel_; level > 0; level--) { bool changed = true; @@ -568,7 +623,8 @@ template class HierarchicalNSW : pub tableint cand = datal[i]; if (cand < 0 || cand > max_elements_) throw std::runtime_error("cand error"); - dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); + dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), + dist_func_param_); if (d < curdist) { curdist = d; @@ -597,13 +653,15 @@ template class HierarchicalNSW : pub void resizeIndex(size_t new_max_elements) { if (search_only_) - throw std::runtime_error("resizeIndex is not supported in search only mode"); + throw std::runtime_error( + "resizeIndex is not supported in search only mode"); std::unique_lock lock(resizeLock); if (new_max_elements < cur_element_count) - throw IndexCannotBeShrunkError("Cannot resize to " + std::to_string(new_max_elements) + - " elements, as this index already contains " + std::to_string(cur_element_count) + - " elements."); + throw IndexCannotBeShrunkError( + "Cannot resize to " + std::to_string(new_max_elements) + + " elements, as this index already contains " + + std::to_string(cur_element_count) + " elements."); delete visited_list_pool_; visited_list_pool_ = new VisitedListPool(1, new_max_elements); @@ -613,21 +671,27 @@ template class HierarchicalNSW : pub std::vector(new_max_elements).swap(link_list_locks_); // Reallocate base layer - char *data_level0_memory_new = (char *)realloc(data_level0_memory_, new_max_elements * size_data_per_element_); + char *data_level0_memory_new = (char *)realloc( + data_level0_memory_, new_max_elements * size_data_per_element_); if (data_level0_memory_new == nullptr) - throw std::runtime_error("Not enough memory: resizeIndex failed to allocate base layer"); + throw std::runtime_error( + "Not enough memory: resizeIndex failed to allocate base layer"); data_level0_memory_ = data_level0_memory_new; // Reallocate all other layers - char **linkLists_new = (char **)realloc(linkLists_, sizeof(void *) * 
new_max_elements); + char **linkLists_new = + (char **)realloc(linkLists_, sizeof(void *) * new_max_elements); if (linkLists_new == nullptr) - throw std::runtime_error("Not enough memory: resizeIndex failed to allocate other layers"); + throw std::runtime_error( + "Not enough memory: resizeIndex failed to allocate other layers"); linkLists_ = linkLists_new; max_elements_ = new_max_elements; } - void saveIndex(const std::string &filename) { saveIndex(std::make_shared(filename)); } + void saveIndex(const std::string &filename) { + saveIndex(std::make_shared(filename)); + } void saveIndex(std::shared_ptr output) { writeBinaryPOD(output, offsetLevel0_); @@ -645,17 +709,21 @@ template class HierarchicalNSW : pub writeBinaryPOD(output, mult_); writeBinaryPOD(output, ef_construction_); - output->write(data_level0_memory_, cur_element_count * size_data_per_element_); + output->write(data_level0_memory_, + cur_element_count * size_data_per_element_); for (size_t i = 0; i < cur_element_count; i++) { - unsigned int linkListSize = element_levels_[i] > 0 ? size_links_per_element_ * element_levels_[i] : 0; + unsigned int linkListSize = + element_levels_[i] > 0 ? 
size_links_per_element_ * element_levels_[i] + : 0; writeBinaryPOD(output, linkListSize); if (linkListSize) output->write(linkLists_[i], linkListSize); } } - void loadIndex(std::shared_ptr inputStream, Space *s, size_t max_elements_i = 0) { + void loadIndex(std::shared_ptr inputStream, + Space *s, size_t max_elements_i = 0) { size_t totalFileSize = 0; if (inputStream->isSeekable()) { totalFileSize = inputStream->getTotalLength(); @@ -664,7 +732,8 @@ template class HierarchicalNSW : pub if (totalFileSize > 0 && offsetLevel0_ > totalFileSize) { throw std::domain_error("Index appears to contain corrupted data; level " "0 offset parameter (" + - std::to_string(offsetLevel0_) + ") exceeded size of index file (" + + std::to_string(offsetLevel0_) + + ") exceeded size of index file (" + std::to_string(totalFileSize) + ")."); } @@ -682,10 +751,12 @@ template class HierarchicalNSW : pub readBinaryPOD(inputStream, enterpoint_node_); if (enterpoint_node_ >= cur_element_count) { - throw std::runtime_error("Index seems to be corrupted or unsupported. " - "Entry point into HNSW data structure was at element index " + - std::to_string(enterpoint_node_) + ", but only " + std::to_string(cur_element_count) + - " elements are present in the index."); + throw std::runtime_error( + "Index seems to be corrupted or unsupported. 
" + "Entry point into HNSW data structure was at element index " + + std::to_string(enterpoint_node_) + ", but only " + + std::to_string(cur_element_count) + + " elements are present in the index."); } readBinaryPOD(inputStream, maxM_); @@ -698,18 +769,22 @@ template class HierarchicalNSW : pub fstdistfunc_ = s->get_dist_func(); dist_func_param_ = s->get_dist_func_param(); - size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint); + size_links_per_element_ = + maxM_ * sizeof(tableint) + sizeof(linklistsizeint); size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint); - size_t expected_size_per_element = size_links_level0_ + data_size_ + sizeof(labeltype); + size_t expected_size_per_element = + size_links_level0_ + data_size_ + sizeof(labeltype); if (size_data_per_element_ != expected_size_per_element) { - throw std::domain_error("Storage data type does not match the index data being loaded; " - "expected " + - std::to_string(expected_size_per_element) + - " bytes per element, but loaded data contains " + std::to_string(size_data_per_element_) + - " bytes per element. Data being loaded might not be a Voyager index, " - "may be corrupt, or may be using a different storage data type."); + throw std::domain_error( + "Storage data type does not match the index data being loaded; " + "expected " + + std::to_string(expected_size_per_element) + + " bytes per element, but loaded data contains " + + std::to_string(size_data_per_element_) + + " bytes per element. 
Data being loaded might not be a Voyager index, " + "may be corrupt, or may be using a different storage data type."); } long long position = inputStream->getPosition(); @@ -717,55 +792,68 @@ template class HierarchicalNSW : pub if (inputStream->isSeekable()) { inputStream->advanceBy(cur_element_count * size_data_per_element_); for (size_t i = 0; i < cur_element_count; i++) { - if (inputStream->getPosition() < 0 || inputStream->getPosition() >= (long long)totalFileSize) { + if (inputStream->getPosition() < 0 || + inputStream->getPosition() >= (long long)totalFileSize) { throw std::runtime_error( "Index seems to be corrupted or unsupported. Seeked to " + - std::to_string(position + (cur_element_count * size_data_per_element_) + (sizeof(unsigned int) * i)) + + std::to_string(position + + (cur_element_count * size_data_per_element_) + + (sizeof(unsigned int) * i)) + " bytes to read linked list, but resulting stream position was " + - std::to_string(inputStream->getPosition()) + " (of total file size " + std::to_string(totalFileSize) + + std::to_string(inputStream->getPosition()) + + " (of total file size " + std::to_string(totalFileSize) + " bytes)."); } unsigned int linkListSize; readBinaryPOD(inputStream, linkListSize); if (linkListSize != 0) { - if ((size_t)inputStream->getPosition() + linkListSize > totalFileSize) { - throw std::runtime_error("Index seems to be corrupted or unsupported. Advancing to the " - "next linked list requires " + - std::to_string(linkListSize) + " additional bytes (from position " + - std::to_string(inputStream->getPosition()) + "), but index data only has " + - std::to_string(totalFileSize) + " bytes in total."); + if ((size_t)inputStream->getPosition() + linkListSize > + totalFileSize) { + throw std::runtime_error( + "Index seems to be corrupted or unsupported. 
Advancing to the " + "next linked list requires " + + std::to_string(linkListSize) + + " additional bytes (from position " + + std::to_string(inputStream->getPosition()) + + "), but index data only has " + std::to_string(totalFileSize) + + " bytes in total."); } inputStream->advanceBy(linkListSize); } } if (inputStream->getPosition() != (long long)totalFileSize) - throw std::runtime_error("Index seems to be corrupted or unsupported. After reading all " - "linked lists, extra data remained at the end of the index."); + throw std::runtime_error( + "Index seems to be corrupted or unsupported. After reading all " + "linked lists, extra data remained at the end of the index."); inputStream->setPosition(position); } data_level0_memory_ = (char *)malloc(max_elements * size_data_per_element_); if (data_level0_memory_ == nullptr) { - throw std::runtime_error("Not enough memory: loadIndex failed to allocate level0 (" + - std::to_string(max_elements * size_data_per_element_) + " bytes)"); + throw std::runtime_error( + "Not enough memory: loadIndex failed to allocate level0 (" + + std::to_string(max_elements * size_data_per_element_) + " bytes)"); } { size_t bytes_to_read = cur_element_count * size_data_per_element_; size_t bytes_read = inputStream->read(data_level0_memory_, bytes_to_read); if (bytes_read != bytes_to_read) { - throw std::runtime_error("Tried to read " + std::to_string(bytes_to_read) + - " bytes from stream, but only received " + std::to_string(bytes_read) + " bytes!"); + throw std::runtime_error("Tried to read " + + std::to_string(bytes_to_read) + + " bytes from stream, but only received " + + std::to_string(bytes_read) + " bytes!"); } } linkLists_ = (char **)malloc(sizeof(void *) * max_elements); if (linkLists_ == nullptr) - throw std::runtime_error("Not enough memory: loadIndex failed to allocate linklists (" + - std::to_string(sizeof(void *) * max_elements) + " bytes)"); + throw std::runtime_error( + "Not enough memory: loadIndex failed to allocate 
linklists (" + + std::to_string(sizeof(void *) * max_elements) + " bytes)"); size_t linkListBufferSize = sizeof(void *) * max_elements; std::vector linkListBuffer(linkListBufferSize); @@ -775,7 +863,8 @@ template class HierarchicalNSW : pub while (true) { long long bytes_to_read = linkListBuffer.size() - bytes_read; - long long bytes_read_this_iteration = inputStream->read(linkListBuffer.data() + bytes_read, bytes_to_read); + long long bytes_read_this_iteration = inputStream->read( + linkListBuffer.data() + bytes_read, bytes_to_read); if (bytes_read_this_iteration > 0) { bytes_read += bytes_read_this_iteration; @@ -788,10 +877,11 @@ template class HierarchicalNSW : pub try { linkListBuffer.resize(linkListBuffer.size() * 2); } catch (std::exception const &e) { - throw std::runtime_error("Failed to resize linked list buffer to " - "double its previous size (from " + - std::to_string(linkListBuffer.size()) + " to " + - std::to_string(linkListBuffer.size() * 2) + ")"); + throw std::runtime_error( + "Failed to resize linked list buffer to " + "double its previous size (from " + + std::to_string(linkListBuffer.size()) + " to " + + std::to_string(linkListBuffer.size() * 2) + ")"); } } else { // We've hit the end of the stream (as we read fewer bytes than asked @@ -810,7 +900,8 @@ template class HierarchicalNSW : pub if (!search_only_) { std::vector(max_elements).swap(link_list_locks_); - std::vector(max_update_element_locks).swap(link_list_update_locks_); + std::vector(max_update_element_locks) + .swap(link_list_update_locks_); } visited_list_pool_ = new VisitedListPool(1, max_elements); @@ -836,17 +927,23 @@ template class HierarchicalNSW : pub element_levels_[i] = linkListSize / size_links_per_element_; linkLists_[i] = (char *)malloc(linkListSize); if (linkLists_[i] == nullptr) - throw std::runtime_error("Not enough memory: loadIndex failed to allocate linklist"); + throw std::runtime_error( + "Not enough memory: loadIndex failed to allocate linklist"); - 
std::memcpy(linkLists_[i], (linkListBuffer.data() + indexInLinkListBuffer), linkListSize); + std::memcpy(linkLists_[i], + (linkListBuffer.data() + indexInLinkListBuffer), + linkListSize); indexInLinkListBuffer += linkListSize; } } - if (enterpoint_node_ > 0 && enterpoint_node_ != (tableint)-1 && !linkLists_[enterpoint_node_]) { - throw std::runtime_error("Index seems to be corrupted or unsupported. " - "Entry point into HNSW data structure was at element index " + - std::to_string(enterpoint_node_) + ", but no linked list was present at that index."); + if (enterpoint_node_ > 0 && enterpoint_node_ != (tableint)-1 && + !linkLists_[enterpoint_node_]) { + throw std::runtime_error( + "Index seems to be corrupted or unsupported. " + "Entry point into HNSW data structure was at element index " + + std::to_string(enterpoint_node_) + + ", but no linked list was present at that index."); } for (size_t i = 0; i < cur_element_count; i++) { @@ -861,12 +958,14 @@ template class HierarchicalNSW : pub std::vector getDataByLabel(labeltype label) const { if (search_only_) - throw std::runtime_error("getDataByLabel is not supported in search only mode"); + throw std::runtime_error( + "getDataByLabel is not supported in search only mode"); tableint label_c; auto search = label_lookup_.find(label); if (search == label_lookup_.end() || isMarkedDeleted(search->second)) { - throw std::runtime_error("Label " + std::to_string(label) + " not found in index."); + throw std::runtime_error("Label " + std::to_string(label) + + " not found in index."); } label_c = search->second; @@ -905,7 +1004,8 @@ template class HierarchicalNSW : pub } internalIDB = search->second; - return fstdistfunc_(getDataByInternalId(internalIDA), getDataByInternalId(internalIDB), dist_func_param_); + return fstdistfunc_(getDataByInternalId(internalIDA), + getDataByInternalId(internalIDB), dist_func_param_); } static const unsigned char DELETE_MARK = 0x01; @@ -917,7 +1017,8 @@ template class HierarchicalNSW : pub */ 
void markDelete(labeltype label) { if (search_only_) - throw std::runtime_error("markDelete is not supported in search only mode"); + throw std::runtime_error( + "markDelete is not supported in search only mode"); auto search = label_lookup_.find(label); if (search == label_lookup_.end()) { @@ -940,7 +1041,8 @@ template class HierarchicalNSW : pub *ll_cur |= DELETE_MARK; num_deleted_ += 1; } else { - throw std::runtime_error("The requested to delete element is already deleted"); + throw std::runtime_error( + "The requested to delete element is already deleted"); } } @@ -969,7 +1071,8 @@ template class HierarchicalNSW : pub *ll_cur &= ~DELETE_MARK; num_deleted_ -= 1; } else { - throw std::runtime_error("The requested to undelete element is not deleted"); + throw std::runtime_error( + "The requested to undelete element is not deleted"); } } @@ -984,7 +1087,9 @@ template class HierarchicalNSW : pub return *ll_cur & DELETE_MARK; } - unsigned short int getListCount(linklistsizeint *ptr) const { return *((unsigned short int *)ptr); } + unsigned short int getListCount(linklistsizeint *ptr) const { + return *((unsigned short int *)ptr); + } void setListCount(linklistsizeint *ptr, unsigned short int size) const { *((unsigned short int *)(ptr)) = *((unsigned short int *)&size); @@ -997,7 +1102,8 @@ template class HierarchicalNSW : pub addPoint(data_point, label, -1); } - void updatePoint(const data_t *dataPoint, tableint internalId, float updateNeighborProbability) { + void updatePoint(const data_t *dataPoint, tableint internalId, + float updateNeighborProbability) { // update the feature vector associated with existing point with new vector memcpy(getDataByInternalId(internalId), dataPoint, data_size_); @@ -1013,7 +1119,8 @@ template class HierarchicalNSW : pub for (int layer = 0; layer <= elemLevel; layer++) { std::unordered_set sCand; std::unordered_set sNeigh; - std::vector listOneHop = getConnectionsWithLock(internalId, layer); + std::vector listOneHop = + 
getConnectionsWithLock(internalId, layer); if (listOneHop.size() == 0) continue; @@ -1022,12 +1129,14 @@ template class HierarchicalNSW : pub for (auto &&elOneHop : listOneHop) { sCand.insert(elOneHop); - if (distribution(update_probability_generator_) > updateNeighborProbability) + if (distribution(update_probability_generator_) > + updateNeighborProbability) continue; sNeigh.insert(elOneHop); - std::vector listTwoHop = getConnectionsWithLock(elOneHop, layer); + std::vector listTwoHop = + getConnectionsWithLock(elOneHop, layer); for (auto &&elTwoHop : listTwoHop) { sCand.insert(elTwoHop); } @@ -1037,16 +1146,22 @@ template class HierarchicalNSW : pub // if (neigh == internalId) // continue; - std::priority_queue, std::vector>, CompareByFirst> + std::priority_queue, + std::vector>, + CompareByFirst> candidates; size_t size = - sCand.find(neigh) == sCand.end() ? sCand.size() : sCand.size() - 1; // sCand guaranteed to have size >= 1 + sCand.find(neigh) == sCand.end() + ? sCand.size() + : sCand.size() - 1; // sCand guaranteed to have size >= 1 size_t elementsToKeep = std::min(ef_construction_, size); for (auto &&cand : sCand) { if (cand == neigh) continue; - dist_t distance = fstdistfunc_(getDataByInternalId(neigh), getDataByInternalId(cand), dist_func_param_); + dist_t distance = + fstdistfunc_(getDataByInternalId(neigh), + getDataByInternalId(cand), dist_func_param_); if (candidates.size() < elementsToKeep) { candidates.emplace(distance, cand); } else { @@ -1075,14 +1190,18 @@ template class HierarchicalNSW : pub } } - repairConnectionsForUpdate(dataPoint, entryPointCopy, internalId, elemLevel, maxLevelCopy); + repairConnectionsForUpdate(dataPoint, entryPointCopy, internalId, elemLevel, + maxLevelCopy); }; - void repairConnectionsForUpdate(const data_t *dataPoint, tableint entryPointInternalId, tableint dataPointInternalId, + void repairConnectionsForUpdate(const data_t *dataPoint, + tableint entryPointInternalId, + tableint dataPointInternalId, int dataPointLevel, 
int maxLevel) { tableint currObj = entryPointInternalId; if (dataPointLevel < maxLevel) { - dist_t curdist = fstdistfunc_(dataPoint, getDataByInternalId(currObj), dist_func_param_); + dist_t curdist = fstdistfunc_(dataPoint, getDataByInternalId(currObj), + dist_func_param_); for (int level = maxLevel; level > dataPointLevel; level--) { bool changed = true; while (changed) { @@ -1094,7 +1213,8 @@ template class HierarchicalNSW : pub tableint *datal = (tableint *)(data + 1); for (int i = 0; i < size; i++) { tableint cand = datal[i]; - dist_t d = fstdistfunc_(dataPoint, getDataByInternalId(cand), dist_func_param_); + dist_t d = fstdistfunc_(dataPoint, getDataByInternalId(cand), + dist_func_param_); if (d < curdist) { curdist = d; currObj = cand; @@ -1106,13 +1226,18 @@ template class HierarchicalNSW : pub } if (dataPointLevel > maxLevel) - throw std::runtime_error("Level of item to be updated cannot be bigger than max level"); + throw std::runtime_error( + "Level of item to be updated cannot be bigger than max level"); for (int level = dataPointLevel; level >= 0; level--) { - std::priority_queue, std::vector>, CompareByFirst> + std::priority_queue, + std::vector>, + CompareByFirst> topCandidates = searchBaseLayer(currObj, dataPoint, level); - std::priority_queue, std::vector>, CompareByFirst> + std::priority_queue, + std::vector>, + CompareByFirst> filteredTopCandidates; while (topCandidates.size() > 0) { if (topCandidates.top().second != dataPointInternalId) @@ -1129,13 +1254,15 @@ template class HierarchicalNSW : pub bool epDeleted = isMarkedDeleted(entryPointInternalId); if (epDeleted) { filteredTopCandidates.emplace( - fstdistfunc_(dataPoint, getDataByInternalId(entryPointInternalId), dist_func_param_), + fstdistfunc_(dataPoint, getDataByInternalId(entryPointInternalId), + dist_func_param_), entryPointInternalId); if (filteredTopCandidates.size() > ef_construction_) filteredTopCandidates.pop(); } - currObj = mutuallyConnectNewElement(dataPoint, 
dataPointInternalId, filteredTopCandidates, level, true); + currObj = mutuallyConnectNewElement(dataPoint, dataPointInternalId, + filteredTopCandidates, level, true); } } } @@ -1162,8 +1289,8 @@ template class HierarchicalNSW : pub tableint existingInternalId = search->second; templock_curr.unlock(); - std::unique_lock lock_el_update( - link_list_update_locks_[(existingInternalId & (max_update_element_locks - 1))]); + std::unique_lock lock_el_update(link_list_update_locks_[( + existingInternalId & (max_update_element_locks - 1))]); if (isMarkedDeleted(existingInternalId)) { unmarkDeletedInternal(existingInternalId); @@ -1174,11 +1301,13 @@ template class HierarchicalNSW : pub } if (cur_element_count >= max_elements_) { - throw IndexFullError("Cannot insert elements; this index already contains " + - std::to_string(cur_element_count) + " elements, and its maximum size is " + - std::to_string(max_elements_) + - ". Call resizeIndex first to increase the maximum size of the " - "index."); + throw IndexFullError( + "Cannot insert elements; this index already contains " + + std::to_string(cur_element_count) + + " elements, and its maximum size is " + + std::to_string(max_elements_) + + ". Call resizeIndex first to increase the maximum size of the " + "index."); }; cur_c = cur_element_count; @@ -1188,7 +1317,8 @@ template class HierarchicalNSW : pub // Take update lock to prevent race conditions on an element with // insertion/update at the same time. 
- std::unique_lock lock_el_update(link_list_update_locks_[(cur_c & (max_update_element_locks - 1))]); + std::unique_lock lock_el_update( + link_list_update_locks_[(cur_c & (max_update_element_locks - 1))]); std::unique_lock lock_el(link_list_locks_[cur_c]); int curlevel = getRandomLevel(mult_); if (level > 0) @@ -1203,16 +1333,19 @@ template class HierarchicalNSW : pub tableint currObj = enterpoint_node_; tableint enterpoint_copy = enterpoint_node_; - memset(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_, 0, size_data_per_element_); + memset(data_level0_memory_ + cur_c * size_data_per_element_ + offsetLevel0_, + 0, size_data_per_element_); // Initialisation of the data and label memcpy(getExternalLabeLp(cur_c), &label, sizeof(labeltype)); memcpy(getDataByInternalId(cur_c), data_point, data_size_); if (curlevel) { - linkLists_[cur_c] = (char *)malloc(size_links_per_element_ * curlevel + 1); + linkLists_[cur_c] = + (char *)malloc(size_links_per_element_ * curlevel + 1); if (linkLists_[cur_c] == nullptr) - throw std::runtime_error("Not enough memory: addPoint failed to allocate linklist"); + throw std::runtime_error( + "Not enough memory: addPoint failed to allocate linklist"); memset(linkLists_[cur_c], 0, size_links_per_element_ * curlevel + 1); } @@ -1220,7 +1353,8 @@ template class HierarchicalNSW : pub if (curlevel < maxlevelcopy) { - dist_t curdist = fstdistfunc_(data_point, getDataByInternalId(currObj), dist_func_param_); + dist_t curdist = fstdistfunc_(data_point, getDataByInternalId(currObj), + dist_func_param_); for (int level = maxlevelcopy; level > curlevel; level--) { bool changed = true; @@ -1236,7 +1370,8 @@ template class HierarchicalNSW : pub tableint cand = datal[i]; if (cand < 0 || cand > max_elements_) throw std::runtime_error("cand error"); - dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), dist_func_param_); + dist_t d = fstdistfunc_(data_point, getDataByInternalId(cand), + dist_func_param_); if (d < curdist) { 
curdist = d; currObj = cand; @@ -1252,15 +1387,20 @@ template class HierarchicalNSW : pub if (level > maxlevelcopy || level < 0) // possible? throw std::runtime_error("Level error"); - std::priority_queue, std::vector>, CompareByFirst> + std::priority_queue, + std::vector>, + CompareByFirst> top_candidates = searchBaseLayer(currObj, data_point, level); if (epDeleted) { - top_candidates.emplace(fstdistfunc_(data_point, getDataByInternalId(enterpoint_copy), dist_func_param_), - enterpoint_copy); + top_candidates.emplace( + fstdistfunc_(data_point, getDataByInternalId(enterpoint_copy), + dist_func_param_), + enterpoint_copy); if (top_candidates.size() > ef_construction_) top_candidates.pop(); } - currObj = mutuallyConnectNewElement(data_point, cur_c, top_candidates, level, false); + currObj = mutuallyConnectNewElement(data_point, cur_c, top_candidates, + level, false); } } else { @@ -1277,15 +1417,17 @@ template class HierarchicalNSW : pub return cur_c; }; - std::priority_queue> searchKnn(const data_t *query_data, size_t k, - VisitedList *vl = nullptr, long queryEf = -1) { + std::priority_queue> + searchKnn(const data_t *query_data, size_t k, VisitedList *vl = nullptr, + long queryEf = -1) { std::shared_lock lock(resizeLock); std::priority_queue> result; if (cur_element_count == 0) return result; tableint currObj = enterpoint_node_; - dist_t curdist = fstdistfunc_(query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); + dist_t curdist = fstdistfunc_( + query_data, getDataByInternalId(enterpoint_node_), dist_func_param_); for (int level = maxlevel_; level > 0; level--) { bool changed = true; @@ -1303,7 +1445,8 @@ template class HierarchicalNSW : pub tableint cand = datal[i]; if (cand < 0 || cand > max_elements_) throw std::runtime_error("cand error"); - dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), dist_func_param_); + dist_t d = fstdistfunc_(query_data, getDataByInternalId(cand), + dist_func_param_); if (d < curdist) { curdist = d; @@ 
-1314,13 +1457,17 @@ template class HierarchicalNSW : pub } } - std::priority_queue, std::vector>, CompareByFirst> + std::priority_queue, + std::vector>, + CompareByFirst> top_candidates; size_t effective_ef = queryEf > 0 ? queryEf : ef_; if (num_deleted_) { - top_candidates = searchBaseLayerST(currObj, query_data, std::max(effective_ef, k), vl); + top_candidates = searchBaseLayerST( + currObj, query_data, std::max(effective_ef, k), vl); } else { - top_candidates = searchBaseLayerST(currObj, query_data, std::max(effective_ef, k), vl); + top_candidates = searchBaseLayerST( + currObj, query_data, std::max(effective_ef, k), vl); } while (top_candidates.size() > k) { @@ -1328,7 +1475,8 @@ template class HierarchicalNSW : pub } while (top_candidates.size() > 0) { std::pair rez = top_candidates.top(); - result.push(std::pair(rez.first, getExternalLabel(rez.second))); + result.push(std::pair(rez.first, + getExternalLabel(rez.second))); top_candidates.pop(); } return result; @@ -1363,7 +1511,8 @@ template class HierarchicalNSW : pub } std::cout << "Min inbound: " << min1 << ", Max inbound:" << max1 << "\n"; } - std::cout << "integrity ok, checked " << connections_checked << " connections\n"; + std::cout << "integrity ok, checked " << connections_checked + << " connections\n"; } }; diff --git a/cpp/src/hnswlib.h b/cpp/src/hnswlib.h index 3a929050..13e21552 100644 --- a/cpp/src/hnswlib.h +++ b/cpp/src/hnswlib.h @@ -75,11 +75,13 @@ template class pairGreater { template class AlgorithmInterface { public: virtual void addPoint(const data_t *datapoint, labeltype label) = 0; - virtual std::priority_queue> searchKnn(const data_t *, size_t, VisitedList *a = nullptr, - long queryEf = -1) = 0; + virtual std::priority_queue> + searchKnn(const data_t *, size_t, VisitedList *a = nullptr, + long queryEf = -1) = 0; // Return k nearest neighbor in the order of closer fist - virtual std::vector> searchKnnCloserFirst(const data_t *query_data, size_t k); + virtual std::vector> + 
searchKnnCloserFirst(const data_t *query_data, size_t k); virtual void saveIndex(const std::string &location) = 0; virtual ~AlgorithmInterface() {} @@ -87,7 +89,8 @@ template class AlgorithmInterface { template std::vector> -AlgorithmInterface::searchKnnCloserFirst(const data_t *query_data, size_t k) { +AlgorithmInterface::searchKnnCloserFirst( + const data_t *query_data, size_t k) { std::vector> result; // here searchKnn returns the result in the order of further first diff --git a/cpp/src/std_utils.h b/cpp/src/std_utils.h index 7dc0110e..26caf55b 100644 --- a/cpp/src/std_utils.h +++ b/cpp/src/std_utils.h @@ -38,7 +38,9 @@ * * The method is borrowed from nmslib */ -template inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn) { +template +inline void ParallelFor(size_t start, size_t end, size_t numThreads, + Function fn) { if (numThreads <= 0) { numThreads = std::thread::hardware_concurrency(); } @@ -96,9 +98,12 @@ template inline void ParallelFor(size_t start, size_t end, size * This dramatically speeds up filtering of an std::priority_queue, as you no * longer need to modify the queue to iterate over it. */ -template S &GetContainerForQueue(std::priority_queue &q) { +template +S &GetContainerForQueue(std::priority_queue &q) { struct HackedQueue : private std::priority_queue { - static S &Container(std::priority_queue &q) { return q.*&HackedQueue::c; } + static S &Container(std::priority_queue &q) { + return q.*&HackedQueue::c; + } }; return HackedQueue::Container(q); } @@ -110,10 +115,13 @@ template S &GetContainerForQueue(std::priority_queue * IndexID will be added as the second tuple value of each element of the queue. 
*/ template -void mergePriorityQueues(std::priority_queue> &dest, - std::priority_queue> &src, size_t maxElements, indexID_t indexID, - const label_t idMask, const std::set &labels, const dist_t maximumDistance) { - std::vector> &items = GetContainerForQueue(src); +void mergePriorityQueues( + std::priority_queue> &dest, + std::priority_queue> &src, size_t maxElements, + indexID_t indexID, const label_t idMask, const std::set &labels, + const dist_t maximumDistance) { + std::vector> &items = + GetContainerForQueue(src); for (auto i = items.begin(); i != items.end(); i++) { // To avoid copying unnecessarily, only move elements if: // - We don't have maxElements in `dest` yet diff --git a/cpp/test/test_main.cpp b/cpp/test/test_main.cpp index 073dc286..f9cc4e6c 100644 --- a/cpp/test/test_main.cpp +++ b/cpp/test/test_main.cpp @@ -4,8 +4,10 @@ #include #include -template > -void testCombination(TypedIndex &index, SpaceType spaceType, int numDimensions, +template > +void testCombination(TypedIndex &index, + SpaceType spaceType, int numDimensions, StorageDataType storageType) { CHECK(toString(index.getSpace()) == toString(spaceType)); CHECK(index.getNumDimensions() == numDimensions); @@ -13,11 +15,12 @@ void testCombination(TypedIndex &index, SpaceType s } TEST_CASE("Test combinations of different instantiations and sizes") { - std::vector spaceTypesSet = {SpaceType::Euclidean, SpaceType::InnerProduct}; + std::vector spaceTypesSet = {SpaceType::Euclidean, + SpaceType::InnerProduct}; std::vector numDimensionsSet = {4, 16, 128, 1024}; std::vector numElementsSet = {100, 1000, 100000}; - std::vector storageTypesSet = {StorageDataType::Float8, StorageDataType::Float32, - StorageDataType::E4M3}; + std::vector storageTypesSet = { + StorageDataType::Float8, StorageDataType::Float32, StorageDataType::E4M3}; for (auto spaceType : spaceTypesSet) { for (auto numDimensions : numDimensionsSet) { @@ -30,7 +33,8 @@ TEST_CASE("Test combinations of different instantiations and sizes") { 
CAPTURE(storageType); if (storageType == StorageDataType::Float8) { - auto index = TypedIndex>(spaceType, numDimensions); + auto index = TypedIndex>( + spaceType, numDimensions); testCombination(index, spaceType, numDimensions, storageType); } else if (storageType == StorageDataType::Float32) { auto index = TypedIndex(spaceType, numDimensions); diff --git a/java/JavaInputStream.h b/java/JavaInputStream.h index 752c6d59..2a22e1a6 100644 --- a/java/JavaInputStream.h +++ b/java/JavaInputStream.h @@ -30,7 +30,8 @@ class JavaInputStream : public InputStream { // hundreds of GB at once, which would allocate 2x that amount. static constexpr long long MAX_BUFFER_SIZE = 1024 * 1024 * 100; - JavaInputStream(JNIEnv *env, jobject inputStream) : env(env), inputStream(inputStream) { + JavaInputStream(JNIEnv *env, jobject inputStream) + : env(env), inputStream(inputStream) { jclass inputStreamClass = env->FindClass("java/io/InputStream"); if (!inputStreamClass) { @@ -38,7 +39,8 @@ class JavaInputStream : public InputStream { } if (!env->IsInstanceOf(inputStream, inputStreamClass)) { - throw std::runtime_error("Provided Java object is not a java.io.InputStream!"); + throw std::runtime_error( + "Provided Java object is not a java.io.InputStream!"); } }; @@ -47,7 +49,8 @@ class JavaInputStream : public InputStream { virtual long long getTotalLength() { return -1; } virtual long long read(char *buffer, long long bytesToRead) { - jmethodID readMethod = env->GetMethodID(env->FindClass("java/io/InputStream"), "read", "([BII)I"); + jmethodID readMethod = env->GetMethodID( + env->FindClass("java/io/InputStream"), "read", "([BII)I"); if (!readMethod) { throw std::runtime_error("Native code failed to find " @@ -59,11 +62,14 @@ class JavaInputStream : public InputStream { long long bufferSize = std::min(MAX_BUFFER_SIZE, bytesToRead); jbyteArray byteArray = env->NewByteArray(bufferSize); if (!byteArray) { - throw std::domain_error("Failed to instantiate Java byte array of size: " + 
std::to_string(bufferSize)); + throw std::domain_error( + "Failed to instantiate Java byte array of size: " + + std::to_string(bufferSize)); } if (peekValue.size()) { - long long bytesToCopy = std::min(bytesToRead, (long long)peekValue.size()); + long long bytesToCopy = + std::min(bytesToRead, (long long)peekValue.size()); std::memcpy(buffer, peekValue.data(), bytesToCopy); for (int i = 0; i < bytesToCopy; i++) peekValue.erase(peekValue.begin()); @@ -72,21 +78,26 @@ class JavaInputStream : public InputStream { } while (bytesRead < bytesToRead) { - int readResult = env->CallIntMethod(inputStream, readMethod, byteArray, 0, - (int)(std::min(bufferSize, bytesToRead - bytesRead))); + int readResult = env->CallIntMethod( + inputStream, readMethod, byteArray, 0, + (int)(std::min(bufferSize, bytesToRead - bytesRead))); if (env->ExceptionCheck()) { return 0; } if (readResult > 0) { if (bytesRead + readResult > bytesToRead) { - throw std::domain_error("java.io.InputStream#read(byte[]) returned " + std::to_string(readResult) + - ", but only " + std::to_string(bytesToRead - bytesRead) + " bytes were required."); + throw std::domain_error("java.io.InputStream#read(byte[]) returned " + + std::to_string(readResult) + ", but only " + + std::to_string(bytesToRead - bytesRead) + + " bytes were required."); } if (readResult > bufferSize) { - throw std::domain_error("java.io.InputStream#read(byte[]) returned " + std::to_string(readResult) + - ", but buffer is only " + std::to_string(bufferSize) + " bytes."); + throw std::domain_error("java.io.InputStream#read(byte[]) returned " + + std::to_string(readResult) + + ", but buffer is only " + + std::to_string(bufferSize) + " bytes."); } env->GetByteArrayRegion(byteArray, 0, readResult, (jbyte *)buffer); bytesRead += readResult; @@ -121,8 +132,10 @@ class JavaInputStream : public InputStream { peekValue.push_back(resultAsCharacters[3]); return result; } else { - throw std::runtime_error("Failed to peek " + std::to_string(sizeof(result)) + - 
" bytes from JavaInputStream at index " + std::to_string(lastPosition) + "."); + throw std::runtime_error("Failed to peek " + + std::to_string(sizeof(result)) + + " bytes from JavaInputStream at index " + + std::to_string(lastPosition) + "."); } } diff --git a/java/JavaOutputStream.h b/java/JavaOutputStream.h index 42b8975e..5db295ee 100644 --- a/java/JavaOutputStream.h +++ b/java/JavaOutputStream.h @@ -25,19 +25,23 @@ class JavaOutputStream : public OutputStream { static constexpr unsigned long long MAX_BUFFER_SIZE = 1024 * 1024 * 100; public: - JavaOutputStream(JNIEnv *env, jobject outputStream) : env(env), outputStream(outputStream) { + JavaOutputStream(JNIEnv *env, jobject outputStream) + : env(env), outputStream(outputStream) { jclass outputStreamClass = env->FindClass("java/io/OutputStream"); if (!outputStreamClass) { - throw std::runtime_error("Native code failed to find OutputStream class!"); + throw std::runtime_error( + "Native code failed to find OutputStream class!"); } if (!env->IsInstanceOf(outputStream, outputStreamClass)) { - throw std::runtime_error("Provided Java object is not a java.io.OutputStream!"); + throw std::runtime_error( + "Provided Java object is not a java.io.OutputStream!"); } }; virtual void flush() { - jmethodID flushMethod = env->GetMethodID(env->FindClass("java/io/OutputStream"), "flush", "()V"); + jmethodID flushMethod = env->GetMethodID( + env->FindClass("java/io/OutputStream"), "flush", "()V"); env->CallVoidMethod(outputStream, flushMethod); if (env->ExceptionCheck()) { @@ -46,7 +50,8 @@ class JavaOutputStream : public OutputStream { } virtual bool write(const char *ptr, unsigned long long numBytes) { - jmethodID writeMethod = env->GetMethodID(env->FindClass("java/io/OutputStream"), "write", "([B)V"); + jmethodID writeMethod = env->GetMethodID( + env->FindClass("java/io/OutputStream"), "write", "([B)V"); if (!writeMethod) { throw std::runtime_error("Native code failed to find " @@ -58,7 +63,9 @@ class JavaOutputStream : public 
OutputStream { jbyteArray byteArray = env->NewByteArray(chunkSize); if (!byteArray) { - throw std::domain_error("Failed to instantiate Java byte array of size: " + std::to_string(chunkSize)); + throw std::domain_error( + "Failed to instantiate Java byte array of size: " + + std::to_string(chunkSize)); } env->SetByteArrayRegion(byteArray, 0, chunkSize, (const jbyte *)ptr); diff --git a/java/com_spotify_voyager_jni_Index.cpp b/java/com_spotify_voyager_jni_Index.cpp index 42740a72..a2fdbdc4 100644 --- a/java/com_spotify_voyager_jni_Index.cpp +++ b/java/com_spotify_voyager_jni_Index.cpp @@ -42,7 +42,9 @@ jfieldID getHandleFieldID(JNIEnv *env, jobject obj) { return env->GetFieldID(c, "nativeHandle", "J"); } -template std::shared_ptr getHandle(JNIEnv *env, jobject obj, bool allow_missing = false) { +template +std::shared_ptr getHandle(JNIEnv *env, jobject obj, + bool allow_missing = false) { env->MonitorEnter(obj); jlong handle = env->GetLongField(obj, getHandleFieldID(env, obj)); env->MonitorExit(obj); @@ -53,7 +55,8 @@ template std::shared_ptr getHandle(JNIEnv *env, jobject obj, boo std::shared_ptr *pointer = reinterpret_cast *>(handle); if (!allow_missing && !pointer) { - throw std::runtime_error("This Voyager index has been closed and can no longer be used."); + throw std::runtime_error( + "This Voyager index has been closed and can no longer be used."); } // Return a copy of this shared pointer, thereby ensuring that it remains @@ -64,7 +67,8 @@ template std::shared_ptr getHandle(JNIEnv *env, jobject obj, boo template void setHandle(JNIEnv *env, jobject obj, T *t) { std::shared_ptr *sharedPointerForJava = new std::shared_ptr(t); env->MonitorEnter(obj); - env->SetLongField(obj, getHandleFieldID(env, obj), reinterpret_cast(sharedPointerForJava)); + env->SetLongField(obj, getHandleFieldID(env, obj), + reinterpret_cast(sharedPointerForJava)); env->MonitorExit(obj); } @@ -99,11 +103,13 @@ std::string toString(JNIEnv *env, jstring js) { std::string toString(JNIEnv *env, 
jobject object) { jclass javaClass = env->GetObjectClass(object); if (javaClass == 0) { - throw std::runtime_error("C++ bindings were unable to get the class for the provided object."); + throw std::runtime_error( + "C++ bindings were unable to get the class for the provided object."); } - return toString( - env, (jstring)env->CallObjectMethod(object, env->GetMethodID(javaClass, "toString", "()Ljava/lang/String;"))); + return toString(env, (jstring)env->CallObjectMethod( + object, env->GetMethodID(javaClass, "toString", + "()Ljava/lang/String;"))); } SpaceType toSpaceType(JNIEnv *env, jobject enumVal) { @@ -117,14 +123,17 @@ SpaceType toSpaceType(JNIEnv *env, jobject enumVal) { } else if (enumValueName == "Cosine") { return SpaceType::Cosine; } else { - throw std::runtime_error("Voyager C++ bindings received unknown enum value \"" + enumValueName + "\"."); + throw std::runtime_error( + "Voyager C++ bindings received unknown enum value \"" + enumValueName + + "\"."); } } jobject toSpaceType(JNIEnv *env, SpaceType enumVal) { jclass enumClass = env->FindClass("com/spotify/voyager/jni/Index$SpaceType"); if (!enumClass) { - throw std::runtime_error("C++ bindings could not find SpaceType Java enum!"); + throw std::runtime_error( + "C++ bindings could not find SpaceType Java enum!"); } const char *enumValueName = nullptr; @@ -140,12 +149,15 @@ jobject toSpaceType(JNIEnv *env, SpaceType enumVal) { enumValueName = "Cosine"; break; default: - throw std::runtime_error("Voyager C++ bindings received unknown enum value."); + throw std::runtime_error( + "Voyager C++ bindings received unknown enum value."); } - jfieldID fieldID = env->GetStaticFieldID(enumClass, enumValueName, "Lcom/spotify/voyager/jni/Index$SpaceType;"); + jfieldID fieldID = env->GetStaticFieldID( + enumClass, enumValueName, "Lcom/spotify/voyager/jni/Index$SpaceType;"); if (!fieldID) { - throw std::runtime_error("C++ bindings could not find value in SpaceType Java enum!"); + throw std::runtime_error( + "C++ 
bindings could not find value in SpaceType Java enum!"); } jobject javaValue = env->GetStaticObjectField(enumClass, fieldID); @@ -168,15 +180,19 @@ StorageDataType toStorageDataType(JNIEnv *env, jobject enumVal) { } else if (enumValueName == "E4M3") { return StorageDataType::E4M3; } else { - throw std::runtime_error("Voyager C++ bindings received unknown enum value \"" + enumValueName + "\"."); + throw std::runtime_error( + "Voyager C++ bindings received unknown enum value \"" + enumValueName + + "\"."); } } jobject toStorageDataType(JNIEnv *env, StorageDataType enumVal) { - jclass enumClass = env->FindClass("com/spotify/voyager/jni/Index$StorageDataType"); + jclass enumClass = + env->FindClass("com/spotify/voyager/jni/Index$StorageDataType"); if (!enumClass) { - throw std::runtime_error("C++ bindings could not find StorageDataType Java enum!"); + throw std::runtime_error( + "C++ bindings could not find StorageDataType Java enum!"); } const char *enumValueName = nullptr; @@ -192,12 +208,16 @@ jobject toStorageDataType(JNIEnv *env, StorageDataType enumVal) { enumValueName = "E4M3"; break; default: - throw std::runtime_error("Voyager C++ bindings received unknown enum value."); + throw std::runtime_error( + "Voyager C++ bindings received unknown enum value."); } - jfieldID fieldID = env->GetStaticFieldID(enumClass, enumValueName, "Lcom/spotify/voyager/jni/Index$StorageDataType;"); + jfieldID fieldID = + env->GetStaticFieldID(enumClass, enumValueName, + "Lcom/spotify/voyager/jni/Index$StorageDataType;"); if (!fieldID) { - throw std::runtime_error("C++ bindings could not find value in StorageDataType Java enum!"); + throw std::runtime_error( + "C++ bindings could not find value in StorageDataType Java enum!"); } jobject javaValue = env->GetStaticObjectField(enumClass, fieldID); @@ -271,7 +291,8 @@ std::vector toStdVector(JNIEnv *env, jfloatArray floatArray) { */ jfloatArray toFloatArray(JNIEnv *env, std::vector floatArray) { jfloatArray returnArray = 
env->NewFloatArray(floatArray.size()); - env->SetFloatArrayRegion(returnArray, 0, floatArray.size(), floatArray.data()); + env->SetFloatArrayRegion(returnArray, 0, floatArray.size(), + floatArray.data()); return returnArray; } @@ -289,27 +310,30 @@ std::vector toUnsignedStdVector(JNIEnv *env, jlongArray longArray) { //////////////////////////////////////////////////////////////////////////////////////////////////// // Index Construction and Indexing //////////////////////////////////////////////////////////////////////////////////////////////////// -void Java_com_spotify_voyager_jni_Index_nativeConstructor(JNIEnv *env, jobject self, jobject spaceType, - jint numDimensions, jlong M, jlong efConstruction, - jlong randomSeed, jlong maxElements, - jobject storageDataType) { +void Java_com_spotify_voyager_jni_Index_nativeConstructor( + JNIEnv *env, jobject self, jobject spaceType, jint numDimensions, jlong M, + jlong efConstruction, jlong randomSeed, jlong maxElements, + jobject storageDataType) { try { switch (toStorageDataType(env, storageDataType)) { case StorageDataType::Float32: setHandle(env, self, - new TypedIndex(toSpaceType(env, spaceType), numDimensions, M, efConstruction, randomSeed, - maxElements)); + new TypedIndex(toSpaceType(env, spaceType), + numDimensions, M, efConstruction, + randomSeed, maxElements)); break; case StorageDataType::Float8: setHandle(env, self, - new TypedIndex>(toSpaceType(env, spaceType), numDimensions, M, - efConstruction, randomSeed, maxElements)); + new TypedIndex>( + toSpaceType(env, spaceType), numDimensions, M, + efConstruction, randomSeed, maxElements)); break; case StorageDataType::E4M3: setHandle(env, self, - new TypedIndex(toSpaceType(env, spaceType), numDimensions, M, efConstruction, - randomSeed, maxElements)); + new TypedIndex( + toSpaceType(env, spaceType), numDimensions, M, + efConstruction, randomSeed, maxElements)); break; } } catch (std::exception const &e) { @@ -319,7 +343,8 @@ void 
Java_com_spotify_voyager_jni_Index_nativeConstructor(JNIEnv *env, jobject s } } -void Java_com_spotify_voyager_jni_Index_addItem___3F(JNIEnv *env, jobject self, jfloatArray vector) { +void Java_com_spotify_voyager_jni_Index_addItem___3F(JNIEnv *env, jobject self, + jfloatArray vector) { try { std::shared_ptr index = getHandle(env, self); index->addItem(toStdVector(env, vector), {}); @@ -330,7 +355,9 @@ void Java_com_spotify_voyager_jni_Index_addItem___3F(JNIEnv *env, jobject self, } } -void Java_com_spotify_voyager_jni_Index_addItem___3FJ(JNIEnv *env, jobject self, jfloatArray vector, jlong id) { +void Java_com_spotify_voyager_jni_Index_addItem___3FJ(JNIEnv *env, jobject self, + jfloatArray vector, + jlong id) { try { std::shared_ptr index = getHandle(env, self); index->addItem(toStdVector(env, vector), {id}); @@ -341,7 +368,9 @@ void Java_com_spotify_voyager_jni_Index_addItem___3FJ(JNIEnv *env, jobject self, } } -void Java_com_spotify_voyager_jni_Index_addItems___3_3FI(JNIEnv *env, jobject self, jobjectArray vectors, +void Java_com_spotify_voyager_jni_Index_addItems___3_3FI(JNIEnv *env, + jobject self, + jobjectArray vectors, jint numThreads) { try { std::shared_ptr index = getHandle(env, self); @@ -353,11 +382,13 @@ void Java_com_spotify_voyager_jni_Index_addItems___3_3FI(JNIEnv *env, jobject se } } -void Java_com_spotify_voyager_jni_Index_addItems___3_3F_3JI(JNIEnv *env, jobject self, jobjectArray vectors, - jlongArray ids, jint numThreads) { +void Java_com_spotify_voyager_jni_Index_addItems___3_3F_3JI( + JNIEnv *env, jobject self, jobjectArray vectors, jlongArray ids, + jint numThreads) { try { std::shared_ptr index = getHandle(env, self); - index->addItems(toNDArray(env, vectors), toUnsignedStdVector(env, ids), numThreads); + index->addItems(toNDArray(env, vectors), toUnsignedStdVector(env, ids), + numThreads); } catch (std::exception const &e) { if (!env->ExceptionCheck()) { env->ThrowNew(env->FindClass("java/lang/RuntimeException"), e.what()); @@ -368,23 
+399,31 @@ void Java_com_spotify_voyager_jni_Index_addItems___3_3F_3JI(JNIEnv *env, jobject //////////////////////////////////////////////////////////////////////////////////////////////////// // Querying //////////////////////////////////////////////////////////////////////////////////////////////////// -jobject Java_com_spotify_voyager_jni_Index_query___3FIJ(JNIEnv *env, jobject self, jfloatArray queryVector, - jint numNeighbors, jlong queryEf) { +jobject Java_com_spotify_voyager_jni_Index_query___3FIJ(JNIEnv *env, + jobject self, + jfloatArray queryVector, + jint numNeighbors, + jlong queryEf) { try { std::shared_ptr index = getHandle(env, self); - std::tuple, std::vector> queryResults = - index->query(toStdVector(env, queryVector), numNeighbors, queryEf); + std::tuple, std::vector> + queryResults = + index->query(toStdVector(env, queryVector), numNeighbors, queryEf); - jclass queryResultsClass = env->FindClass("com/spotify/voyager/jni/Index$QueryResults"); + jclass queryResultsClass = + env->FindClass("com/spotify/voyager/jni/Index$QueryResults"); if (!queryResultsClass) { - throw std::runtime_error("C++ bindings failed to find QueryResults class."); + throw std::runtime_error( + "C++ bindings failed to find QueryResults class."); } - jmethodID constructor = env->GetMethodID(queryResultsClass, "", "([J[F)V"); + jmethodID constructor = + env->GetMethodID(queryResultsClass, "", "([J[F)V"); if (!constructor) { - throw std::runtime_error("C++ bindings failed to find QueryResults constructor."); + throw std::runtime_error( + "C++ bindings failed to find QueryResults constructor."); } // Allocate a Java long array for the IDs: @@ -393,10 +432,12 @@ jobject Java_com_spotify_voyager_jni_Index_query___3FIJ(JNIEnv *env, jobject sel // queryResults is a (size_t *), but labels is a signed (long *). // This may overflow if we have more than... 2^63 = 9.223372037e18 // elements. We're probably safe doing this. 
- env->SetLongArrayRegion(labels, 0, numNeighbors, (jlong *)std::get<0>(queryResults).data()); + env->SetLongArrayRegion(labels, 0, numNeighbors, + (jlong *)std::get<0>(queryResults).data()); jfloatArray distances = env->NewFloatArray(numNeighbors); - env->SetFloatArrayRegion(distances, 0, numNeighbors, std::get<1>(queryResults).data()); + env->SetFloatArrayRegion(distances, 0, numNeighbors, + std::get<1>(queryResults).data()); return env->NewObject(queryResultsClass, constructor, labels, distances); } catch (std::exception const &e) { @@ -407,28 +448,35 @@ jobject Java_com_spotify_voyager_jni_Index_query___3FIJ(JNIEnv *env, jobject sel } } -jobjectArray Java_com_spotify_voyager_jni_Index_query___3_3FIIJ(JNIEnv *env, jobject self, jobjectArray queryVectors, - jint numNeighbors, jint numThreads, jlong queryEf) { +jobjectArray Java_com_spotify_voyager_jni_Index_query___3_3FIIJ( + JNIEnv *env, jobject self, jobjectArray queryVectors, jint numNeighbors, + jint numThreads, jlong queryEf) { try { std::shared_ptr index = getHandle(env, self); int numQueries = env->GetArrayLength(queryVectors); std::tuple, NDArray> queryResults = - index->query(toNDArray(env, queryVectors), numNeighbors, numThreads, queryEf); + index->query(toNDArray(env, queryVectors), numNeighbors, numThreads, + queryEf); - jclass queryResultsClass = env->FindClass("com/spotify/voyager/jni/Index$QueryResults"); + jclass queryResultsClass = + env->FindClass("com/spotify/voyager/jni/Index$QueryResults"); if (!queryResultsClass) { - throw std::runtime_error("C++ bindings failed to find QueryResults class."); + throw std::runtime_error( + "C++ bindings failed to find QueryResults class."); } - jmethodID constructor = env->GetMethodID(queryResultsClass, "", "([J[F)V"); + jmethodID constructor = + env->GetMethodID(queryResultsClass, "", "([J[F)V"); if (!constructor) { - throw std::runtime_error("C++ bindings failed to find QueryResults constructor."); + throw std::runtime_error( + "C++ bindings failed to find 
QueryResults constructor."); } - jobjectArray javaQueryResults = env->NewObjectArray(numQueries, queryResultsClass, NULL); + jobjectArray javaQueryResults = + env->NewObjectArray(numQueries, queryResultsClass, NULL); for (int i = 0; i < numQueries; i++) { // Allocate a Java long array for the indices, and a float array for the @@ -438,12 +486,15 @@ jobjectArray Java_com_spotify_voyager_jni_Index_query___3_3FIIJ(JNIEnv *env, job // queryResults is a (size_t *), but labels is a signed (long *). // This may overflow if we have more than... 2^63 = 9.223372037e18 // elements. We're probably safe doing this. - env->SetLongArrayRegion(labels, 0, numNeighbors, (jlong *)std::get<0>(queryResults)[i]); + env->SetLongArrayRegion(labels, 0, numNeighbors, + (jlong *)std::get<0>(queryResults)[i]); jfloatArray distances = env->NewFloatArray(numNeighbors); - env->SetFloatArrayRegion(distances, 0, numNeighbors, std::get<1>(queryResults)[i]); + env->SetFloatArrayRegion(distances, 0, numNeighbors, + std::get<1>(queryResults)[i]); - jobject queryResults = env->NewObject(queryResultsClass, constructor, labels, distances); + jobject queryResults = + env->NewObject(queryResultsClass, constructor, labels, distances); env->SetObjectArrayElement(javaQueryResults, i, queryResults); env->DeleteLocalRef(labels); env->DeleteLocalRef(distances); @@ -473,7 +524,8 @@ jobject Java_com_spotify_voyager_jni_Index_getSpace(JNIEnv *env, jobject self) { return nullptr; } -jint Java_com_spotify_voyager_jni_Index_getNumDimensions(JNIEnv *env, jobject self) { +jint Java_com_spotify_voyager_jni_Index_getNumDimensions(JNIEnv *env, + jobject self) { try { return getHandle(env, self)->getNumDimensions(); } catch (std::exception const &e) { @@ -493,7 +545,8 @@ jlong Java_com_spotify_voyager_jni_Index_getM(JNIEnv *env, jobject self) { return 0; } -jlong Java_com_spotify_voyager_jni_Index_getEfConstruction(JNIEnv *env, jobject self) { +jlong Java_com_spotify_voyager_jni_Index_getEfConstruction(JNIEnv *env, + 
jobject self) { try { return getHandle(env, self)->getEfConstruction(); } catch (std::exception const &e) { @@ -502,7 +555,8 @@ jlong Java_com_spotify_voyager_jni_Index_getEfConstruction(JNIEnv *env, jobject return 0; } -jlong Java_com_spotify_voyager_jni_Index_getMaxElements(JNIEnv *env, jobject self) { +jlong Java_com_spotify_voyager_jni_Index_getMaxElements(JNIEnv *env, + jobject self) { try { return getHandle(env, self)->getMaxElements(); } catch (std::exception const &e) { @@ -511,9 +565,11 @@ jlong Java_com_spotify_voyager_jni_Index_getMaxElements(JNIEnv *env, jobject sel return 0; } -jobject Java_com_spotify_voyager_jni_Index_getStorageDataType(JNIEnv *env, jobject self) { +jobject Java_com_spotify_voyager_jni_Index_getStorageDataType(JNIEnv *env, + jobject self) { try { - return toStorageDataType(env, getHandle(env, self)->getStorageDataType()); + return toStorageDataType(env, + getHandle(env, self)->getStorageDataType()); } catch (std::exception const &e) { env->ThrowNew(env->FindClass("java/lang/RuntimeException"), e.what()); } @@ -523,7 +579,8 @@ jobject Java_com_spotify_voyager_jni_Index_getStorageDataType(JNIEnv *env, jobje //////////////////////////////////////////////////////////////////////////////////////////////////// // Index Accessor Methods //////////////////////////////////////////////////////////////////////////////////////////////////// -jlong Java_com_spotify_voyager_jni_Index_getNumElements(JNIEnv *env, jobject self) { +jlong Java_com_spotify_voyager_jni_Index_getNumElements(JNIEnv *env, + jobject self) { try { return getHandle(env, self)->getNumElements(); } catch (std::exception const &e) { @@ -534,7 +591,9 @@ jlong Java_com_spotify_voyager_jni_Index_getNumElements(JNIEnv *env, jobject sel return 0; } -jfloatArray Java_com_spotify_voyager_jni_Index_getVector(JNIEnv *env, jobject self, jlong id) { +jfloatArray Java_com_spotify_voyager_jni_Index_getVector(JNIEnv *env, + jobject self, + jlong id) { try { std::shared_ptr index = 
getHandle(env, self); return toFloatArray(env, index->getVector(id)); @@ -546,17 +605,21 @@ jfloatArray Java_com_spotify_voyager_jni_Index_getVector(JNIEnv *env, jobject se } } -jobjectArray Java_com_spotify_voyager_jni_Index_getVectors(JNIEnv *env, jobject self, jlongArray ids) { +jobjectArray Java_com_spotify_voyager_jni_Index_getVectors(JNIEnv *env, + jobject self, + jlongArray ids) { try { std::shared_ptr index = getHandle(env, self); - NDArray vectors = index->getVectors(toUnsignedStdVector(env, ids)); + NDArray vectors = + index->getVectors(toUnsignedStdVector(env, ids)); jclass floatArrayClass = env->FindClass("[F"); if (!floatArrayClass) { throw std::runtime_error("C++ bindings failed to find float[] class."); } - jobjectArray javaVectors = env->NewObjectArray(vectors.shape[0], floatArrayClass, NULL); + jobjectArray javaVectors = + env->NewObjectArray(vectors.shape[0], floatArrayClass, NULL); for (int i = 0; i < vectors.shape[0]; i++) { jfloatArray vector = env->NewFloatArray(vectors.shape[1]); @@ -574,7 +637,8 @@ jobjectArray Java_com_spotify_voyager_jni_Index_getVectors(JNIEnv *env, jobject } } -jlongArray Java_com_spotify_voyager_jni_Index_getIDs(JNIEnv *env, jobject self) { +jlongArray Java_com_spotify_voyager_jni_Index_getIDs(JNIEnv *env, + jobject self) { try { std::shared_ptr index = getHandle(env, self); @@ -603,7 +667,8 @@ jlongArray Java_com_spotify_voyager_jni_Index_getIDs(JNIEnv *env, jobject self) //////////////////////////////////////////////////////////////////////////////////////////////////// // Index Modifier Methods //////////////////////////////////////////////////////////////////////////////////////////////////// -void Java_com_spotify_voyager_jni_Index_setEf(JNIEnv *env, jobject self, jlong newEf) { +void Java_com_spotify_voyager_jni_Index_setEf(JNIEnv *env, jobject self, + jlong newEf) { try { getHandle(env, self)->setEF(newEf); } catch (std::exception const &e) { @@ -624,7 +689,8 @@ jint 
Java_com_spotify_voyager_jni_Index_getEf(JNIEnv *env, jobject self) { return 0; } -void Java_com_spotify_voyager_jni_Index_markDeleted(JNIEnv *env, jobject self, jlong label) { +void Java_com_spotify_voyager_jni_Index_markDeleted(JNIEnv *env, jobject self, + jlong label) { try { getHandle(env, self)->markDeleted(label); } catch (std::exception const &e) { @@ -634,7 +700,8 @@ void Java_com_spotify_voyager_jni_Index_markDeleted(JNIEnv *env, jobject self, j } } -void Java_com_spotify_voyager_jni_Index_unmarkDeleted(JNIEnv *env, jobject self, jlong label) { +void Java_com_spotify_voyager_jni_Index_unmarkDeleted(JNIEnv *env, jobject self, + jlong label) { try { getHandle(env, self)->unmarkDeleted(label); } catch (std::exception const &e) { @@ -644,7 +711,8 @@ void Java_com_spotify_voyager_jni_Index_unmarkDeleted(JNIEnv *env, jobject self, } } -void Java_com_spotify_voyager_jni_Index_resizeIndex(JNIEnv *env, jobject self, jlong newSize) { +void Java_com_spotify_voyager_jni_Index_resizeIndex(JNIEnv *env, jobject self, + jlong newSize) { try { std::shared_ptr index = getHandle(env, self); index->resizeIndex(newSize); @@ -658,7 +726,8 @@ void Java_com_spotify_voyager_jni_Index_resizeIndex(JNIEnv *env, jobject self, j //////////////////////////////////////////////////////////////////////////////////////////////////// // Save Index //////////////////////////////////////////////////////////////////////////////////////////////////// -void Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_lang_String_2(JNIEnv *env, jobject self, jstring filename) { +void Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_lang_String_2( + JNIEnv *env, jobject self, jstring filename) { try { std::shared_ptr index = getHandle(env, self); index->saveIndex(toString(env, filename)); @@ -669,8 +738,8 @@ void Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_lang_String_2(JNIEnv *e } } -void Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_io_OutputStream_2(JNIEnv *env, jobject self, - jobject 
outputStream) { +void Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_io_OutputStream_2( + JNIEnv *env, jobject self, jobject outputStream) { try { std::shared_ptr index = getHandle(env, self); index->saveIndex(std::make_shared(env, outputStream)); @@ -685,45 +754,62 @@ void Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_io_OutputStream_2(JNIEn // Load Index //////////////////////////////////////////////////////////////////////////////////////////////////// // TODO: Convert these to static methods -void Java_com_spotify_voyager_jni_Index_nativeLoadFromFileWithParameters(JNIEnv *env, jobject self, jstring filename, - jobject spaceType, jint numDimensions, - jobject storageDataType) { +void Java_com_spotify_voyager_jni_Index_nativeLoadFromFileWithParameters( + JNIEnv *env, jobject self, jstring filename, jobject spaceType, + jint numDimensions, jobject storageDataType) { try { - auto inputStream = std::make_shared(toString(env, filename)); - std::unique_ptr metadata = voyager::Metadata::loadFromStream(inputStream); + auto inputStream = + std::make_shared(toString(env, filename)); + std::unique_ptr metadata = + voyager::Metadata::loadFromStream(inputStream); if (metadata) { - if (metadata->getStorageDataType() != toStorageDataType(env, storageDataType)) { - throw std::domain_error("Provided storage data type (" + toString(toStorageDataType(env, storageDataType)) + - ") does not match the data type used in this file (" + - toString(metadata->getStorageDataType()) + ")."); + if (metadata->getStorageDataType() != + toStorageDataType(env, storageDataType)) { + throw std::domain_error( + "Provided storage data type (" + + toString(toStorageDataType(env, storageDataType)) + + ") does not match the data type used in this file (" + + toString(metadata->getStorageDataType()) + ")."); } if (metadata->getSpaceType() != toSpaceType(env, spaceType)) { - throw std::domain_error("Provided space type (" + toString(toSpaceType(env, spaceType)) + - ") does not match the space 
type used in this file (" + - toString(metadata->getSpaceType()) + ")."); + throw std::domain_error( + "Provided space type (" + toString(toSpaceType(env, spaceType)) + + ") does not match the space type used in this file (" + + toString(metadata->getSpaceType()) + ")."); } if (metadata->getNumDimensions() != numDimensions) { - throw std::domain_error("Provided number of dimensions (" + std::to_string(numDimensions) + - ") does not match the number of dimensions used in this file (" + - std::to_string(metadata->getNumDimensions()) + ")."); + throw std::domain_error( + "Provided number of dimensions (" + std::to_string(numDimensions) + + ") does not match the number of dimensions used in this file (" + + std::to_string(metadata->getNumDimensions()) + ")."); } - setHandle(env, self, loadTypedIndexFromMetadata(std::move(metadata), inputStream).release()); + setHandle( + env, self, + loadTypedIndexFromMetadata(std::move(metadata), inputStream) + .release()); return; } switch (toStorageDataType(env, storageDataType)) { case StorageDataType::Float32: - setHandle(env, self, new TypedIndex(inputStream, toSpaceType(env, spaceType), numDimensions)); + setHandle(env, self, + new TypedIndex(inputStream, + toSpaceType(env, spaceType), + numDimensions)); break; case StorageDataType::Float8: setHandle( env, self, - new TypedIndex>(inputStream, toSpaceType(env, spaceType), numDimensions)); + new TypedIndex>( + inputStream, toSpaceType(env, spaceType), numDimensions)); break; case StorageDataType::E4M3: - setHandle(env, self, new TypedIndex(inputStream, toSpaceType(env, spaceType), numDimensions)); + setHandle(env, self, + new TypedIndex(inputStream, + toSpaceType(env, spaceType), + numDimensions)); break; } } catch (std::exception const &e) { @@ -733,46 +819,61 @@ void Java_com_spotify_voyager_jni_Index_nativeLoadFromFileWithParameters(JNIEnv } } -void Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStreamWithParameters(JNIEnv *env, jobject self, - jobject jInputStream, 
jobject spaceType, - jint numDimensions, - jobject storageDataType) { +void Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStreamWithParameters( + JNIEnv *env, jobject self, jobject jInputStream, jobject spaceType, + jint numDimensions, jobject storageDataType) { try { auto inputStream = std::make_shared(env, jInputStream); - std::unique_ptr metadata = voyager::Metadata::loadFromStream(inputStream); + std::unique_ptr metadata = + voyager::Metadata::loadFromStream(inputStream); if (metadata) { - if (metadata->getStorageDataType() != toStorageDataType(env, storageDataType)) { - throw std::domain_error("Provided storage data type (" + toString(toStorageDataType(env, storageDataType)) + - ") does not match the data type used in this file (" + - toString(metadata->getStorageDataType()) + ")."); + if (metadata->getStorageDataType() != + toStorageDataType(env, storageDataType)) { + throw std::domain_error( + "Provided storage data type (" + + toString(toStorageDataType(env, storageDataType)) + + ") does not match the data type used in this file (" + + toString(metadata->getStorageDataType()) + ")."); } if (metadata->getSpaceType() != toSpaceType(env, spaceType)) { - throw std::domain_error("Provided space type (" + toString(toSpaceType(env, spaceType)) + - ") does not match the space type used in this file (" + - toString(metadata->getSpaceType()) + ")."); + throw std::domain_error( + "Provided space type (" + toString(toSpaceType(env, spaceType)) + + ") does not match the space type used in this file (" + + toString(metadata->getSpaceType()) + ")."); } if (metadata->getNumDimensions() != numDimensions) { - throw std::domain_error("Provided number of dimensions (" + std::to_string(numDimensions) + - ") does not match the number of dimensions used in this file (" + - std::to_string(metadata->getNumDimensions()) + ")."); + throw std::domain_error( + "Provided number of dimensions (" + std::to_string(numDimensions) + + ") does not match the number of dimensions used 
in this file (" + + std::to_string(metadata->getNumDimensions()) + ")."); } - setHandle(env, self, loadTypedIndexFromMetadata(std::move(metadata), inputStream).release()); + setHandle( + env, self, + loadTypedIndexFromMetadata(std::move(metadata), inputStream) + .release()); return; } switch (toStorageDataType(env, storageDataType)) { case StorageDataType::Float32: - setHandle(env, self, new TypedIndex(inputStream, toSpaceType(env, spaceType), numDimensions)); + setHandle(env, self, + new TypedIndex(inputStream, + toSpaceType(env, spaceType), + numDimensions)); break; case StorageDataType::Float8: setHandle( env, self, - new TypedIndex>(inputStream, toSpaceType(env, spaceType), numDimensions)); + new TypedIndex>( + inputStream, toSpaceType(env, spaceType), numDimensions)); break; case StorageDataType::E4M3: - setHandle(env, self, new TypedIndex(inputStream, toSpaceType(env, spaceType), numDimensions)); + setHandle(env, self, + new TypedIndex(inputStream, + toSpaceType(env, spaceType), + numDimensions)); break; } } catch (std::exception const &e) { @@ -782,17 +883,25 @@ void Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStreamWithParameters( } } -void Java_com_spotify_voyager_jni_Index_nativeLoadFromFile(JNIEnv *env, jobject self, jstring filename) { +void Java_com_spotify_voyager_jni_Index_nativeLoadFromFile(JNIEnv *env, + jobject self, + jstring filename) { try { - auto inputStream = std::make_shared(toString(env, filename)); - std::unique_ptr metadata = voyager::Metadata::loadFromStream(inputStream); + auto inputStream = + std::make_shared(toString(env, filename)); + std::unique_ptr metadata = + voyager::Metadata::loadFromStream(inputStream); if (metadata) { - setHandle(env, self, loadTypedIndexFromMetadata(std::move(metadata), inputStream).release()); + setHandle( + env, self, + loadTypedIndexFromMetadata(std::move(metadata), inputStream) + .release()); } else { - throw std::domain_error("Provided index file has no metadata and no index parameters were " 
- "specified. Must either provide an index with metadata or specify " - "storageDataType, spaceType, and numDimensions."); + throw std::domain_error( + "Provided index file has no metadata and no index parameters were " + "specified. Must either provide an index with metadata or specify " + "storageDataType, spaceType, and numDimensions."); } } catch (std::exception const &e) { @@ -802,17 +911,23 @@ void Java_com_spotify_voyager_jni_Index_nativeLoadFromFile(JNIEnv *env, jobject } } -void Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStream(JNIEnv *env, jobject self, jobject jInputStream) { +void Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStream( + JNIEnv *env, jobject self, jobject jInputStream) { try { auto inputStream = std::make_shared(env, jInputStream); - std::unique_ptr metadata = voyager::Metadata::loadFromStream(inputStream); + std::unique_ptr metadata = + voyager::Metadata::loadFromStream(inputStream); if (metadata) { - setHandle(env, self, loadTypedIndexFromMetadata(std::move(metadata), inputStream).release()); + setHandle( + env, self, + loadTypedIndexFromMetadata(std::move(metadata), inputStream) + .release()); } else { - throw std::domain_error("Provided index file has no metadata and no index parameters were " - "specified. Must either provide an index with metadata or specify " - "storageDataType, spaceType, and numDimensions."); + throw std::domain_error( + "Provided index file has no metadata and no index parameters were " + "specified. 
Must either provide an index with metadata or specify " + "storageDataType, spaceType, and numDimensions."); } } catch (std::exception const &e) { if (!env->ExceptionCheck()) { @@ -821,7 +936,8 @@ void Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStream(JNIEnv *env, j } } -void Java_com_spotify_voyager_jni_Index_nativeDestructor(JNIEnv *env, jobject self) { +void Java_com_spotify_voyager_jni_Index_nativeDestructor(JNIEnv *env, + jobject self) { try { deleteHandle(env, self); } catch (std::exception const &e) { diff --git a/java/com_spotify_voyager_jni_Index.h b/java/com_spotify_voyager_jni_Index.h index d7edf057..25587917 100644 --- a/java/com_spotify_voyager_jni_Index.h +++ b/java/com_spotify_voyager_jni_Index.h @@ -10,10 +10,11 @@ extern "C" { /* * Class: com_spotify_voyager_jni_Index * Method: nativeConstructor - * Signature: (Lcom/spotify/voyager/jni/Index/SpaceType;IJJJJLcom/spotify/voyager/jni/Index/StorageDataType;)V + * Signature: + * (Lcom/spotify/voyager/jni/Index/SpaceType;IJJJJLcom/spotify/voyager/jni/Index/StorageDataType;)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeConstructor(JNIEnv *, jobject, jobject, jint, jlong, - jlong, jlong, jlong, jobject); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeConstructor( + JNIEnv *, jobject, jobject, jint, jlong, jlong, jlong, jlong, jobject); /* * Class: com_spotify_voyager_jni_Index @@ -21,15 +22,17 @@ JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeConstructor(JNIE * Signature: * (Ljava/lang/String;Lcom/spotify/voyager/jni/Index/SpaceType;ILcom/spotify/voyager/jni/Index/StorageDataType;)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeLoadFromFileWithParameters(JNIEnv *, jobject, jstring, - jobject, jint, jobject); +JNIEXPORT void JNICALL +Java_com_spotify_voyager_jni_Index_nativeLoadFromFileWithParameters( + JNIEnv *, jobject, jstring, jobject, jint, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: 
nativeLoadFromFile * Signature: (Ljava/lang/String;)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeLoadFromFile(JNIEnv *, jobject, jstring); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeLoadFromFile( + JNIEnv *, jobject, jstring); /* * Class: com_spotify_voyager_jni_Index @@ -37,194 +40,226 @@ JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeLoadFromFile(JNI * Signature: * (Ljava/io/InputStream;Lcom/spotify/voyager/jni/Index/SpaceType;ILcom/spotify/voyager/jni/Index/StorageDataType;)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStreamWithParameters(JNIEnv *, jobject, - jobject, jobject, - jint, jobject); +JNIEXPORT void JNICALL +Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStreamWithParameters( + JNIEnv *, jobject, jobject, jobject, jint, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: nativeLoadFromInputStream * Signature: (Ljava/io/InputStream;)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStream(JNIEnv *, jobject, jobject); +JNIEXPORT void JNICALL +Java_com_spotify_voyager_jni_Index_nativeLoadFromInputStream(JNIEnv *, jobject, + jobject); /* * Class: com_spotify_voyager_jni_Index * Method: nativeDestructor * Signature: ()V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_nativeDestructor(JNIEnv *, jobject); +JNIEXPORT void JNICALL +Java_com_spotify_voyager_jni_Index_nativeDestructor(JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: setEf * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_setEf(JNIEnv *, jobject, jlong); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_setEf(JNIEnv *, + jobject, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: getEf * Signature: ()I */ -JNIEXPORT jint JNICALL Java_com_spotify_voyager_jni_Index_getEf(JNIEnv *, jobject); +JNIEXPORT jint JNICALL Java_com_spotify_voyager_jni_Index_getEf(JNIEnv *, + 
jobject); /* * Class: com_spotify_voyager_jni_Index * Method: getSpace * Signature: ()Lcom/spotify/voyager/jni/Index/SpaceType; */ -JNIEXPORT jobject JNICALL Java_com_spotify_voyager_jni_Index_getSpace(JNIEnv *, jobject); +JNIEXPORT jobject JNICALL Java_com_spotify_voyager_jni_Index_getSpace(JNIEnv *, + jobject); /* * Class: com_spotify_voyager_jni_Index * Method: getNumDimensions * Signature: ()I */ -JNIEXPORT jint JNICALL Java_com_spotify_voyager_jni_Index_getNumDimensions(JNIEnv *, jobject); +JNIEXPORT jint JNICALL +Java_com_spotify_voyager_jni_Index_getNumDimensions(JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: setNumThreads * Signature: (I)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_setNumThreads(JNIEnv *, jobject, jint); +JNIEXPORT void JNICALL +Java_com_spotify_voyager_jni_Index_setNumThreads(JNIEnv *, jobject, jint); /* * Class: com_spotify_voyager_jni_Index * Method: getNumThreads * Signature: ()I */ -JNIEXPORT jint JNICALL Java_com_spotify_voyager_jni_Index_getNumThreads(JNIEnv *, jobject); +JNIEXPORT jint JNICALL +Java_com_spotify_voyager_jni_Index_getNumThreads(JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: saveIndex * Signature: (Ljava/lang/String;)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_lang_String_2(JNIEnv *, jobject, jstring); +JNIEXPORT void JNICALL +Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_lang_String_2(JNIEnv *, + jobject, + jstring); /* * Class: com_spotify_voyager_jni_Index * Method: saveIndex * Signature: (Ljava/io/OutputStream;)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_io_OutputStream_2(JNIEnv *, jobject, - jobject); +JNIEXPORT void JNICALL +Java_com_spotify_voyager_jni_Index_saveIndex__Ljava_io_OutputStream_2(JNIEnv *, + jobject, + jobject); /* * Class: com_spotify_voyager_jni_Index * Method: addItem * Signature: ([F)V */ -JNIEXPORT void JNICALL 
Java_com_spotify_voyager_jni_Index_addItem___3F(JNIEnv *, jobject, jfloatArray); +JNIEXPORT void JNICALL +Java_com_spotify_voyager_jni_Index_addItem___3F(JNIEnv *, jobject, jfloatArray); /* * Class: com_spotify_voyager_jni_Index * Method: addItem * Signature: ([FJ)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItem___3FJ(JNIEnv *, jobject, jfloatArray, jlong); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItem___3FJ( + JNIEnv *, jobject, jfloatArray, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: addItems * Signature: ([[FI)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItems___3_3FI(JNIEnv *, jobject, jobjectArray, jint); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItems___3_3FI( + JNIEnv *, jobject, jobjectArray, jint); /* * Class: com_spotify_voyager_jni_Index * Method: addItems * Signature: ([[F[JI)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItems___3_3F_3JI(JNIEnv *, jobject, jobjectArray, - jlongArray, jint); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_addItems___3_3F_3JI( + JNIEnv *, jobject, jobjectArray, jlongArray, jint); /* * Class: com_spotify_voyager_jni_Index * Method: getVector * Signature: (J)[F */ -JNIEXPORT jfloatArray JNICALL Java_com_spotify_voyager_jni_Index_getVector(JNIEnv *, jobject, jlong); +JNIEXPORT jfloatArray JNICALL +Java_com_spotify_voyager_jni_Index_getVector(JNIEnv *, jobject, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: getVectors * Signature: ([J)[[F */ -JNIEXPORT jobjectArray JNICALL Java_com_spotify_voyager_jni_Index_getVectors(JNIEnv *, jobject, jlongArray); +JNIEXPORT jobjectArray JNICALL +Java_com_spotify_voyager_jni_Index_getVectors(JNIEnv *, jobject, jlongArray); /* * Class: com_spotify_voyager_jni_Index * Method: getIDs * Signature: ()[J */ -JNIEXPORT jlongArray JNICALL Java_com_spotify_voyager_jni_Index_getIDs(JNIEnv *, jobject); +JNIEXPORT jlongArray JNICALL 
Java_com_spotify_voyager_jni_Index_getIDs(JNIEnv *, + jobject); /* * Class: com_spotify_voyager_jni_Index * Method: query * Signature: ([FIJ)Lcom/spotify/voyager/jni/Index/QueryResults; */ -JNIEXPORT jobject JNICALL Java_com_spotify_voyager_jni_Index_query___3FIJ(JNIEnv *, jobject, jfloatArray, jint, jlong); +JNIEXPORT jobject JNICALL Java_com_spotify_voyager_jni_Index_query___3FIJ( + JNIEnv *, jobject, jfloatArray, jint, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: query * Signature: ([[FIIJ)[Lcom/spotify/voyager/jni/Index/QueryResults; */ -JNIEXPORT jobjectArray JNICALL Java_com_spotify_voyager_jni_Index_query___3_3FIIJ(JNIEnv *, jobject, jobjectArray, jint, - jint, jlong); +JNIEXPORT jobjectArray JNICALL +Java_com_spotify_voyager_jni_Index_query___3_3FIIJ(JNIEnv *, jobject, + jobjectArray, jint, jint, + jlong); /* * Class: com_spotify_voyager_jni_Index * Method: markDeleted * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_markDeleted(JNIEnv *, jobject, jlong); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_markDeleted(JNIEnv *, + jobject, + jlong); /* * Class: com_spotify_voyager_jni_Index * Method: unmarkDeleted * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_unmarkDeleted(JNIEnv *, jobject, jlong); +JNIEXPORT void JNICALL +Java_com_spotify_voyager_jni_Index_unmarkDeleted(JNIEnv *, jobject, jlong); /* * Class: com_spotify_voyager_jni_Index * Method: resizeIndex * Signature: (J)V */ -JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_resizeIndex(JNIEnv *, jobject, jlong); +JNIEXPORT void JNICALL Java_com_spotify_voyager_jni_Index_resizeIndex(JNIEnv *, + jobject, + jlong); /* * Class: com_spotify_voyager_jni_Index * Method: getMaxElements * Signature: ()J */ -JNIEXPORT jlong JNICALL Java_com_spotify_voyager_jni_Index_getMaxElements(JNIEnv *, jobject); +JNIEXPORT jlong JNICALL +Java_com_spotify_voyager_jni_Index_getMaxElements(JNIEnv *, jobject); /* * Class: 
com_spotify_voyager_jni_Index * Method: getNumElements * Signature: ()J */ -JNIEXPORT jlong JNICALL Java_com_spotify_voyager_jni_Index_getNumElements(JNIEnv *, jobject); +JNIEXPORT jlong JNICALL +Java_com_spotify_voyager_jni_Index_getNumElements(JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: getEfConstruction * Signature: ()J */ -JNIEXPORT jlong JNICALL Java_com_spotify_voyager_jni_Index_getEfConstruction(JNIEnv *, jobject); +JNIEXPORT jlong JNICALL +Java_com_spotify_voyager_jni_Index_getEfConstruction(JNIEnv *, jobject); /* * Class: com_spotify_voyager_jni_Index * Method: getM * Signature: ()J */ -JNIEXPORT jlong JNICALL Java_com_spotify_voyager_jni_Index_getM(JNIEnv *, jobject); +JNIEXPORT jlong JNICALL Java_com_spotify_voyager_jni_Index_getM(JNIEnv *, + jobject); #ifdef __cplusplus } diff --git a/java/thread_pool.h b/java/thread_pool.h index da113477..6d137eb7 100644 --- a/java/thread_pool.h +++ b/java/thread_pool.h @@ -22,7 +22,8 @@ namespace similarity { // See sample usage below -template bool GetNextQueueObj(std::mutex &mtx, std::queue &queue, T &obj) { +template +bool GetNextQueueObj(std::mutex &mtx, std::queue &queue, T &obj) { std::unique_lock lock(mtx); if (queue.empty()) { return false; @@ -59,7 +60,9 @@ template bool GetNextQueueObj(std::mutex &mtx, std::queue &queue, T * only handles a subset of functionality (no reductions etc) * Process ids from start (inclusive) to end (EXCLUSIVE) */ -template inline void ParallelFor(size_t start, size_t end, size_t numThreads, Function fn) { +template +inline void ParallelFor(size_t start, size_t end, size_t numThreads, + Function fn) { if (numThreads <= 0) { numThreads = std::thread::hardware_concurrency(); } diff --git a/python/bindings.cpp b/python/bindings.cpp index 6828b2ae..1a6dc299 100644 --- a/python/bindings.cpp +++ b/python/bindings.cpp @@ -34,11 +34,13 @@ using namespace pybind11::literals; // needed to bring in _a literal * Convert a PyArray (i.e.: numpy.ndarray) to a C++ 
NDArray. * This function copies the data from the PyArray into a new NDArray. */ -template NDArray pyArrayToNDArray(py::array_t input) { +template +NDArray pyArrayToNDArray(py::array_t input) { py::buffer_info inputInfo = input.request(); if (inputInfo.ndim != Dims) { - throw std::domain_error("Input array was expected to have rank " + std::to_string(Dims) + ", but had rank " + + throw std::domain_error("Input array was expected to have rank " + + std::to_string(Dims) + ", but had rank " + std::to_string(inputInfo.ndim) + "."); } @@ -63,7 +65,8 @@ template NDArray pyArrayToNDArray(py::array_t * This function copies the data, but may not have to if NDArray were * refactored. */ -template py::array_t ndArrayToPyArray(NDArray input) { +template +py::array_t ndArrayToPyArray(NDArray input) { py::array_t output(input.shape); T *outputPtr = static_cast(const_cast(output.data())); @@ -72,9 +75,10 @@ template py::array_t ndArrayToPyArray(NDArray numOutputElements *= input.shape[i]; } if (input.data.size() != numOutputElements) { - throw std::runtime_error("Internal error: NDArray input size (" + std::to_string(input.data.size()) + - " elements) does not match output shape: (" + std::to_string(numOutputElements) + - " elements)."); + throw std::runtime_error("Internal error: NDArray input size (" + + std::to_string(input.data.size()) + + " elements) does not match output shape: (" + + std::to_string(numOutputElements) + " elements)."); } { @@ -93,8 +97,9 @@ template std::vector pyArrayToVector(py::array_t input) { py::buffer_info inputInfo = input.request(); if (inputInfo.ndim != 1) { - throw std::domain_error("Input array was expected to have one dimension, but had " + - std::to_string(inputInfo.ndim) + " dimensions."); + throw std::domain_error( + "Input array was expected to have one dimension, but had " + + std::to_string(inputInfo.ndim) + " dimensions."); } std::vector output(inputInfo.shape[0]); @@ -131,15 +136,18 @@ template py::array_t vectorToPyArray(std::vector 
input) { */ class LabelSetView { public: - LabelSetView(const std::unordered_map &map) : map(map) {} + LabelSetView( + const std::unordered_map &map) + : map(map) {} std::unordered_map const ↦ }; inline void init_LabelSetView(py::module &m) { - py::class_(m, "LabelSetView", - "A read-only set-like object containing 64-bit integers. Use this object " - "like a regular Python :py:class:`set` object, by either iterating " - "through it, or checking for membership with the ``in`` operator.") + py::class_( + m, "LabelSetView", + "A read-only set-like object containing 64-bit integers. Use this object " + "like a regular Python :py:class:`set` object, by either iterating " + "through it, or checking for membership with the ``in`` operator.") .def("__repr__", [](LabelSetView &self) { std::ostringstream ss; @@ -168,21 +176,30 @@ inline void init_LabelSetView(py::module &m) { }) .def( "__contains__", - [](LabelSetView &self, hnswlib::labeltype element) { return self.map.find(element) != self.map.end(); }, + [](LabelSetView &self, hnswlib::labeltype element) { + return self.map.find(element) != self.map.end(); + }, py::arg("id")) .def( - "__contains__", [](LabelSetView &, const py::object &) { return false; }, py::arg("id")); + "__contains__", + [](LabelSetView &, const py::object &) { return false; }, + py::arg("id")); } -template > -inline void register_index_class(py::module &m, std::string className, std::string docstring) { +template > +inline void register_index_class(py::module &m, std::string className, + std::string docstring) { auto klass = py::class_, Index, - std::shared_ptr>>(m, className.c_str(), docstring.c_str()); + std::shared_ptr>>( + m, className.c_str(), docstring.c_str()); - klass.def(py::init(), - py::arg("space"), py::arg("num_dimensions"), py::arg("M") = 16, py::arg("ef_construction") = 200, - py::arg("random_seed") = 1, py::arg("max_elements") = 1, "Create a new, empty index."); + klass.def(py::init(), + py::arg("space"), py::arg("num_dimensions"), 
py::arg("M") = 16, + py::arg("ef_construction") = 200, py::arg("random_seed") = 1, + py::arg("max_elements") = 1, "Create a new, empty index."); klass.def("__repr__", [className](const Index &index) { return "( + m, "Space", "The method used to calculate the distance between vectors.") .value("Euclidean", SpaceType::Euclidean, "Euclidean distance; also known as L2 distance. The square root " "of the sum of the squared differences between each element of " "each vector.") - .value("Cosine", SpaceType::Cosine, "Cosine distance; also known as normalized inner product.") + .value("Cosine", SpaceType::Cosine, + "Cosine distance; also known as normalized inner product.") .value("InnerProduct", SpaceType::InnerProduct, "Inner product distance.") .export_values(); @@ -215,7 +234,8 @@ memory usage and index size by a factor of 4 compared to :py:class:`Float32`. .value("Float8", StorageDataType::Float8, "8-bit fixed-point decimal values. All values must be within [-1, " "1.00787402].") - .value("Float32", StorageDataType::Float32, "32-bit floating point (default).") + .value("Float32", StorageDataType::Float32, + "32-bit floating point (default).") .value("E4M3", StorageDataType::E4M3, "8-bit floating point with a range of [-448, 448], from " "the paper \"FP8 Formats for Deep Learning\" by Micikevicius et " @@ -226,9 +246,10 @@ memory usage and index size by a factor of 4 compared to :py:class:`Float32`. "correct ordering between results.") .export_values(); - py::class_(m, "E4M3T", - "An 8-bit floating point data type with reduced precision and range. " - "This class wraps a C++ struct and should probably not be used directly.") + py::class_( + m, "E4M3T", + "An 8-bit floating point data type with reduced precision and range. " + "This class wraps a C++ struct and should probably not be used directly.") .def(py::init([](float input) { E4M3 v(input); return v; @@ -247,20 +268,24 @@ memory usage and index size by a factor of 4 compared to :py:class:`Float32`. 
"from_char", [](int c) { if (c > 255 || c < 0) - throw std::range_error("Expected input to from_char to be on [0, 255]!"); + throw std::range_error( + "Expected input to from_char to be on [0, 255]!"); E4M3 v(static_cast(c)); return v; }, "Create an E4M3 number given a raw 8-bit value.", py::arg("value")) .def( - "__float__", [](E4M3 &self) { return (float)self; }, "Cast the given E4M3 number to a float.") + "__float__", [](E4M3 &self) { return (float)self; }, + "Cast the given E4M3 number to a float.") .def("__repr__", [](E4M3 &self) { std::ostringstream ss; ss << "(self.exponent) << ")"; - ss << " mantissa=" << self.effectiveMantissa() << " (" << std::bitset<3>(self.mantissa) << ")"; + ss << " exponent=" << (int)self.effectiveExponent() << " (" + << std::bitset<4>(self.exponent) << ")"; + ss << " mantissa=" << self.effectiveMantissa() << " (" + << std::bitset<3>(self.mantissa) << ")"; ss << " float=" << ((float)self); ss << " at " << &self; ss << ">"; @@ -395,14 +420,16 @@ Add a vector to this index. index.def( "add_items", - [](Index &index, py::array_t vectors, std::optional> _ids, int num_threads) { + [](Index &index, py::array_t vectors, + std::optional> _ids, int num_threads) { std::vector empty; auto ndArray = pyArrayToNDArray(vectors); py::gil_scoped_release release; return index.addItems(ndArray, (_ids ? *_ids : empty), num_threads); }, - py::arg("vectors"), py::arg("ids") = py::none(), py::arg("num_threads") = -1, + py::arg("vectors"), py::arg("ids") = py::none(), + py::arg("num_threads") = -1, R"( Add multiple vectors to this index simultaneously. @@ -435,30 +462,38 @@ as passing a batch of vectors helps avoid Python's Global Interpreter Lock. 
//////////////////////////////////////////////////////////////////////////////////////////////////// index.def( "query", - [](Index &index, py::array_t input, size_t k = 1, int num_threads = -1, long queryEf = -1) { + [](Index &index, py::array_t input, size_t k = 1, + int num_threads = -1, long queryEf = -1) { int inputNDim = input.request().ndim; switch (inputNDim) { case 1: { - auto idsAndDistances = index.query(pyArrayToVector(input), k, queryEf); - std::tuple, py::array_t> output = { - vectorToPyArray(std::get<0>(idsAndDistances)), - vectorToPyArray(std::get<1>(idsAndDistances))}; + auto idsAndDistances = + index.query(pyArrayToVector(input), k, queryEf); + std::tuple, py::array_t> + output = {vectorToPyArray( + std::get<0>(idsAndDistances)), + vectorToPyArray(std::get<1>(idsAndDistances))}; return output; } case 2: { - auto idsAndDistances = index.query(pyArrayToNDArray(input), k, num_threads, queryEf); - std::tuple, py::array_t> output = { - ndArrayToPyArray(std::get<0>(idsAndDistances)), - ndArrayToPyArray(std::get<1>(idsAndDistances))}; + auto idsAndDistances = index.query(pyArrayToNDArray(input), + k, num_threads, queryEf); + std::tuple, py::array_t> + output = { + ndArrayToPyArray( + std::get<0>(idsAndDistances)), + ndArrayToPyArray(std::get<1>(idsAndDistances))}; return output; } default: - throw std::domain_error("query(...) expected one- or two-dimensional input data (either " - "a single query vector or multiple query vectors) but got " + - std::to_string(inputNDim) + " dimensions."); + throw std::domain_error( + "query(...) 
expected one- or two-dimensional input data (either " + "a single query vector or multiple query vectors) but got " + + std::to_string(inputNDim) + " dimensions."); } }, - py::arg("vectors"), py::arg("k") = 1, py::arg("num_threads") = -1, py::arg("query_ef") = -1, R"( + py::arg("vectors"), py::arg("k") = 1, py::arg("num_threads") = -1, + py::arg("query_ef") = -1, R"( Query this index to retrieve the ``k`` nearest neighbors of the provided vectors. Args: @@ -545,7 +580,8 @@ The number of vectors that this index searches through when inserting a new vect the index. Higher values make index construction slower, but give better recall. This parameter cannot be changed after the index is instantiated.)"); - index.def_property("max_elements", &Index::getMaxElements, &Index::resizeIndex, R"( + index.def_property("max_elements", &Index::getMaxElements, + &Index::resizeIndex, R"( The maximum number of elements that can be stored in this index. If :py:attr:`max_elements` is much larger than @@ -576,7 +612,8 @@ deleted from the index; those deleted elements simply become invisible.)"); index.def( "get_vector", [](Index &index, size_t _id) { - return ndArrayToPyArray(NDArray(index.getVector(_id), {(int)index.getNumDimensions()})); + return ndArrayToPyArray(NDArray( + index.getVector(_id), {(int)index.getNumDimensions()})); }, py::arg("id"), R"( Get the vector stored in this index at the provided integer ID. @@ -596,7 +633,9 @@ If no such vector exists, a :py:exc:`KeyError` will be thrown. index.def( "get_vectors", - [](Index &index, std::vector _ids) { return ndArrayToPyArray(index.getVectors(_ids)); }, + [](Index &index, std::vector _ids) { + return ndArrayToPyArray(index.getVectors(_ids)); + }, py::arg("ids"), R"( Get one or more vectors stored in this index at the provided integer IDs. 
If one or more of the provided IDs cannot be found in the index, a @@ -609,7 +648,10 @@ If one or more of the provided IDs cannot be found in the index, a )"); index.def_property_readonly( - "ids", [](Index &index) { return std::make_unique(index.getIDsMap()); }, + "ids", + [](Index &index) { + return std::make_unique(index.getIDsMap()); + }, R"( A set-like object containing the integer IDs stored as 'keys' in this index. @@ -629,7 +671,10 @@ specific integer ID in this index:: )"); index.def( - "get_distance", [](Index &index, std::vector a, std::vector b) { return index.getDistance(a, b); }, + "get_distance", + [](Index &index, std::vector a, std::vector b) { + return index.getDistance(a, b); + }, R"( Get the distance between two provided vectors. The vectors must share the dimensionality of the index. )", @@ -804,42 +849,53 @@ Use the ``len`` operator to call this method:: This value may differ from :py:attr:`num_elements` if elements have been deleted. )"); - register_index_class(m, "FloatIndex", - "An :py:class:`Index` that uses full-precision 32-bit floating-point " - "storage."); + register_index_class( + m, "FloatIndex", + "An :py:class:`Index` that uses full-precision 32-bit floating-point " + "storage."); // An int8 index that stores its values as 8-bit integers, assumes all // input/output data is float on [-1, 1], and returns floating-point // distances. - register_index_class>(m, "Float8Index", - "An :py:class:`Index` that uses fixed-point 8-bit storage."); + register_index_class>( + m, "Float8Index", + "An :py:class:`Index` that uses fixed-point 8-bit storage."); // An 8-bit floating-point index class that has even more reduced // precision over Float8, but allows values on the range [-448, 448]. 
// Inspired by: https://arxiv.org/pdf/2209.05433.pdf - register_index_class(m, "E4M3Index", "An :py:class:`Index` that uses floating-point 8-bit storage."); + register_index_class( + m, "E4M3Index", + "An :py:class:`Index` that uses floating-point 8-bit storage."); index.def_static( "__new__", - [](const py::object *, const SpaceType space, const int num_dimensions, const size_t M, - const size_t ef_construction, const size_t random_seed, const size_t max_elements, + [](const py::object *, const SpaceType space, const int num_dimensions, + const size_t M, const size_t ef_construction, const size_t random_seed, + const size_t max_elements, const StorageDataType storageDataType) -> std::shared_ptr { py::gil_scoped_release release; switch (storageDataType) { case StorageDataType::E4M3: - return std::make_shared>(space, num_dimensions, M, ef_construction, random_seed, - max_elements); + return std::make_shared>( + space, num_dimensions, M, ef_construction, random_seed, + max_elements); case StorageDataType::Float8: - return std::make_shared>>( - space, num_dimensions, M, ef_construction, random_seed, max_elements); + return std::make_shared< + TypedIndex>>( + space, num_dimensions, M, ef_construction, random_seed, + max_elements); case StorageDataType::Float32: - return std::make_shared>(space, num_dimensions, M, ef_construction, random_seed, - max_elements); + return std::make_shared>(space, num_dimensions, M, + ef_construction, + random_seed, max_elements); default: throw std::runtime_error("Unknown storage data type received!"); } }, - py::arg("cls"), py::arg("space"), py::arg("num_dimensions"), py::arg("M") = 12, py::arg("ef_construction") = 200, - py::arg("random_seed") = 1, py::arg("max_elements") = 1, py::arg("storage_data_type") = StorageDataType::Float32, + py::arg("cls"), py::arg("space"), py::arg("num_dimensions"), + py::arg("M") = 12, py::arg("ef_construction") = 200, + py::arg("random_seed") = 1, py::arg("max_elements") = 1, + py::arg("storage_data_type") 
= StorageDataType::Float32, R"( Create a new Voyager nearest-neighbor search index with the provided arguments. @@ -870,29 +926,35 @@ of Voyager prior to v1.3. index.def_static( "load", - [](const std::string filename, const SpaceType space, const int num_dimensions, + [](const std::string filename, const SpaceType space, + const int num_dimensions, const StorageDataType storageDataType) -> std::shared_ptr { py::gil_scoped_release release; auto inputStream = std::make_shared(filename); - std::unique_ptr metadata = voyager::Metadata::loadFromStream(inputStream); + std::unique_ptr metadata = + voyager::Metadata::loadFromStream(inputStream); if (metadata) { if (metadata->getStorageDataType() != storageDataType) { - throw std::domain_error("Provided storage data type (" + toString(storageDataType) + - ") does not match the data type used in this file (" + - toString(metadata->getStorageDataType()) + ")."); + throw std::domain_error( + "Provided storage data type (" + toString(storageDataType) + + ") does not match the data type used in this file (" + + toString(metadata->getStorageDataType()) + ")."); } if (metadata->getSpaceType() != space) { - throw std::domain_error("Provided space type (" + toString(space) + - ") does not match the space type used in this file (" + - toString(metadata->getSpaceType()) + ")."); + throw std::domain_error( + "Provided space type (" + toString(space) + + ") does not match the space type used in this file (" + + toString(metadata->getSpaceType()) + ")."); } if (metadata->getNumDimensions() != num_dimensions) { - throw std::domain_error("Provided number of dimensions (" + std::to_string(num_dimensions) + - ") does not match the number of dimensions used in this file " - "(" + - std::to_string(metadata->getNumDimensions()) + ")."); + throw std::domain_error( + "Provided number of dimensions (" + + std::to_string(num_dimensions) + + ") does not match the number of dimensions used in this file " + "(" + + 
std::to_string(metadata->getNumDimensions()) + ")."); } return loadTypedIndexFromMetadata(std::move(metadata), inputStream); @@ -900,11 +962,15 @@ of Voyager prior to v1.3. switch (storageDataType) { case StorageDataType::E4M3: - return std::make_shared>(inputStream, space, num_dimensions); + return std::make_shared>(inputStream, space, + num_dimensions); case StorageDataType::Float8: - return std::make_shared>>(inputStream, space, num_dimensions); + return std::make_shared< + TypedIndex>>(inputStream, space, + num_dimensions); case StorageDataType::Float32: - return std::make_shared>(inputStream, space, num_dimensions); + return std::make_shared>(inputStream, space, + num_dimensions); default: throw std::runtime_error("Unknown storage data type received!"); } @@ -917,52 +983,64 @@ of Voyager prior to v1.3. [](const std::string filename) -> std::shared_ptr { py::gil_scoped_release release; - return loadTypedIndexFromStream(std::make_shared(filename)); + return loadTypedIndexFromStream( + std::make_shared(filename)); }, py::arg("filename"), LOAD_DOCSTRING); index.def_static( "load", - [](const py::object filelike, const SpaceType space, const int num_dimensions, + [](const py::object filelike, const SpaceType space, + const int num_dimensions, const StorageDataType storageDataType) -> std::shared_ptr { if (!isReadableFileLike(filelike)) { - throw py::type_error("Expected either a filename or a file-like object (with " - "read, seek, seekable, and tell methods), but received: " + - filelike.attr("__repr__")().cast()); + throw py::type_error( + "Expected either a filename or a file-like object (with " + "read, seek, seekable, and tell methods), but received: " + + filelike.attr("__repr__")().cast()); } auto inputStream = std::make_shared(filelike); py::gil_scoped_release release; - std::unique_ptr metadata = voyager::Metadata::loadFromStream(inputStream); + std::unique_ptr metadata = + voyager::Metadata::loadFromStream(inputStream); if (metadata) { if 
(metadata->getStorageDataType() != storageDataType) { - throw std::domain_error("Provided storage data type (" + toString(storageDataType) + - ") does not match the data type used in this file (" + - toString(metadata->getStorageDataType()) + ")."); + throw std::domain_error( + "Provided storage data type (" + toString(storageDataType) + + ") does not match the data type used in this file (" + + toString(metadata->getStorageDataType()) + ")."); } if (metadata->getSpaceType() != space) { - throw std::domain_error("Provided space type (" + toString(space) + - ") does not match the space type used in this file (" + - toString(metadata->getSpaceType()) + ")."); + throw std::domain_error( + "Provided space type (" + toString(space) + + ") does not match the space type used in this file (" + + toString(metadata->getSpaceType()) + ")."); } if (metadata->getNumDimensions() != num_dimensions) { - throw std::domain_error("Provided number of dimensions (" + std::to_string(num_dimensions) + - ") does not match the number of dimensions used in this file " - "(" + - std::to_string(metadata->getNumDimensions()) + ")."); + throw std::domain_error( + "Provided number of dimensions (" + + std::to_string(num_dimensions) + + ") does not match the number of dimensions used in this file " + "(" + + std::to_string(metadata->getNumDimensions()) + ")."); } return loadTypedIndexFromMetadata(std::move(metadata), inputStream); } switch (storageDataType) { case StorageDataType::E4M3: - return std::make_shared>(inputStream, space, num_dimensions); + return std::make_shared>(inputStream, space, + num_dimensions); case StorageDataType::Float8: - return std::make_shared>>(inputStream, space, num_dimensions); + return std::make_shared< + TypedIndex>>(inputStream, space, + num_dimensions); case StorageDataType::Float32: - return std::make_shared>(inputStream, space, num_dimensions); + return std::make_shared>(inputStream, space, + num_dimensions); default: throw std::runtime_error("Unknown storage 
data type received!"); } @@ -974,9 +1052,10 @@ of Voyager prior to v1.3. "load", [](const py::object filelike) -> std::shared_ptr { if (!isReadableFileLike(filelike)) { - throw py::type_error("Expected either a filename or a file-like object (with " - "read, seek, seekable, and tell methods), but received: " + - filelike.attr("__repr__")().cast()); + throw py::type_error( + "Expected either a filename or a file-like object (with " + "read, seek, seekable, and tell methods), but received: " + + filelike.attr("__repr__")().cast()); } auto inputStream = std::make_shared(filelike); diff --git a/python/src/PythonInputStream.h b/python/src/PythonInputStream.h index e3ff35ec..2cb6a0ab 100644 --- a/python/src/PythonInputStream.h +++ b/python/src/PythonInputStream.h @@ -23,8 +23,8 @@ namespace py = pybind11; #include bool isReadableFileLike(py::object fileLike) { - return py::hasattr(fileLike, "read") && py::hasattr(fileLike, "seek") && py::hasattr(fileLike, "tell") && - py::hasattr(fileLike, "seekable"); + return py::hasattr(fileLike, "read") && py::hasattr(fileLike, "seek") && + py::hasattr(fileLike, "tell") && py::hasattr(fileLike, "seekable"); } /** @@ -74,8 +74,9 @@ class PythonInputStream : public InputStream, PythonFileLike { long long read(char *buffer, long long bytesToRead) { py::gil_scoped_acquire acquire; if (buffer == nullptr) { - throw py::buffer_error("C++ code attempted to read from a Python file-like object into a " - "null destination buffer."); + throw py::buffer_error( + "C++ code attempted to read from a Python file-like object into a " + "null destination buffer."); } if (bytesToRead < 0) { @@ -86,7 +87,8 @@ class PythonInputStream : public InputStream, PythonFileLike { long long bytesRead = 0; if (peekValue.size()) { - long long bytesToCopy = std::min(bytesToRead, (long long)peekValue.size()); + long long bytesToCopy = + std::min(bytesToRead, (long long)peekValue.size()); std::memcpy(buffer, peekValue.data(), bytesToCopy); for (int i = 0; i < 
bytesToCopy; i++) peekValue.erase(peekValue.begin()); @@ -95,15 +97,19 @@ class PythonInputStream : public InputStream, PythonFileLike { } while (bytesRead < bytesToRead) { - auto readResult = fileLike.attr("read")(std::min(MAX_BUFFER_SIZE, bytesToRead - bytesRead)); + auto readResult = fileLike.attr("read")( + std::min(MAX_BUFFER_SIZE, bytesToRead - bytesRead)); if (!py::isinstance(readResult)) { std::string message = "Python file-like object was expected to return " "bytes from its read(...) method, but " "returned " + - py::str(readResult.get_type().attr("__name__")).cast() + "."; + py::str(readResult.get_type().attr("__name__")) + .cast() + + "."; - if (py::hasattr(fileLike, "mode") && py::str(fileLike.attr("mode")).cast() == "r") { + if (py::hasattr(fileLike, "mode") && + py::str(fileLike.attr("mode")).cast() == "r") { message += " (Try opening the stream in \"rb\" mode instead of " "\"r\" mode if possible.)"; } @@ -116,8 +122,10 @@ class PythonInputStream : public InputStream, PythonFileLike { char *pythonBuffer = nullptr; py::ssize_t pythonLength = 0; - if (PYBIND11_BYTES_AS_STRING_AND_SIZE(bytesObject.ptr(), &pythonBuffer, &pythonLength)) { - throw py::buffer_error("Internal error: failed to read bytes from bytes object!"); + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(bytesObject.ptr(), &pythonBuffer, + &pythonLength)) { + throw py::buffer_error( + "Internal error: failed to read bytes from bytes object!"); } if (!buffer && pythonLength > 0) { @@ -126,8 +134,10 @@ class PythonInputStream : public InputStream, PythonFileLike { } if (bytesRead + pythonLength > bytesToRead) { - throw py::buffer_error("Python returned " + std::to_string(pythonLength) + " bytes, but only " + - std::to_string(bytesToRead - bytesRead) + " bytes were requested."); + throw py::buffer_error( + "Python returned " + std::to_string(pythonLength) + + " bytes, but only " + std::to_string(bytesToRead - bytesRead) + + " bytes were requested."); } if (buffer && pythonLength > 0) { @@ -180,8 
+190,10 @@ class PythonInputStream : public InputStream, PythonFileLike { peekValue.push_back(resultAsCharacters[3]); return result; } else { - throw std::runtime_error("Failed to peek " + std::to_string(sizeof(result)) + - " bytes from file-like object at index " + std::to_string(lastPosition) + "."); + throw std::runtime_error("Failed to peek " + + std::to_string(sizeof(result)) + + " bytes from file-like object at index " + + std::to_string(lastPosition) + "."); } } diff --git a/python/src/PythonOutputStream.h b/python/src/PythonOutputStream.h index 8b60256b..58ca658b 100644 --- a/python/src/PythonOutputStream.h +++ b/python/src/PythonOutputStream.h @@ -23,8 +23,8 @@ namespace py = pybind11; #include bool isWriteableFileLike(py::object fileLike) { - return py::hasattr(fileLike, "write") && py::hasattr(fileLike, "seek") && py::hasattr(fileLike, "tell") && - py::hasattr(fileLike, "seekable"); + return py::hasattr(fileLike, "write") && py::hasattr(fileLike, "seek") && + py::hasattr(fileLike, "tell") && py::hasattr(fileLike, "seekable"); } /** @@ -55,7 +55,9 @@ class PythonOutputStream : public OutputStream, public PythonFileLike { for (unsigned long long i = 0; i < numBytes; i += MAX_BUFFER_SIZE) { unsigned long long chunkSize = std::min(numBytes - i, MAX_BUFFER_SIZE); - int bytesWritten = fileLike.attr("write")(py::bytes((const char *)ptr, chunkSize)).cast(); + int bytesWritten = + fileLike.attr("write")(py::bytes((const char *)ptr, chunkSize)) + .cast(); if (bytesWritten < 0) return false; From 598e9e6b22d5f62f8b24cbd54645af2d40e8c9ce Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Sun, 18 Aug 2024 20:21:50 -0600 Subject: [PATCH 22/25] Fix TypedIndex rebase --- cpp/src/TypedIndex.h | 33 ++++----------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/cpp/src/TypedIndex.h b/cpp/src/TypedIndex.h index c0f5debc..63066a40 100644 --- a/cpp/src/TypedIndex.h +++ b/cpp/src/TypedIndex.h @@ -53,29 +53,6 @@ template <> const std::string 
storageDataTypeName() { return "Float8"; } template <> const std::string storageDataTypeName() { return "Float32"; } template <> const std::string storageDataTypeName() { return "E4M3"; } -template -dist_t ensureNotNegative(dist_t distance, hnswlib::labeltype label) { - if constexpr (std::is_same_v) { - // Allow for a very slight negative distance if using E4M3 - if (distance < 0 && distance >= -0.14) { - return 0; - } - } - - if (distance < 0) { - if (distance >= -0.00001) { - return 0; - } - - throw std::runtime_error( - "Potential candidate (with label '" + std::to_string(label) + - "') had negative distance " + std::to_string(distance) + - ". This may indicate a corrupted index file."); - } - - return distance; -} - /** * A C++ wrapper class for a typed HNSW index. * @@ -402,7 +379,7 @@ class TypedIndex : public Index { floatToDataType(&inputArray[startIndex], &convertedArray[startIndex], actualDimensions); - size_t id = ids.size() ? ids.at(row) : (currentLabel + row); + size_t id = ids.size() ? ids.at(row) : (currentLabel.fetch_add(1)); try { algorithmImpl->addPoint(convertedArray.data() + startIndex, id); } catch (IndexFullError &e) { @@ -438,7 +415,7 @@ class TypedIndex : public Index { normalizeVector( &inputArray[startIndex], &normalizedArray[startIndex], actualDimensions); - size_t id = ids.size() ? ids.at(row) : (currentLabel + row); + size_t id = ids.size() ? 
ids.at(row) : (currentLabel.fetch_add(1)); try { algorithmImpl->addPoint(normalizedArray.data() + startIndex, id); @@ -629,8 +606,7 @@ class TypedIndex : public Index { dist_t distance = result_tuple.first; hnswlib::labeltype label = result_tuple.second; - distancePointer[row * k + i] = - ensureNotNegative(distance, label); + distancePointer[row * k + i] = distance; labelPointer[row * k + i] = label; result.pop(); } @@ -704,8 +680,7 @@ class TypedIndex : public Index { for (int i = k - 1; i >= 0; i--) { auto &result_tuple = result.top(); - distancePointer[i] = ensureNotNegative( - result_tuple.first, result_tuple.second); + distancePointer[i] = result_tuple.first; labelPointer[i] = result_tuple.second; result.pop(); } From 753175f4f871c8f94a30d42387a68295fc4cd286 Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Sun, 18 Aug 2024 20:51:59 -0600 Subject: [PATCH 23/25] Add c++ tests to github actions --- .github/workflows/all.yml | 27 +++++++++++++++++++++------ .gitignore | 1 + CONTRIBUTING.md | 4 ++-- cpp/CMakeLists.txt | 13 +++++++++++++ cpp/test/CMakeLists.txt | 15 ++++++++++----- cpp/test/test_main.cpp | 6 ++++-- 6 files changed, 51 insertions(+), 15 deletions(-) diff --git a/.github/workflows/all.yml b/.github/workflows/all.yml index 19f00998..0d941b7a 100644 --- a/.github/workflows/all.yml +++ b/.github/workflows/all.yml @@ -228,18 +228,33 @@ jobs: # TODO: Switch back to macos-latest once https://github.com/actions/python-versions/pull/114 is fixed os: - 'ubuntu-latest' - - windows-latest + # TODO: Fix failing CMake build on windows: + # Error: `cl : command line error D8016: '/O2' and '/RTC1' command-line options are incompatible [D:\a\voyager\voyager\cpp\test\test.vcxproj]` + # I've tried passing CXX flags, but windows doesn't seem to respect disabling runtime checks with /RTC1 or disabling optimizations with /O2 + # - windows-latest - macos-12 name: Test C++ on ${{ matrix.os }} steps: - uses: actions/checkout@v3 with: submodules: recursive - - name: Install 
Linux dependencies - if: runner.os == 'Linux' - run: sudo apt-get update && sudo apt-get install -y pkg-config - - name: Build voyager locally - run: make test + - name: Install CMake (Windows) + if: matrix.os == 'windows-latest' + run: | + choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' + choco install ninja + - name: Install CMake (MacOS) + if: matrix.os == 'macos-12' + run: brew install cmake + - name: Install CMake (Ubuntu) + if: matrix.os == 'ubuntu-latest' + run: sudo apt-get install -y cmake + - name: Configure CMake + run: cmake . + - name: Build with CMake + run: make + - name: Run Tests (if any) + run: cat Makefile build-python-wheels: needs: [run-python-tests, run-python-tests-with-address-sanitizer] diff --git a/.gitignore b/.gitignore index e735e256..329081d5 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ compile_commands.json CTestTestfile.cmake _deps DartConfiguration.tcl +VoyagerTests diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9d8bbc65..c06e7e97 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -115,8 +115,8 @@ To run the C++ tests, use the following commands: cd cpp git submodule update --init --recursive cmake . +make make test -./test/test ``` ## Style @@ -166,7 +166,7 @@ cd java mvn package ``` -this will update the java documentation located in [docs/java/](https://github.com/spotify/voyager/tree/main/docs/java). +This will update the java documentation located in [docs/java/](https://github.com/spotify/voyager/tree/main/docs/java). 
## Issues When creating an issue please try to ahere to the following format: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 72893357..7a4607bf 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -5,6 +5,17 @@ set(CMAKE_CXX_STANDARD 17) set(LLVM_CXX_STD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +if (MSVC) + # Set /RTC1 only for Debug builds + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /RTC1") + + # Ensure /RTC1 is not used in Release builds + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2") + + # Optionally, remove /RTC1 from the global CXX flags to avoid conflicts + string(REGEX REPLACE "/RTC1" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +endif() + add_subdirectory(include) add_subdirectory(src) add_subdirectory(test) @@ -25,3 +36,5 @@ add_custom_target(format COMMAND ${FORMAT_COMMAND} `${FIND_COMMAND}` COMMENT "Running C++ formatter" ) + +enable_testing() diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 227e256c..c5598640 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -1,12 +1,17 @@ -add_executable(test doctest_setup.cpp test_main.cpp) +# List the test source files +set(TEST_FILES test_main.cpp doctest_setup.cpp) # Add any test files here -target_link_libraries(test +# Create an executable for the tests +add_executable(VoyagerTests ${TEST_FILES}) + +# Link the test executable with the main project and Doctest +# target_link_libraries(MyProjectTests PRIVATE MyProject doctest::doctest) +target_link_libraries(VoyagerTests PUBLIC VoyagerLib PRIVATE doctest ) -target_compile_options(test PRIVATE -O2 -g) - -include(CTest) +# Add the tests +add_test(NAME VoyagerTests COMMAND VoyagerTests) diff --git a/cpp/test/test_main.cpp b/cpp/test/test_main.cpp index f9cc4e6c..dbfa5dc1 100644 --- a/cpp/test/test_main.cpp +++ b/cpp/test/test_main.cpp @@ -22,11 +22,13 @@ TEST_CASE("Test combinations of different instantiations and sizes") { std::vector storageTypesSet = { StorageDataType::Float8, 
StorageDataType::Float32, StorageDataType::E4M3}; + auto count = 0; + for (auto spaceType : spaceTypesSet) { for (auto numDimensions : numDimensionsSet) { for (auto numElements : numElementsSet) { for (auto storageType : storageTypesSet) { - SUBCASE("Combination test") { + SUBCASE("Test instantiation ") { CAPTURE(spaceType); CAPTURE(numDimensions); CAPTURE(numElements); @@ -41,7 +43,7 @@ TEST_CASE("Test combinations of different instantiations and sizes") { testCombination(index, spaceType, numDimensions, storageType); } else if (storageType == StorageDataType::E4M3) { auto index = TypedIndex(spaceType, numDimensions); - testCombination(index, spaceType, 20, storageType); + testCombination(index, spaceType, numDimensions, storageType); } } } From 4f98c9350db9469a1558e6d54767e205077a9039 Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Mon, 19 Aug 2024 08:16:41 -0600 Subject: [PATCH 24/25] Add build dir for cmake --- .github/workflows/all.yml | 74 ++++++++++++++++++--------------------- .gitignore | 4 ++- CONTRIBUTING.md | 5 +-- cpp/CMakeLists.txt | 3 +- cpp/Makefile | 15 ++++++++ cpp/build/.gitkeep | 0 cpp/test/CMakeLists.txt | 4 +++ 7 files changed, 60 insertions(+), 45 deletions(-) create mode 100644 cpp/Makefile create mode 100644 cpp/build/.gitkeep diff --git a/.github/workflows/all.yml b/.github/workflows/all.yml index 0d941b7a..394e7382 100644 --- a/.github/workflows/all.yml +++ b/.github/workflows/all.yml @@ -41,6 +41,41 @@ jobs: with: clang-format-version: 16 + run-cpp-tests: + runs-on: ${{ matrix.os }} + continue-on-error: true + defaults: + run: + working-directory: cpp + strategy: + matrix: + # TODO: Switch back to macos-latest once https://github.com/actions/python-versions/pull/114 is fixed + os: + - 'ubuntu-latest' + # TODO: Fix failing CMake build on windows: + # Error: `cl : command line error D8016: '/O2' and '/RTC1' command-line options are incompatible [D:\a\voyager\voyager\cpp\test\test.vcxproj]` + # I've tried passing CXX flags, but windows 
doesn't seem to respect disabling runtime checks with /RTC1 or disabling optimizations with /O2 + - windows-latest + - macos-12 + name: Test C++ on ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + - name: Install CMake (Windows) + if: matrix.os == 'windows-latest' + run: | + choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' + choco install ninja + - name: Install CMake (MacOS) + if: matrix.os == 'macos-12' + run: brew install cmake + - name: Install CMake (Ubuntu) + if: matrix.os == 'ubuntu-latest' + run: sudo apt-get install -y cmake + - name: Run tests + run: make test + run-java-tests: continue-on-error: true name: Test with Java ${{ matrix.java-version }} on ${{ matrix.os }} @@ -217,45 +252,6 @@ jobs: asv machine --yes asv continuous --sort name --no-only-changed refs/remotes/origin/main ${{ github.sha }} | tee >(sed '1,/All benchmarks:/d' > $GITHUB_STEP_SUMMARY) - run-cpp-tests: - runs-on: ${{ matrix.os }} - continue-on-error: true - defaults: - run: - working-directory: cpp - strategy: - matrix: - # TODO: Switch back to macos-latest once https://github.com/actions/python-versions/pull/114 is fixed - os: - - 'ubuntu-latest' - # TODO: Fix failing CMake build on windows: - # Error: `cl : command line error D8016: '/O2' and '/RTC1' command-line options are incompatible [D:\a\voyager\voyager\cpp\test\test.vcxproj]` - # I've tried passing CXX flags, but windows doesn't seem to respect disabling runtime checks with /RTC1 or disabling optimizations with /O2 - # - windows-latest - - macos-12 - name: Test C++ on ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - with: - submodules: recursive - - name: Install CMake (Windows) - if: matrix.os == 'windows-latest' - run: | - choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' - choco install ninja - - name: Install CMake (MacOS) - if: matrix.os == 'macos-12' - run: brew install cmake - - name: Install CMake (Ubuntu) - if: matrix.os == 'ubuntu-latest' - 
run: sudo apt-get install -y cmake - - name: Configure CMake - run: cmake . - - name: Build with CMake - run: make - - name: Run Tests (if any) - run: cat Makefile - build-python-wheels: needs: [run-python-tests, run-python-tests-with-address-sanitizer] runs-on: ${{ matrix.os }} diff --git a/.gitignore b/.gitignore index 329081d5..f5287bce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.egg-info/ -build/ +*/build/* +!build/.gitkeep dist/ tmp/ __pycache__/ @@ -26,6 +27,7 @@ CMakeFiles CMakeScripts Testing Makefile +!cpp/Makefile cmake_install.cmake install_manifest.txt compile_commands.json diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c06e7e97..2ef187e9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -67,8 +67,7 @@ To build the C++ library with `cmake`, use the following commands: ```shell cd cpp git submodule update --init --recursive -cmake . -make +make build ``` ## Testing @@ -114,8 +113,6 @@ To run the C++ tests, use the following commands: ```shell cd cpp git submodule update --init --recursive -cmake . -make make test ``` diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7a4607bf..4208d3eb 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -16,6 +16,8 @@ if (MSVC) string(REGEX REPLACE "/RTC1" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() +enable_testing() + add_subdirectory(include) add_subdirectory(src) add_subdirectory(test) @@ -37,4 +39,3 @@ add_custom_target(format COMMENT "Running C++ formatter" ) -enable_testing() diff --git a/cpp/Makefile b/cpp/Makefile new file mode 100644 index 00000000..02c3ab16 --- /dev/null +++ b/cpp/Makefile @@ -0,0 +1,15 @@ +default_target: build + +BUILD_DIR := build + +cmake: + cmake -S . 
-B $(BUILD_DIR) + +build: cmake + cmake --build ${BUILD_DIR} + +test: build + cmake --build ${BUILD_DIR} --target test + +clean: + rm -rf ${BUILD_DIR}/* \ No newline at end of file diff --git a/cpp/build/.gitkeep b/cpp/build/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index c5598640..adde8751 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -15,3 +15,7 @@ target_link_libraries(VoyagerTests # Add the tests add_test(NAME VoyagerTests COMMAND VoyagerTests) + +# Discover tests using Doctest +include(${CMAKE_SOURCE_DIR}/include/doctest/scripts/cmake/doctest.cmake) +doctest_discover_tests(VoyagerTests) From 1f6c0b19ac1e51f735f4b58674dcfd4210894b59 Mon Sep 17 00:00:00 2001 From: Mark Koh Date: Mon, 19 Aug 2024 16:00:21 -0600 Subject: [PATCH 25/25] Fix windows tests --- .github/workflows/all.yml | 4 ---- cpp/CMakeLists.txt | 12 ------------ cpp/Makefile | 2 +- cpp/src/E4M3.h | 1 + cpp/src/Enums.h | 2 ++ cpp/test/CMakeLists.txt | 3 --- 6 files changed, 4 insertions(+), 20 deletions(-) diff --git a/.github/workflows/all.yml b/.github/workflows/all.yml index 394e7382..505e2fc0 100644 --- a/.github/workflows/all.yml +++ b/.github/workflows/all.yml @@ -52,9 +52,6 @@ jobs: # TODO: Switch back to macos-latest once https://github.com/actions/python-versions/pull/114 is fixed os: - 'ubuntu-latest' - # TODO: Fix failing CMake build on windows: - # Error: `cl : command line error D8016: '/O2' and '/RTC1' command-line options are incompatible [D:\a\voyager\voyager\cpp\test\test.vcxproj]` - # I've tried passing CXX flags, but windows doesn't seem to respect disabling runtime checks with /RTC1 or disabling optimizations with /O2 - windows-latest - macos-12 name: Test C++ on ${{ matrix.os }} @@ -66,7 +63,6 @@ jobs: if: matrix.os == 'windows-latest' run: | choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' - choco install ninja - name: Install CMake (MacOS) if: matrix.os == 'macos-12' 
run: brew install cmake diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4208d3eb..720f3d43 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -5,17 +5,6 @@ set(CMAKE_CXX_STANDARD 17) set(LLVM_CXX_STD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -if (MSVC) - # Set /RTC1 only for Debug builds - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /RTC1") - - # Ensure /RTC1 is not used in Release builds - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2") - - # Optionally, remove /RTC1 from the global CXX flags to avoid conflicts - string(REGEX REPLACE "/RTC1" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -endif() - enable_testing() add_subdirectory(include) @@ -38,4 +27,3 @@ add_custom_target(format COMMAND ${FORMAT_COMMAND} `${FIND_COMMAND}` COMMENT "Running C++ formatter" ) - diff --git a/cpp/Makefile b/cpp/Makefile index 02c3ab16..62b1dc73 100644 --- a/cpp/Makefile +++ b/cpp/Makefile @@ -9,7 +9,7 @@ build: cmake cmake --build ${BUILD_DIR} test: build - cmake --build ${BUILD_DIR} --target test + ctest --test-dir ${BUILD_DIR} clean: rm -rf ${BUILD_DIR}/* \ No newline at end of file diff --git a/cpp/src/E4M3.h b/cpp/src/E4M3.h index 37f6b523..ed241efd 100644 --- a/cpp/src/E4M3.h +++ b/cpp/src/E4M3.h @@ -21,6 +21,7 @@ #pragma once #include +#include static constexpr float ALL_E4M3_VALUES[256] = { 0, diff --git a/cpp/src/Enums.h b/cpp/src/Enums.h index 885dada4..b0f3377c 100644 --- a/cpp/src/Enums.h +++ b/cpp/src/Enums.h @@ -1,5 +1,7 @@ #pragma once +#include + /** * The space (i.e. distance metric) to use for searching. */ diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index adde8751..a46805c1 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -13,9 +13,6 @@ target_link_libraries(VoyagerTests doctest ) -# Add the tests -add_test(NAME VoyagerTests COMMAND VoyagerTests) - # Discover tests using Doctest include(${CMAKE_SOURCE_DIR}/include/doctest/scripts/cmake/doctest.cmake) doctest_discover_tests(VoyagerTests)