Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: propagate index and record fixtures through async tests #69

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/aerospike_vector_search/aio/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ async def index_get_status(

Note:
This method retrieves the status of the specified index. If index_get_status is called right after the vector client puts some records into Aerospike Vector Search,
the records may not immediately begin to merge into the index. To wait for all records to be merged into an index, use vector_client.wait_for_index_completion.
the records may not immediately begin to merge into the index.

Warning: This API is subject to change.
"""
Expand Down
27 changes: 21 additions & 6 deletions src/aerospike_vector_search/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,21 +528,36 @@ class HnswParams(object):
"""
Parameters for the Hierarchical Navigable Small World (HNSW) algorithm, used for approximate nearest neighbor search.

:param m: The number of bi-directional links created per level during construction. Larger 'm' values lead to higher recall but slower construction. Defaults to 16.
:param m: The number of bi-directional links created per level during construction. Larger 'm' values lead to higher recall but slower construction. Optional, Defaults to 16.
:type m: Optional[int]

:param ef_construction: The size of the dynamic list for the nearest neighbors (candidates) during the index construction. Larger 'ef_construction' values lead to higher recall but slower construction. Defaults to 100.
:param ef_construction: The size of the dynamic list for the nearest neighbors (candidates) during the index construction. Larger 'ef_construction' values lead to higher recall but slower construction. Optional, Defaults to 100.
:type ef_construction: Optional[int]

:param ef: The size of the dynamic list for the nearest neighbors (candidates) during the search phase. Larger 'ef' values lead to higher recall but slower search. Defaults to 100.
:param ef: The size of the dynamic list for the nearest neighbors (candidates) during the search phase. Larger 'ef' values lead to higher recall but slower search. Optional, Defaults to 100.
:type ef: Optional[int]

:param batching_params: Parameters related to configuring batch processing, such as the maximum number of records per batch and batching interval. Defaults to HnswBatchingParams().
:type batching_params: Optional[HnswBatchingParams]
:param batching_params: Parameters related to configuring batch processing, such as the maximum number of records per batch and batching interval. Optional, Defaults to HnswBatchingParams().
:type batching_params: HnswBatchingParams

:param enable_vector_integrity_check: Verifies if the underlying vector has changed before returning the kANN result.
:param max_mem_queue_size: Maximum size of in-memory queue for inserted/updated vector records. Optional, Defaults to the corresponding config on the AVS Server.
:type max_mem_queue_size: Optional[int]

:param index_caching_params: Parameters related to configuring caching for the HNSW index. Optional, Defaults to HnswCachingParams().
:type index_caching_params: HnswCachingParams

:param healer_params: Parameters related to configuring the HNSW index healer. Optional, Defaults to HnswHealerParams().
:type healer_params: HnswHealerParams

:param merge_params: Parameters related to configuring the merging of index records. Optional, Defaults to HnswIndexMergeParams().
:type merge_params: HnswIndexMergeParams

:param enable_vector_integrity_check: Verifies if the underlying vector has changed before returning the kANN result. Optional, Defaults to True.
:type enable_vector_integrity_check: Optional[bool]

:param record_caching_params: Parameters related to configuring caching for vector records. Optional, Defaults to HnswCachingParams().
:type record_caching_params: HnswCachingParams

"""

def __init__(
Expand Down
23 changes: 23 additions & 0 deletions tests/standard/aio/aio_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import asyncio


async def drop_specified_index(admin_client, namespace, name):
    """Drop the index called ``name`` in ``namespace`` via ``admin_client``.

    Thin awaitable wrapper around ``admin_client.index_drop``; any exception
    raised by that call propagates to the caller unchanged.
    """
    drop_request = admin_client.index_drop(namespace=namespace, name=name)
    await drop_request


def gen_records(count: int, vec_bin: str, vec_dim: int):
num = 0
while num < count:
Expand All @@ -10,3 +14,22 @@ def gen_records(count: int, vec_bin: str, vec_dim: int):
)
yield key_and_rec
num += 1


async def wait_for_index(
    admin_client,
    namespace: str,
    index: str,
    timeout=None,
    poll_interval: float = 0.5,
):
    """Poll an index's status until it reports vertices and no unmerged records.

    The status is fetched with ``admin_client.index_get_status`` and the wait
    ends once ``index_healer_vertices_valid`` is non-zero and
    ``unmerged_record_count`` is not positive.

    :param admin_client: Async admin client exposing ``index_get_status``.
    :param namespace: Namespace the index lives in.
    :param index: Name of the index to wait for.
    :param timeout: Optional maximum number of seconds to wait. ``None``
        (the default) waits indefinitely, as this helper always has.
    :param poll_interval: Seconds to sleep after each status poll.

    :raises asyncio.TimeoutError: If ``timeout`` is given and the index is
        still not ready once it has elapsed.
    """
    loop = asyncio.get_running_loop()
    deadline = None if timeout is None else loop.time() + timeout

    while True:
        status = await admin_client.index_get_status(
            namespace=namespace,
            name=index,
        )

        vertices = status.index_healer_vertices_valid
        unmerged_recs = status.unmerged_record_count

        # Sleep after every poll, including the final successful one, so the
        # default pacing callers observe is unchanged.
        await asyncio.sleep(poll_interval)

        if vertices != 0 and unmerged_recs <= 0:
            return

        if deadline is not None and loop.time() >= deadline:
            raise asyncio.TimeoutError(
                f"index {namespace}.{index} not ready after {timeout} seconds "
                f"(vertices={vertices}, unmerged_records={unmerged_recs})"
            )
72 changes: 61 additions & 11 deletions tests/standard/aio/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,24 @@
import pytest
import random
import string
import grpc

from aerospike_vector_search.aio import Client
from aerospike_vector_search.aio.admin import Client as AdminClient
from aerospike_vector_search import types
from aerospike_vector_search import types, AVSServerError

from .aio_utils import gen_records
import utils

#import logging
#logger = logging.getLogger(__name__)
#logging.basicConfig(filename='example.log', encoding='utf-8', level=logging.DEBUG)


# default test values
DEFAULT_NAMESPACE = "test"
DEFAULT_INDEX_DIMENSION = 128
DEFAULT_VECTOR_FIELD = "vector"
DEFAULT_NAMESPACE = utils.DEFAULT_NAMESPACE
DEFAULT_INDEX_DIMENSION = utils.DEFAULT_INDEX_DIMENSION
DEFAULT_VECTOR_FIELD = utils.DEFAULT_VECTOR_FIELD
DEFAULT_INDEX_ARGS = {
"namespace": DEFAULT_NAMESPACE,
"vector_field": DEFAULT_VECTOR_FIELD,
Expand Down Expand Up @@ -199,14 +201,37 @@ def index_name():

@pytest.fixture(params=[DEFAULT_INDEX_ARGS])
async def index(session_admin_client, index_name, request):
    """Create an index for a test, yield its name, and drop it afterwards.

    The fixture parameter is a dict that may override ``namespace``,
    ``vector_field`` and ``dimensions``; missing keys fall back to the
    module-level defaults.

    Teardown tolerates the index already being gone (gRPC ``NOT_FOUND``);
    any other server error is re-raised.
    """
    args = request.param
    namespace = args.get("namespace", DEFAULT_NAMESPACE)
    vector_field = args.get("vector_field", DEFAULT_VECTOR_FIELD)
    dimensions = args.get("dimensions", DEFAULT_INDEX_DIMENSION)
    await session_admin_client.index_create(
        name=index_name,
        namespace=namespace,
        vector_field=vector_field,
        dimensions=dimensions,
        index_params=types.HnswParams(
            batching_params=types.HnswBatchingParams(
                # 10_000 is the minimum value, in order for the tests to run as
                # fast as possible we set it to the minimum value so records are
                # indexed quickly
                index_interval=10_000,
            ),
            healer_params=types.HnswHealerParams(
                # run the healer every second
                # for fast indexing
                schedule="* * * * * ?",
            ),
        ),
    )
    yield index_name
    try:
        await session_admin_client.index_drop(namespace=namespace, name=index_name)
    except AVSServerError as se:
        # NOT_FOUND means the index no longer exists, so there is nothing left
        # to clean up. Every other server error is a genuine teardown failure
        # and must surface.
        if se.rpc_error.code() != grpc.StatusCode.NOT_FOUND:
            raise


@pytest.fixture(params=[DEFAULT_RECORDS_ARGS])
Expand All @@ -217,10 +242,35 @@ async def records(session_vector_client, request):
num_records = args.get("num_records", DEFAULT_NUM_RECORDS)
vector_field = args.get("vector_field", DEFAULT_VECTOR_FIELD)
dimensions = args.get("dimensions", DEFAULT_INDEX_DIMENSION)
set_name = args.get("set_name", None)
keys = []
for key, rec in record_generator(count=num_records, vec_bin=vector_field, vec_dim=dimensions):
await session_vector_client.upsert(namespace=namespace, key=key, record_data=rec)
await session_vector_client.upsert(
namespace=namespace,
key=key,
record_data=rec,
set_name=set_name,
)
keys.append(key)
yield len(keys)
yield keys
for key in keys:
await session_vector_client.delete(key=key, namespace=namespace)
await session_vector_client.delete(key=key, namespace=namespace)


@pytest.fixture(params=[DEFAULT_RECORDS_ARGS])
async def record(session_vector_client, request):
    """Upsert a single generated record, yield its key, then delete it.

    The fixture parameter is a dict that may override ``record_generator``,
    ``namespace``, ``vector_field``, ``dimensions`` and ``set_name``; missing
    keys fall back to the module-level defaults (``set_name`` defaults to
    ``None``).
    """
    args = request.param
    record_generator = args.get("record_generator", DEFAULT_RECORD_GENERATOR)
    namespace = args.get("namespace", DEFAULT_NAMESPACE)
    vector_field = args.get("vector_field", DEFAULT_VECTOR_FIELD)
    dimensions = args.get("dimensions", DEFAULT_INDEX_DIMENSION)
    set_name = args.get("set_name", None)
    key, rec = next(
        record_generator(count=1, vec_bin=vector_field, vec_dim=dimensions)
    )
    await session_vector_client.upsert(
        namespace=namespace,
        key=key,
        record_data=rec,
        set_name=set_name,
    )
    yield key
    # Delete from the same set the record was written to; without set_name a
    # record upserted into a set would be left behind after the test.
    await session_vector_client.delete(
        namespace=namespace,
        key=key,
        set_name=set_name,
    )
38 changes: 19 additions & 19 deletions tests/standard/aio/test_admin_client_index_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from aerospike_vector_search import types, AVSServerError
import grpc

from ...utils import random_name
from ...utils import random_name, DEFAULT_NAMESPACE

from .aio_utils import drop_specified_index
from hypothesis import given, settings, Verbosity, Phase
Expand Down Expand Up @@ -51,7 +51,7 @@ def __init__(
"test_case",
[
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_1",
dimensions=1024,
vector_distance_metric=None,
Expand Down Expand Up @@ -105,7 +105,7 @@ async def test_index_create(session_admin_client, test_case, random_name):
"test_case",
[
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_2",
dimensions=495,
vector_distance_metric=None,
Expand All @@ -116,7 +116,7 @@ async def test_index_create(session_admin_client, test_case, random_name):
timeout=None,
),
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_3",
dimensions=2048,
vector_distance_metric=None,
Expand Down Expand Up @@ -174,7 +174,7 @@ async def test_index_create_with_dimnesions(
"test_case",
[
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_4",
dimensions=1024,
vector_distance_metric=types.VectorDistanceMetric.COSINE,
Expand All @@ -185,7 +185,7 @@ async def test_index_create_with_dimnesions(
timeout=None,
),
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_5",
dimensions=1024,
vector_distance_metric=types.VectorDistanceMetric.DOT_PRODUCT,
Expand All @@ -196,7 +196,7 @@ async def test_index_create_with_dimnesions(
timeout=None,
),
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_6",
dimensions=1024,
vector_distance_metric=types.VectorDistanceMetric.MANHATTAN,
Expand All @@ -207,7 +207,7 @@ async def test_index_create_with_dimnesions(
timeout=None,
),
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_7",
dimensions=1024,
vector_distance_metric=types.VectorDistanceMetric.HAMMING,
Expand Down Expand Up @@ -262,7 +262,7 @@ async def test_index_create_with_vector_distance_metric(
"test_case",
[
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_8",
dimensions=1024,
vector_distance_metric=None,
Expand All @@ -273,7 +273,7 @@ async def test_index_create_with_vector_distance_metric(
timeout=None,
),
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_9",
dimensions=1024,
vector_distance_metric=None,
Expand Down Expand Up @@ -326,7 +326,7 @@ async def test_index_create_with_sets(session_admin_client, test_case, random_na
"test_case",
[
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_10",
dimensions=1024,
vector_distance_metric=None,
Expand All @@ -342,7 +342,7 @@ async def test_index_create_with_sets(session_admin_client, test_case, random_na
timeout=None,
),
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_11",
dimensions=1024,
vector_distance_metric=None,
Expand All @@ -358,7 +358,7 @@ async def test_index_create_with_sets(session_admin_client, test_case, random_na
timeout=None,
),
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_12",
dimensions=1024,
vector_distance_metric=None,
Expand All @@ -372,7 +372,7 @@ async def test_index_create_with_sets(session_admin_client, test_case, random_na
timeout=None,
),
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_13",
dimensions=1024,
vector_distance_metric=None,
Expand All @@ -386,7 +386,7 @@ async def test_index_create_with_sets(session_admin_client, test_case, random_na
timeout=None,
),
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_20",
dimensions=1024,
vector_distance_metric=None,
Expand Down Expand Up @@ -473,7 +473,7 @@ async def test_index_create_with_index_params(
"test_case",
[
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_14",
dimensions=1024,
vector_distance_metric=None,
Expand Down Expand Up @@ -527,14 +527,14 @@ async def test_index_create_index_labels(session_admin_client, test_case, random
"test_case",
[
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_15",
dimensions=1024,
vector_distance_metric=None,
sets=None,
index_params=None,
index_labels=None,
index_storage=types.IndexStorage(namespace="test", set_name="foo"),
index_storage=types.IndexStorage(namespace=DEFAULT_NAMESPACE, set_name="foo"),
timeout=None,
),
],
Expand Down Expand Up @@ -578,7 +578,7 @@ async def test_index_create_index_storage(session_admin_client, test_case, rando
"test_case",
[
index_create_test_case(
namespace="test",
namespace=DEFAULT_NAMESPACE,
vector_field="example_16",
dimensions=1024,
vector_distance_metric=None,
Expand Down
Loading
Loading