Skip to content

Commit

Permalink
feat: AI-powered search changes for v1.11 (#1742)
Browse files Browse the repository at this point in the history
  • Loading branch information
mdubus authored Oct 10, 2024
1 parent 1f5be1b commit 4662933
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 79 deletions.
11 changes: 10 additions & 1 deletion src/types/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ export type SearchForFacetValuesResponse = {
};

/**
 * Shape of the `hybrid` object passed to `search()` in this commit's tests
 * (`{ embedder: "default", semanticRatio: 1.0 }`).
 *
 * NOTE(review): this is a rendered diff hunk whose +/- markers were lost, so
 * both the old and the new `embedder` line appear below. The header for
 * src/types/types.ts reports "10 additions & 1 deletion", which is consistent
 * with the optional form being the single removed line and the required form
 * being its replacement.
 */
export type HybridSearch = {
// Old line (removed by this commit): `embedder` was optional.
embedder?: string;
// New line (added by this commit): `embedder` is a required string.
embedder: string;
// Optional number; the tests in this commit pass 1.0.
// Presumably weights semantic vs. keyword results — confirm against the Meilisearch search docs.
semanticRatio?: number;
};

Expand Down Expand Up @@ -389,6 +389,8 @@ export type OpenAiEmbedder = {
dimensions?: number;
distribution?: Distribution;
url?: string;
documentTemplateMaxBytes?: number;
binaryQuantized?: boolean;
};

export type HuggingFaceEmbedder = {
Expand All @@ -397,12 +399,15 @@ export type HuggingFaceEmbedder = {
revision?: string;
documentTemplate?: string;
distribution?: Distribution;
documentTemplateMaxBytes?: number;
binaryQuantized?: boolean;
};

/**
 * Embedder settings whose `source` is the string literal "userProvided".
 *
 * This commit adds the optional `binaryQuantized` flag here. Unlike the other
 * embedder types touched in the same diff, no `documentTemplateMaxBytes`
 * field is added to this one.
 */
export type UserProvidedEmbedder = {
// Literal tag identifying this settings shape.
source: "userProvided";
// Required number. The search tests in this commit pair `dimensions: 1`
// with a one-element `vector: [1]` — presumably the vector length; confirm against the docs.
dimensions: number;
// Optional; `Distribution` is declared elsewhere in types.ts (not visible in this hunk).
distribution?: Distribution;
// Optional boolean added by this commit; the embedders tests set it to `false`.
binaryQuantized?: boolean;
};

export type RestEmbedder = {
Expand All @@ -415,6 +420,8 @@ export type RestEmbedder = {
request: Record<string, any>;
response: Record<string, any>;
headers?: Record<string, string>;
documentTemplateMaxBytes?: number;
binaryQuantized?: boolean;
};

export type OllamaEmbedder = {
Expand All @@ -425,6 +432,8 @@ export type OllamaEmbedder = {
documentTemplate?: string;
distribution?: Distribution;
dimensions?: number;
documentTemplateMaxBytes?: number;
binaryQuantized?: boolean;
};

export type Embedder =
Expand Down
10 changes: 6 additions & 4 deletions tests/__snapshots__/settings.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,9 @@ exports[`Test on settings > Admin key: Update embedders settings 1`] = `
"distinctAttribute": null,
"embedders": {
"default": {
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}
{% endfor %}",
"documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}
{% endif %}{% endfor %}",
"documentTemplateMaxBytes": 400,
"model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
"source": "huggingFace",
},
Expand Down Expand Up @@ -804,8 +805,9 @@ exports[`Test on settings > Master key: Update embedders settings 1`] = `
"distinctAttribute": null,
"embedders": {
"default": {
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}
{% endfor %}",
"documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}
{% endif %}{% endfor %}",
"documentTemplateMaxBytes": 400,
"model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
"source": "huggingFace",
},
Expand Down
63 changes: 63 additions & 0 deletions tests/embedders.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
mean: 0.7,
sigma: 0.3,
},
binaryQuantized: false,
},
};
const task: EnqueuedTask = await client
Expand All @@ -101,6 +102,7 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
const response: Embedders = await client.index(index.uid).getEmbedders();

expect(response).toEqual(newEmbedder);
expect(response).not.toHaveProperty("documentTemplateMaxBytes");
});

test(`${permission} key: Update embedders with 'openAi' source`, async () => {
Expand All @@ -118,6 +120,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
sigma: 0.3,
},
url: "https://api.openai.com/v1/embeddings",
documentTemplateMaxBytes: 500,
binaryQuantized: false,
},
};
const task: EnqueuedTask = await client
Expand Down Expand Up @@ -147,6 +151,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
mean: 0.7,
sigma: 0.3,
},
documentTemplateMaxBytes: 500,
binaryQuantized: false,
},
};
const task: EnqueuedTask = await client
Expand Down Expand Up @@ -188,6 +194,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
headers: {
"Custom-Header": "CustomValue",
},
documentTemplateMaxBytes: 500,
binaryQuantized: false,
},
};
const task: EnqueuedTask = await client
Expand Down Expand Up @@ -219,6 +227,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
sigma: 0.3,
},
dimensions: 512,
documentTemplateMaxBytes: 500,
binaryQuantized: false,
},
};
const task: EnqueuedTask = await client
Expand Down Expand Up @@ -266,6 +276,58 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
expect(response).toEqual(null);
});

// Added in tests/embedders.test.ts: hybrid search through `search()` (POST, per the
// test title) using a caller-supplied vector and an explicitly named embedder.
test(`${permission} key: search (POST) with vectors`, async () => {
const client = await getClient(permission);

// Register a "userProvided" embedder named "default" with 1 dimension and
// wait for the settings task to complete before searching.
const { taskUid } = await client.index(index.uid).updateEmbedders({
default: {
source: "userProvided",
dimensions: 1,
},
});
await client.waitForTask(taskUid);

// Empty query string plus a one-element vector. `hybrid.embedder` names the
// embedder configured above (the removed version of this test in
// tests/search.test.ts passed only `semanticRatio`).
const response = await client.index(index.uid).search("", {
vector: [1],
hybrid: {
embedder: "default",
semanticRatio: 1.0,
},
});

// Only the presence/absence of top-level response keys is asserted, not hit contents.
expect(response).toHaveProperty("hits");
expect(response).toHaveProperty("semanticHitCount");
// Those fields are no longer returned by the search response
// We want to ensure that they don't appear in it anymore
expect(response).not.toHaveProperty("vector");
expect(response).not.toHaveProperty("_semanticScore");
});

// Added in tests/embedders.test.ts: the `searchGet()` counterpart of the POST
// test, using the flattened `hybridEmbedder` / `hybridSemanticRatio` parameters.
test(`${permission} key: search (GET) with vectors`, async () => {
const client = await getClient(permission);

// Register a "userProvided" embedder named "default" with 1 dimension and
// wait for the settings task to complete before searching.
const { taskUid } = await client.index(index.uid).updateEmbedders({
default: {
source: "userProvided",
dimensions: 1,
},
});
await client.waitForTask(taskUid);

// Empty query string plus a one-element vector. `hybridEmbedder` names the
// embedder configured above (the removed version of this test in
// tests/get_search.test.ts passed only `hybridSemanticRatio`).
const response = await client.index(index.uid).searchGet("", {
vector: [1],
hybridEmbedder: "default",
hybridSemanticRatio: 1.0,
});

// Only the presence/absence of top-level response keys is asserted, not hit contents.
expect(response).toHaveProperty("hits");
expect(response).toHaveProperty("semanticHitCount");
// Those fields are no longer returned by the search response
// We want to ensure that they don't appear in it anymore
expect(response).not.toHaveProperty("vector");
expect(response).not.toHaveProperty("_semanticScore");
});

test(`${permission} key: search for similar documents`, async () => {
const client = await getClient(permission);

Expand All @@ -288,6 +350,7 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
await client.waitForTask(documentAdditionTask);

const response = await client.index(index.uid).searchSimilarDocuments({
embedder: "manual",
id: "143",
});

Expand Down
35 changes: 0 additions & 35 deletions tests/get_search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -457,41 +457,6 @@ describe.each([
"The filter query parameter should be in string format when using searchGet",
);
});
// REMOVED by this commit: tests/get_search.test.ts is listed with
// "0 additions & 35 deletions", and this test spans exactly those 35 lines.
// A GET "search with vectors" test is added to tests/embedders.test.ts in the
// same commit, without the experimental-features call and with
// `hybridEmbedder` set.
test(`${permission} key: search with vectors`, async () => {
const client = await getClient(permission);
const adminClient = await getClient("Admin");
const adminKey = await getKey("Admin");

// Turn on the `vectorStore` flag through a raw PATCH to /experimental-features,
// authenticated with the admin key.
await fetch(`${HOST}/experimental-features`, {
body: JSON.stringify({ vectorStore: true }),
headers: {
Authorization: `Bearer ${adminKey}`,
"Content-Type": "application/json",
},
method: "PATCH",
});

// Settings are updated with the admin client, regardless of which
// permission the search itself runs under.
const { taskUid } = await adminClient
.index(emptyIndex.uid)
.updateEmbedders({
default: {
source: "userProvided",
dimensions: 1,
},
});
await adminClient.waitForTask(taskUid);

// The search runs with the permission-scoped client and does not name an embedder.
const response = await client
.index(emptyIndex.uid)
.searchGet("", { vector: [1], hybridSemanticRatio: 1.0 });

expect(response).toHaveProperty("hits");
expect(response).toHaveProperty("semanticHitCount");
// Those fields are no longer returned by the search response
// We want to ensure that they don't appear in it anymore
expect(response).not.toHaveProperty("vector");
expect(response).not.toHaveProperty("_semanticScore");
});

test(`${permission} key: search without vectors`, async () => {
const client = await getClient(permission);
Expand Down
39 changes: 0 additions & 39 deletions tests/search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -936,45 +936,6 @@ describe.each([
expect(response.hits.length).toEqual(0);
});

// REMOVED by this commit: tests/search.test.ts is listed with
// "0 additions & 39 deletions", which covers this test. A POST
// "search with vectors" test is added to tests/embedders.test.ts in the same
// commit, without the experimental-features call and with `hybrid.embedder` set.
test(`${permission} key: search with vectors`, async () => {
const client = await getClient(permission);
const adminClient = await getClient("Admin");
const adminKey = await getKey("Admin");

// Turn on the `vectorStore` flag through a raw PATCH to /experimental-features,
// authenticated with the admin key.
await fetch(`${HOST}/experimental-features`, {
body: JSON.stringify({ vectorStore: true }),
headers: {
Authorization: `Bearer ${adminKey}`,
"Content-Type": "application/json",
},
method: "PATCH",
});

// Settings are updated with the admin client, regardless of which
// permission the search itself runs under.
const { taskUid } = await adminClient
.index(emptyIndex.uid)
.updateEmbedders({
default: {
source: "userProvided",
dimensions: 1,
},
});
await adminClient.waitForTask(taskUid);

// The search runs with the permission-scoped client; `hybrid` carries only
// `semanticRatio` and does not name an embedder.
const response = await client.index(emptyIndex.uid).search("", {
vector: [1],
hybrid: {
semanticRatio: 1.0,
},
});

expect(response).toHaveProperty("hits");
expect(response).toHaveProperty("semanticHitCount");
// Those fields are no longer returned by the search response
// We want to ensure that they don't appear in it anymore
expect(response).not.toHaveProperty("vector");
expect(response).not.toHaveProperty("_semanticScore");
});

test(`${permission} key: search without vectors`, async () => {
const client = await getClient(permission);
const response = await client.index(index.uid).search("prince", {});
Expand Down

0 comments on commit 4662933

Please sign in to comment.