Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: AI-powered search changes for v1.11 #1742

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/types/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ export type SearchForFacetValuesResponse = {
};

/**
 * Options object passed as the `hybrid` field of a search request.
 *
 * `embedder` is declared exactly once and is required: callers must name the
 * embedder to use (for example the key under which it was registered through
 * `updateEmbedders`, such as `"default"`).
 */
export type HybridSearch = {
  /** Name of the embedder to use for the search; required. */
  embedder: string;
  /**
   * Optional numeric ratio.
   * NOTE(review): presumably the weight of semantic versus keyword results,
   * with 1.0 meaning fully semantic — confirm against the server docs.
   */
  semanticRatio?: number;
};

Expand Down Expand Up @@ -389,6 +389,8 @@ export type OpenAiEmbedder = {
dimensions?: number;
distribution?: Distribution;
url?: string;
documentTemplateMaxBytes?: number;
binaryQuantized?: boolean;
};

export type HuggingFaceEmbedder = {
Expand All @@ -397,12 +399,15 @@ export type HuggingFaceEmbedder = {
revision?: string;
documentTemplate?: string;
distribution?: Distribution;
documentTemplateMaxBytes?: number;
binaryQuantized?: boolean;
};

/**
 * Embedder configuration whose `source` tag is the literal `"userProvided"`.
 *
 * Only `source` and `dimensions` are required; the other fields are optional.
 */
export type UserProvidedEmbedder = {
/** Literal tag identifying this embedder variant. */
source: "userProvided";
/**
 * Required numeric size.
 * NOTE(review): presumably the length of every vector supplied for this
 * embedder — confirm against the server docs.
 */
dimensions: number;
/** Optional `Distribution` settings (the type is declared outside this view). */
distribution?: Distribution;
/**
 * Optional boolean flag.
 * NOTE(review): presumably enables binary quantization of stored vectors —
 * confirm against the server docs.
 */
binaryQuantized?: boolean;
};

export type RestEmbedder = {
Expand All @@ -415,6 +420,8 @@ export type RestEmbedder = {
request: Record<string, any>;
response: Record<string, any>;
headers?: Record<string, string>;
documentTemplateMaxBytes?: number;
binaryQuantized?: boolean;
};

export type OllamaEmbedder = {
Expand All @@ -425,6 +432,8 @@ export type OllamaEmbedder = {
documentTemplate?: string;
distribution?: Distribution;
dimensions?: number;
documentTemplateMaxBytes?: number;
binaryQuantized?: boolean;
};

export type Embedder =
Expand Down
10 changes: 6 additions & 4 deletions tests/__snapshots__/settings.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,9 @@ exports[`Test on settings > Admin key: Update embedders settings 1`] = `
"distinctAttribute": null,
"embedders": {
"default": {
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}
{% endfor %}",
"documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}
{% endif %}{% endfor %}",
"documentTemplateMaxBytes": 400,
"model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
"source": "huggingFace",
},
Expand Down Expand Up @@ -804,8 +805,9 @@ exports[`Test on settings > Master key: Update embedders settings 1`] = `
"distinctAttribute": null,
"embedders": {
"default": {
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}
{% endfor %}",
"documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}
{% endif %}{% endfor %}",
"documentTemplateMaxBytes": 400,
"model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
"source": "huggingFace",
},
Expand Down
63 changes: 63 additions & 0 deletions tests/embedders.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
mean: 0.7,
sigma: 0.3,
},
binaryQuantized: false,
},
};
const task: EnqueuedTask = await client
Expand All @@ -101,6 +102,7 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
const response: Embedders = await client.index(index.uid).getEmbedders();

expect(response).toEqual(newEmbedder);
expect(response).not.toHaveProperty("documentTemplateMaxBytes");
});

test(`${permission} key: Update embedders with 'openAi' source`, async () => {
Expand All @@ -118,6 +120,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
sigma: 0.3,
},
url: "https://api.openai.com/v1/embeddings",
documentTemplateMaxBytes: 500,
binaryQuantized: false,
},
};
const task: EnqueuedTask = await client
Expand Down Expand Up @@ -147,6 +151,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
mean: 0.7,
sigma: 0.3,
},
documentTemplateMaxBytes: 500,
binaryQuantized: false,
},
};
const task: EnqueuedTask = await client
Expand Down Expand Up @@ -188,6 +194,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
headers: {
"Custom-Header": "CustomValue",
},
documentTemplateMaxBytes: 500,
binaryQuantized: false,
},
};
const task: EnqueuedTask = await client
Expand Down Expand Up @@ -219,6 +227,8 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
sigma: 0.3,
},
dimensions: 512,
documentTemplateMaxBytes: 500,
binaryQuantized: false,
},
};
const task: EnqueuedTask = await client
Expand Down Expand Up @@ -266,6 +276,58 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
expect(response).toEqual(null);
});

// POST search using a caller-supplied vector against a `userProvided` embedder.
test(`${permission} key: search (POST) with vectors`, async () => {
  const client = await getClient(permission);

  // Register a one-dimensional embedder under the name "default", then await
  // the enqueued task before searching.
  const embedderTask = await client.index(index.uid).updateEmbedders({
    default: {
      source: "userProvided",
      dimensions: 1,
    },
  });
  await client.waitForTask(embedderTask.taskUid);

  const searchResult = await client.index(index.uid).search("", {
    vector: [1],
    hybrid: {
      embedder: "default",
      semanticRatio: 1.0,
    },
  });

  for (const expectedKey of ["hits", "semanticHitCount"]) {
    expect(searchResult).toHaveProperty(expectedKey);
  }

  // Those fields are no longer returned by the search response
  // We want to ensure that they don't appear in it anymore
  for (const removedKey of ["vector", "_semanticScore"]) {
    expect(searchResult).not.toHaveProperty(removedKey);
  }
});

// GET search using a caller-supplied vector against a `userProvided` embedder.
test(`${permission} key: search (GET) with vectors`, async () => {
  const client = await getClient(permission);

  // Register a one-dimensional embedder under the name "default", then await
  // the enqueued task before searching.
  const embedderTask = await client.index(index.uid).updateEmbedders({
    default: {
      source: "userProvided",
      dimensions: 1,
    },
  });
  await client.waitForTask(embedderTask.taskUid);

  // The GET variant takes the hybrid options as flat `hybrid*` parameters.
  const searchResult = await client.index(index.uid).searchGet("", {
    vector: [1],
    hybridEmbedder: "default",
    hybridSemanticRatio: 1.0,
  });

  for (const expectedKey of ["hits", "semanticHitCount"]) {
    expect(searchResult).toHaveProperty(expectedKey);
  }

  // Those fields are no longer returned by the search response
  // We want to ensure that they don't appear in it anymore
  for (const removedKey of ["vector", "_semanticScore"]) {
    expect(searchResult).not.toHaveProperty(removedKey);
  }
});

test(`${permission} key: search for similar documents`, async () => {
const client = await getClient(permission);

Expand All @@ -288,6 +350,7 @@ describe.each([{ permission: "Master" }, { permission: "Admin" }])(
await client.waitForTask(documentAdditionTask);

const response = await client.index(index.uid).searchSimilarDocuments({
embedder: "manual",
id: "143",
});

Expand Down
35 changes: 0 additions & 35 deletions tests/get_search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -457,41 +457,6 @@ describe.each([
"The filter query parameter should be in string format when using searchGet",
);
});
test(`${permission} key: search with vectors`, async () => {
const client = await getClient(permission);
const adminClient = await getClient("Admin");
const adminKey = await getKey("Admin");

await fetch(`${HOST}/experimental-features`, {
body: JSON.stringify({ vectorStore: true }),
headers: {
Authorization: `Bearer ${adminKey}`,
"Content-Type": "application/json",
},
method: "PATCH",
});

const { taskUid } = await adminClient
.index(emptyIndex.uid)
.updateEmbedders({
default: {
source: "userProvided",
dimensions: 1,
},
});
await adminClient.waitForTask(taskUid);

const response = await client
.index(emptyIndex.uid)
.searchGet("", { vector: [1], hybridSemanticRatio: 1.0 });

expect(response).toHaveProperty("hits");
expect(response).toHaveProperty("semanticHitCount");
// Those fields are no longer returned by the search response
// We want to ensure that they don't appear in it anymore
expect(response).not.toHaveProperty("vector");
expect(response).not.toHaveProperty("_semanticScore");
});

test(`${permission} key: search without vectors`, async () => {
const client = await getClient(permission);
Expand Down
39 changes: 0 additions & 39 deletions tests/search.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -936,45 +936,6 @@ describe.each([
expect(response.hits.length).toEqual(0);
});

test(`${permission} key: search with vectors`, async () => {
const client = await getClient(permission);
const adminClient = await getClient("Admin");
const adminKey = await getKey("Admin");

await fetch(`${HOST}/experimental-features`, {
body: JSON.stringify({ vectorStore: true }),
headers: {
Authorization: `Bearer ${adminKey}`,
"Content-Type": "application/json",
},
method: "PATCH",
});

const { taskUid } = await adminClient
.index(emptyIndex.uid)
.updateEmbedders({
default: {
source: "userProvided",
dimensions: 1,
},
});
await adminClient.waitForTask(taskUid);

const response = await client.index(emptyIndex.uid).search("", {
vector: [1],
hybrid: {
semanticRatio: 1.0,
},
});

expect(response).toHaveProperty("hits");
expect(response).toHaveProperty("semanticHitCount");
// Those fields are no longer returned by the search response
// We want to ensure that they don't appear in it anymore
expect(response).not.toHaveProperty("vector");
expect(response).not.toHaveProperty("_semanticScore");
});

test(`${permission} key: search without vectors`, async () => {
const client = await getClient(permission);
const response = await client.index(index.uid).search("prince", {});
Expand Down