-
Notifications
You must be signed in to change notification settings - Fork 294
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Azure AI Search hybrid search support (#428)
## Motivation and Context (Why the change? What's the scenario?) Already described in issue #159. The main idea is to support Azure AI Search hybrid search. ## High level description (Approach, Design) The idea is to add a new config property to the AzureAISearchConfig class, so that hybrid search is only enabled explicitly. When enabled, the CosineSimilarity is not calculated and the minDistance is set to the minRelevance parameter (passed from the top SearchAsync method). --------- Co-authored-by: “luismanez” <“[email protected]”> Co-authored-by: Devis Lucato <[email protected]>
- Loading branch information
1 parent
713cbb2
commit c631a64
Showing
11 changed files
with
247 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15 changes: 15 additions & 0 deletions
15
examples/111-dotnet-azure-ai-hybrid-search/111-dotnet-azure-ai-hybrid-search.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
<Project Sdk="Microsoft.NET.Sdk.Web"> | ||
|
||
<PropertyGroup> | ||
<TargetFramework>net8.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
<ManagePackageVersionsCentrally>false</ManagePackageVersionsCentrally> | ||
<NoWarn>$(NoWarn);CA1050;CA2000;CA1707;CA1303;CA2007;CA1724;CA1861;CA1859;</NoWarn> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<ProjectReference Include="..\..\service\Core\Core.csproj" /> | ||
</ItemGroup> | ||
|
||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
// ReSharper disable InconsistentNaming | ||
|
||
using Microsoft.KernelMemory; | ||
using Microsoft.KernelMemory.AI.OpenAI; | ||
|
||
/// <summary>
/// Example that asks the same question against the same Azure AI Search index twice:
/// once with <c>UseHybridSearch</c> disabled and once with it enabled, printing both answers.
/// </summary>
public static class Program
{
    // Name of the index that is re-created and queried by this example.
    private const string indexName = "acronyms";

    /// <summary>
    /// Loads the configuration, builds the two memory instances, imports the
    /// sample data and prints the answers obtained without and with hybrid search.
    /// </summary>
    public static async Task Main()
    {
        var azureOpenAITextConfig = new AzureOpenAIConfig();
        var azureOpenAIEmbeddingConfig = new AzureOpenAIConfig();
        var azureAISearchConfigWithHybridSearch = new AzureAISearchConfig();
        var azureAISearchConfigWithoutHybridSearch = new AzureAISearchConfig();

        // Both search configs are bound to the same settings section; they only
        // differ by the UseHybridSearch flag assigned right after binding.
        new ConfigurationBuilder()
            .AddJsonFile("appsettings.json")
            .AddJsonFile("appsettings.Development.json", optional: true)
            .Build()
            .BindSection("KernelMemory:Services:AzureOpenAIText", azureOpenAITextConfig)
            .BindSection("KernelMemory:Services:AzureOpenAIEmbedding", azureOpenAIEmbeddingConfig)
            .BindSection("KernelMemory:Services:AzureAISearch", azureAISearchConfigWithHybridSearch)
            .BindSection("KernelMemory:Services:AzureAISearch", azureAISearchConfigWithoutHybridSearch);

        azureAISearchConfigWithHybridSearch.UseHybridSearch = true;
        azureAISearchConfigWithoutHybridSearch.UseHybridSearch = false;

        var memoryNoHybridSearch = BuildMemory(
            azureOpenAITextConfig, azureOpenAIEmbeddingConfig, azureAISearchConfigWithoutHybridSearch);

        var memoryWithHybridSearch = BuildMemory(
            azureOpenAITextConfig, azureOpenAIEmbeddingConfig, azureAISearchConfigWithHybridSearch);

        // The data is imported once, through the hybrid-search instance; both
        // instances then query the same index name.
        await CreateIndexAndImportData(memoryWithHybridSearch);

        const string question = "abc";

        Console.WriteLine("Answer without hybrid search:");
        await AskQuestion(memoryNoHybridSearch, question);
        // Output: INFO NOT FOUND

        Console.WriteLine("Answer using hybrid search:");
        await AskQuestion(memoryWithHybridSearch, question);
        // Output: 'Aliens Brewing Coffee'
    }

    /// <summary>
    /// Builds a serverless memory instance using Azure OpenAI for text and embedding
    /// generation and Azure AI Search as memory DB, limited to two matches per search.
    /// </summary>
    /// <param name="textConfig">Azure OpenAI settings used for text generation.</param>
    /// <param name="embeddingConfig">Azure OpenAI settings used for embedding generation.</param>
    /// <param name="searchConfig">Azure AI Search settings, including the hybrid search flag.</param>
    /// <returns>The memory instance built from the given settings.</returns>
    private static IKernelMemory BuildMemory(
        AzureOpenAIConfig textConfig,
        AzureOpenAIConfig embeddingConfig,
        AzureAISearchConfig searchConfig)
    {
        return new KernelMemoryBuilder()
            .WithAzureOpenAITextGeneration(textConfig, new DefaultGPTTokenizer())
            .WithAzureOpenAITextEmbeddingGeneration(embeddingConfig, new DefaultGPTTokenizer())
            .WithAzureAISearchMemoryDb(searchConfig)
            .WithSearchClientConfig(new SearchClientConfig { MaxMatchesCount = 2, Temperature = 0, TopP = 0 })
            .Build<MemoryServerless>();
    }

    /// <summary>
    /// Asks <paramref name="question"/> against the example index and prints the answer text.
    /// </summary>
    /// <param name="memory">Memory instance to query.</param>
    /// <param name="question">Question to ask.</param>
    private static async Task AskQuestion(IKernelMemory memory, string question)
    {
        var answer = await memory.AskAsync(question, index: indexName);
        Console.WriteLine(answer.Result);
    }

    /// <summary>
    /// Deletes the example index and imports each sample row as a separate text document.
    /// </summary>
    /// <param name="memory">Memory instance used to delete the index and import the rows.</param>
    private static async Task CreateIndexAndImportData(IKernelMemory memory)
    {
        await memory.DeleteIndexAsync(indexName);

        var data = """
            aaa bbb ccc 000000000
            C B A .......
            ai bee cee Something else
            XY. abc means 'Aliens Brewing Coffee'
            abeec abecedario
            A B C D first 4 letters
            """;

        // The raw string literal inherits the line endings of the source file, so a
        // CRLF checkout would otherwise leave a trailing '\r' on every row. Trim the
        // entries and drop empty ones so only the actual sample rows are imported.
        var rows = data.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
        foreach (var acronym in rows)
        {
            await memory.ImportTextAsync(acronym, index: indexName);
        }
    }
}
77 changes: 77 additions & 0 deletions
77
examples/111-dotnet-azure-ai-hybrid-search/appsettings.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
{ | ||
"Logging": { | ||
"LogLevel": { | ||
"Default": "Warning", | ||
// Examples: how to handle logs differently by class | ||
// "Microsoft.KernelMemory.Handlers.TextExtractionHandler": "Information", | ||
// "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", | ||
// "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", | ||
// "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", | ||
// "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", | ||
// "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", | ||
"Microsoft.AspNetCore": "Warning" | ||
} | ||
}, | ||
"KernelMemory": { | ||
"Services": { | ||
"AzureAISearch": { | ||
// "ApiKey" or "AzureIdentity". For other options see <AzureAISearchConfig>. | ||
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally | ||
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET. | ||
"Auth": "AzureIdentity", | ||
"Endpoint": "https://<...>", | ||
"APIKey": "" | ||
}, | ||
"AzureOpenAIText": { | ||
// "ApiKey" or "AzureIdentity" | ||
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally | ||
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET. | ||
"Auth": "AzureIdentity", | ||
"Endpoint": "https://<...>.openai.azure.com/", | ||
"APIKey": "", | ||
"Deployment": "", | ||
// The max number of tokens supported by the deployed model | ||
// See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models | ||
"MaxTokenTotal": 16384, | ||
// "ChatCompletion" or "TextCompletion" | ||
"APIType": "ChatCompletion", | ||
"MaxRetries": 10 | ||
}, | ||
"AzureOpenAIEmbedding": { | ||
// "ApiKey" or "AzureIdentity" | ||
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally | ||
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET. | ||
"Auth": "AzureIdentity", | ||
"Endpoint": "https://<...>.openai.azure.com/", | ||
"APIKey": "", | ||
"Deployment": "", | ||
// The max number of tokens supported by the deployed model | ||
// See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models | ||
"MaxTokenTotal": 8191 | ||
}, | ||
"OpenAI": { | ||
// Name of the model used to generate text (text completion or chat completion) | ||
"TextModel": "gpt-3.5-turbo-16k", | ||
// The max number of tokens supported by the text model. | ||
"TextModelMaxTokenTotal": 16384, | ||
// What type of text generation, by default autodetect using the model name. | ||
// Possible values: "Auto", "TextCompletion", "Chat" | ||
"TextGenerationType": "Auto", | ||
// Name of the model used to generate text embeddings | ||
"EmbeddingModel": "text-embedding-ada-002", | ||
// The max number of tokens supported by the embedding model | ||
// See https://platform.openai.com/docs/guides/embeddings/what-are-embeddings | ||
"EmbeddingModelMaxTokenTotal": 8191, | ||
// OpenAI API Key | ||
"APIKey": "", | ||
// OpenAI Organization ID (usually empty, unless you have multiple accounts on different orgs) | ||
"OrgId": "", | ||
// Endpoint to use. By default the system uses 'https://api.openai.com/v1'. | ||
// Change this to use proxies or services compatible with OpenAI HTTP protocol like LM Studio. | ||
"Endpoint": "", | ||
// How many times to retry in case of throttling | ||
"MaxRetries": 10 | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using System.Collections.Generic; | ||
|
||
namespace Microsoft.KernelMemory.InteractiveSetup.UI; | ||
|
||
/// <summary>
/// Helpers to read values from a <see cref="Dictionary{TKey,TValue}"/> of objects as strings.
/// </summary>
internal static class DictionaryExtensions
{
    /// <summary>
    /// Returns the string representation of the value stored under <paramref name="key"/>,
    /// or an empty string when the key is missing.
    /// </summary>
    /// <param name="data">Dictionary to read from.</param>
    /// <param name="key">Key to look up.</param>
    /// <returns>The value converted to string, or <see cref="string.Empty"/>.</returns>
    public static string TryGet(this Dictionary<string, object> data, string key)
    {
        return data.TryGetOr(key, string.Empty);
    }

    /// <summary>
    /// Returns the string representation of the value stored under <paramref name="key"/>,
    /// or <paramref name="fallbackValue"/> when the key is missing.
    /// </summary>
    /// <param name="data">Dictionary to read from.</param>
    /// <param name="key">Key to look up.</param>
    /// <param name="fallbackValue">Value returned when <paramref name="key"/> is not present.</param>
    /// <returns>
    /// The value converted to string; <see cref="string.Empty"/> when the key is present but
    /// the value (or its string representation) is null; otherwise <paramref name="fallbackValue"/>.
    /// </returns>
    public static string TryGetOr(this Dictionary<string, object> data, string key, string fallbackValue)
    {
        // The dictionary can hold a null value despite the non-nullable annotation
        // (e.g. data coming from deserialization), so guard the ToString() call.
        return data.TryGetValue(key, out object? value)
            ? value?.ToString() ?? string.Empty
            : fallbackValue;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters