diff --git a/Directory.Build.props b/Directory.Build.props index f44861a1e..443c9ccfd 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -2,7 +2,7 @@ - 0.71.0 + 0.72.0 12 diff --git a/Directory.Packages.props b/Directory.Packages.props index 97b15ba98..95feb4712 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -39,6 +39,7 @@ + diff --git a/KernelMemory.sln b/KernelMemory.sln index 7e07e78e8..875b417d2 100644 --- a/KernelMemory.sln +++ b/KernelMemory.sln @@ -317,6 +317,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "how-to", "how-to", "{6B992E EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "211-dotnet-WebClient-Intent-Detection", "examples\211-dotnet-WebClient-Intent-Detection\211-dotnet-WebClient-Intent-Detection.csproj", "{84AEC1DD-CBAE-400A-949C-91BA373C587D}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "212-dotnet-ollama", "examples\212-dotnet-ollama\212-dotnet-ollama.csproj", "{B303885D-F64F-4EEB-B085-0014E863AF61}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ollama", "extensions\Ollama\Ollama\Ollama.csproj", "{F192513B-265B-4943-A2A9-44E23B15BA18}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -578,6 +582,13 @@ Global {84AEC1DD-CBAE-400A-949C-91BA373C587D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {84AEC1DD-CBAE-400A-949C-91BA373C587D}.Debug|Any CPU.Build.0 = Debug|Any CPU {84AEC1DD-CBAE-400A-949C-91BA373C587D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B303885D-F64F-4EEB-B085-0014E863AF61}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B303885D-F64F-4EEB-B085-0014E863AF61}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B303885D-F64F-4EEB-B085-0014E863AF61}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F192513B-265B-4943-A2A9-44E23B15BA18}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F192513B-265B-4943-A2A9-44E23B15BA18}.Debug|Any CPU.Build.0 = Debug|Any CPU + {F192513B-265B-4943-A2A9-44E23B15BA18}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F192513B-265B-4943-A2A9-44E23B15BA18}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -670,6 +681,8 @@ Global {795CD089-05A9-4800-B6FF-3243CAD7D41B} = {7BA7F1B2-19E2-46EB-B000-513EE2F65769} {6B992EFC-81B0-4E52-925F-41420BDC40B6} = {7BA7F1B2-19E2-46EB-B000-513EE2F65769} {84AEC1DD-CBAE-400A-949C-91BA373C587D} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841} + {B303885D-F64F-4EEB-B085-0014E863AF61} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841} + {F192513B-265B-4943-A2A9-44E23B15BA18} = {155DA079-E267-49AF-973A-D1D44681970F} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {CC136C62-115C-41D1-B414-F9473EFF6EA8} diff --git a/README.md b/README.md index 56d0e4779..eb43f499f 100644 --- a/README.md +++ b/README.md @@ -167,26 +167,26 @@ storage engines (known as "connectors") varies across languages. 
Here's comparison table: -| Feature | Kernel Memory | Semantic Memory | -| --------------------------------------- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ------------------------------------------------------------------------------------------------------------ | -| Data formats | Web pages, PDF, Images, Word, PowerPoint, Excel, Markdown, Text, JSON, HTML | Text only | -| Search | Cosine similarity, Hybrid search with filters (AND/OR conditions) | Cosine similarity | -| Language support | Any language, command line tools, browser extensions, low-code/no-code apps, chatbots, assistants, etc. | C#, Python, Java | +| Feature | Kernel Memory | Semantic Memory | +| --------------------------------------- |-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ------------------------------------------------------------------------------------------------------------ | +| Data formats | Web pages, PDF, Images, Word, PowerPoint, Excel, Markdown, Text, JSON, HTML | Text only | +| Search | Cosine similarity, Hybrid search with filters (AND/OR conditions) | Cosine similarity | +| Language support | Any language, command line tools, browser extensions, low-code/no-code apps, chatbots, assistants, etc. | C#, Python, Java | | Storage engines | [Azure AI Search](https://azure.microsoft.com/products/ai-services/ai-search), [Elasticsearch](https://www.nuget.org/packages/FreeMindLabs.KernelMemory.Elasticsearch), [MongoDB Atlas](https://www.mongodb.com/atlas/database), [Postgres+pgvector](https://github.com/microsoft/kernel-memory/extensions/postgres), [Qdrant](https://qdrant.tech), [Redis](https://redis.io), [SQL Server](https://www.nuget.org/packages/Microsoft.KernelMemory.MemoryDb.SQLServer/), In memory KNN, On disk KNN. 
| Azure AI Search, Chroma, DuckDB, Kusto, Milvus, MongoDB, Pinecone, Postgres, Qdrant, Redis, SQLite, Weaviate | -| File storage | Disk, [Azure Blobs](https://learn.microsoft.com/azure/storage/blobs/storage-blobs-introduction), [AWS S3](https://aws.amazon.com/s3), [MongoDB Atlas](https://www.mongodb.com/atlas/database), In memory (volatile) | - | -| RAG | Yes, with sources lookup | - | -| Summarization | Yes | - | -| OCR | Yes via [Azure Document Intelligence](https://azure.microsoft.com/products/ai-services/ai-document-intelligence) | - | -| Security Filters | Yes | - | -| Large document ingestion | Yes, including async processing using queues ([Azure Queues](https://learn.microsoft.com/azure/storage/queues/storage-queues-introduction), [RabbitMQ](https://www.rabbitmq.com), File based or In memory queues) | - | -| Document storage | Yes | - | -| Custom storage schema | some DBs | - | -| Vector DBs with internal embedding | Yes | - | -| Concurrent write to multiple vector DBs | Yes | - | -| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [Anthropic](https://www.anthropic.com), [LLamaSharp](https://github.com/SciSharp/LLamaSharp) via [llama.cpp](https://github.com/ggerganov/llama.cpp), [LM Studio](https://lmstudio.ai/), Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. | -| LLMs with dedicated tokenization | Yes | No | -| Cloud deployment | Yes | - | -| Web service with OpenAPI | Yes | - | +| File storage | Disk, [Azure Blobs](https://learn.microsoft.com/azure/storage/blobs/storage-blobs-introduction), [AWS S3](https://aws.amazon.com/s3), [MongoDB Atlas](https://www.mongodb.com/atlas/database), In memory (volatile) | - | +| RAG | Yes, with sources lookup | - | +| Summarization | Yes | - | +| OCR | Yes via [Azure Document Intelligence](https://azure.microsoft.com/products/ai-services/ai-document-intelligence) | - | +| Security Filters | Yes | - | +| Large document ingestion | Yes, including async processing using queues ([Azure Queues](https://learn.microsoft.com/azure/storage/queues/storage-queues-introduction), [RabbitMQ](https://www.rabbitmq.com), File based or In memory queues) | - | +| Document storage | Yes | - | +| Custom storage schema | some DBs | - | +| Vector DBs with internal embedding | Yes | - | +| Concurrent write to multiple vector DBs | Yes | - | +| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [Anthropic](https://www.anthropic.com), [Ollama](https://ollama.com), [LLamaSharp](https://github.com/SciSharp/LLamaSharp), [LM Studio](https://lmstudio.ai), Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. | +| LLMs with dedicated tokenization | Yes | No | +| Cloud deployment | Yes | - | +| Web service with OpenAPI | Yes | - | ## Quick test using the Docker image @@ -303,30 +303,35 @@ running the service locally with OpenAPI enabled. 2. [Using Kernel Memory web service to upload documents and answer questions](examples/001-dotnet-WebClient) 3. [Importing files and asking question without running the service (serverless mode)](examples/002-dotnet-Serverless) 4. [Using KM Plugin for Semantic Kernel](examples/003-dotnet-SemanticKernel-plugin) -5. [Processing files with custom logic (custom handlers) in serverless mode](examples/004-dotnet-serverless-custom-pipeline) -6. 
[Processing files with custom logic (custom handlers) in asynchronous mode](examples/005-dotnet-AsyncMemoryCustomPipeline) -7. [Upload files and ask questions from command line using curl](examples/006-curl-calling-webservice) -8. [Customizing RAG and summarization prompts](examples/101-dotnet-custom-Prompts) -9. [Custom partitioning/text chunking options](examples/102-dotnet-custom-partitioning-options) -10. [Using a custom embedding/vector generator](examples/103-dotnet-custom-EmbeddingGenerator) -11. [Using custom LLMs](examples/104-dotnet-custom-LLM) -12. [Using LLama](examples/105-dotnet-serverless-llamasharp) -13. [Summarizing documents, using synthetic memories](examples/106-dotnet-retrieve-synthetics) -14. [Using Semantic Kernel LLM connectors](examples/107-dotnet-SemanticKernel-TextCompletion) -15. [Using custom content decoders](examples/108-dotnet-custom-content-decoders) -16. [Using a custom web scraper to fetch web pages](examples/109-dotnet-custom-webscraper) -17. [Generating answers with Anthropic LLMs](examples/110-dotnet-anthropic) -18. [Hybrid Search with Azure AI Search](examples/111-dotnet-azure-ai-hybrid-search) -19. [Writing and using a custom ingestion handler](examples/201-dotnet-serverless-custom-handler) -20. [Running a single asynchronous pipeline handler as a standalone service](examples/202-dotnet-custom-handler-as-a-service) -21. [Test project using KM package from nuget.org](examples/203-dotnet-using-core-nuget) -22. [Integrating Memory with ASP.NET applications and controllers](examples/204-dotnet-ASP.NET-MVC-integration) -23. [Sample code showing how to extract text from files](examples/205-dotnet-extract-text-from-docs) -24. [.NET configuration and logging](examples/206-dotnet-configuration-and-logging) -25. [Expanding chunks retrieving adjacent partitions](examples/207-dotnet-expanding-chunks-on-retrieval) -26. [Using local models via LM Studio](examples/208-dotnet-lmstudio) -27. [Using Context Parameters to customize RAG prompt during a request](examples/209-dotnet-using-context-overrides) -28. [Creating a Memory instance without KernelMemoryBuilder](examples/210-KM-without-builder) +5. Customizations + * [Processing files with custom logic (custom handlers) in serverless mode](examples/004-dotnet-serverless-custom-pipeline) + * [Processing files with custom logic (custom handlers) in asynchronous mode](examples/005-dotnet-AsyncMemoryCustomPipeline) + * [Customizing RAG and summarization prompts](examples/101-dotnet-custom-Prompts) + * [Custom partitioning/text chunking options](examples/102-dotnet-custom-partitioning-options) + * [Using a custom embedding/vector generator](examples/103-dotnet-custom-EmbeddingGenerator) + * [Using custom content decoders](examples/108-dotnet-custom-content-decoders) + * [Using a custom web scraper to fetch web pages](examples/109-dotnet-custom-webscraper) + * [Writing and using a custom ingestion handler](examples/201-dotnet-serverless-custom-handler) + * [Using Context Parameters to customize RAG prompt during a request](examples/209-dotnet-using-context-overrides) +6. 
Local models and external connectors + * [Using custom LLMs](examples/104-dotnet-custom-LLM) + * [Using local LLMs with Ollama](212-dotnet-ollama) + * [Using local LLMs with llama.cpp via LlamaSharp](examples/105-dotnet-serverless-llamasharp) + * [Using local models with LM Studio](examples/208-dotnet-lmstudio) + * [Using Semantic Kernel LLM connectors](examples/107-dotnet-SemanticKernel-TextCompletion) + * [Generating answers with Anthropic LLMs](examples/110-dotnet-anthropic) +7. [Upload files and ask questions from command line using curl](examples/006-curl-calling-webservice) +8. [Summarizing documents, using synthetic memories](examples/106-dotnet-retrieve-synthetics) +9. [Hybrid Search with Azure AI Search](examples/111-dotnet-azure-ai-hybrid-search) +10. [Running a single asynchronous pipeline handler as a standalone service](examples/202-dotnet-custom-handler-as-a-service) +11. [Integrating Memory with ASP.NET applications and controllers](examples/204-dotnet-ASP.NET-MVC-integration) +12. [Sample code showing how to extract text from files](examples/205-dotnet-extract-text-from-docs) +13. [.NET configuration and logging](examples/206-dotnet-configuration-and-logging) +14. [Expanding chunks retrieving adjacent partitions](examples/207-dotnet-expanding-chunks-on-retrieval) +15. [Creating a Memory instance without KernelMemoryBuilder](examples/210-KM-without-builder) +16. [Intent Detection](examples/211-dotnet-WebClient-Intent-Detection) +17. [Fetching data from Discord](examples/301-discord-test-application) +18. [Test project using KM package from nuget.org](examples/203-dotnet-using-core-nuget) ## Tools diff --git a/docs/index.md b/docs/index.md index c1f2be10b..7bc5dc6f8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -112,7 +112,7 @@ Here's comparison table: | Custom storage schema | some DBs | - | | Vector DBs with internal embedding | Yes | - | | Concurrent write to multiple vector DBs | Yes | - | -| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [Anthropic](https://www.anthropic.com), [LLamaSharp](https://github.com/SciSharp/LLamaSharp) via [llama.cpp](https://github.com/ggerganov/llama.cpp), [LM Studio](https://lmstudio.ai/), Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. | +| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [Anthropic](https://www.anthropic.com), [Ollama](https://ollama.com), [LLamaSharp](https://github.com/SciSharp/LLamaSharp), [LM Studio](https://lmstudio.ai), Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. | | LLMs with dedicated tokenization | Yes | No | | Cloud deployment | Yes | - | | Web service with OpenAPI | Yes | - | diff --git a/docs/quickstart.md b/docs/quickstart.md index d96ac1e79..776f0026c 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -18,8 +18,8 @@ we will set up the service and demonstrate how to use the Memory API from Python * [.NET 6](https://dotnet.microsoft.com/download) or higher * Either an [OpenAI API Key](https://platform.openai.com/api-keys) or [Azure OpenAI deployment](https://azure.microsoft.com/products/ai-services/openai-service). If you are familiar - with llama.cpp or LLamaSharp you can also use a LLama model. However, this may result in slower AI code execution, - depending on your device. 
+ with [Ollama](https://ollama.com) you can also use a local model such as [Microsoft phi3](https://azure.microsoft.com/products/phi-3) and [Meta LLama](https://llama.meta.com). + However, this may result in slower AI code execution, depending on your device. * A vector database, such as Azure AI Search, Qdrant, or Postgres+pgvector. For basic tests, you can use KM SimpleVectorDb. * A copy of the [KM repository](https://github.com/microsoft/kernel-memory). diff --git a/examples/001-dotnet-WebClient/Program.cs b/examples/001-dotnet-WebClient/Program.cs index 437dee4e4..3ad5f8ad7 100644 --- a/examples/001-dotnet-WebClient/Program.cs +++ b/examples/001-dotnet-WebClient/Program.cs @@ -253,7 +253,7 @@ private static async Task AskSimpleQuestion() var question = "What's E = m*c^2?"; Console.WriteLine($"Question: {question}"); - var answer = await s_memory.AskAsync(question, minRelevance: 0.76); + var answer = await s_memory.AskAsync(question, minRelevance: 0.66); Console.WriteLine($"\nAnswer: {answer.Result}"); Console.WriteLine("\n====================================\n"); @@ -278,7 +278,7 @@ private static async Task AskSimpleQuestionAndShowSources() var question = "What's Kernel Memory?"; Console.WriteLine($"Question: {question}"); - var answer = await s_memory.AskAsync(question, minRelevance: 0); + var answer = await s_memory.AskAsync(question, minRelevance: 0.5); Console.WriteLine($"\nAnswer: {answer.Result}\n\n Sources:\n"); // Show sources / citations diff --git a/examples/210-KM-without-builder/Program.cs b/examples/210-KM-without-builder/Program.cs index 6c1439617..b68a06f44 100644 --- a/examples/210-KM-without-builder/Program.cs +++ b/examples/210-KM-without-builder/Program.cs @@ -73,7 +73,7 @@ public static async Task Main() var promptProvider = new EmbeddedPromptProvider(); // AI dependencies - var tokenizer = new GPT4Tokenizer(); + var tokenizer = new GPT4oTokenizer(); var embeddingGeneratorHttpClient = new HttpClient(); var embeddingGenerator = new AzureOpenAITextEmbeddingGenerator(azureOpenAIEmbeddingConfig, tokenizer, loggerFactory, embeddingGeneratorHttpClient); var textGeneratorHttpClient = new HttpClient(); diff --git a/examples/212-dotnet-ollama/212-dotnet-ollama.csproj b/examples/212-dotnet-ollama/212-dotnet-ollama.csproj new file mode 100644 index 000000000..d90b61733 --- /dev/null +++ b/examples/212-dotnet-ollama/212-dotnet-ollama.csproj @@ -0,0 +1,12 @@ + + + + net8.0 + enable + + + + + + + diff --git a/examples/212-dotnet-ollama/Program.cs b/examples/212-dotnet-ollama/Program.cs new file mode 100644 index 000000000..271ef9eb1 --- /dev/null +++ b/examples/212-dotnet-ollama/Program.cs @@ -0,0 +1,67 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.KernelMemory; +using Microsoft.KernelMemory.AI.Ollama; +using Microsoft.KernelMemory.AI.OpenAI; +using Microsoft.KernelMemory.Diagnostics; + +/* This example shows how to use KM with Ollama + * + * 1. Install and launch Ollama. You should see an icon for the app running the background. + * + * 2. Download your preferred models, e.g. + * - ollama pull nomic-embed-text + * - ollama pull phi3:medium-128k + * + * 3. Run the code below + * + * 4. 
Other things
+ * Run "ollama show phi3:medium-128k" to see the model's properties
+ * Run "ollama list" to see the list of models you have on your system
+ * Run "ollama serve" if you prefer running Ollama from the command line
+ */
+public static class Program
+{
+ public static async Task Main()
+ {
+ var logLevel = LogLevel.Warning;
+ SensitiveDataLogger.Enabled = false;
+
+ var config = new OllamaConfig
+ {
+ Endpoint = "http://localhost:11434",
+ TextModel = new OllamaModelConfig("phi3:medium-128k", 131072),
+ EmbeddingModel = new OllamaModelConfig("nomic-embed-text", 2048)
+ };
+
+ var memory = new KernelMemoryBuilder()
+ .WithOllamaTextGeneration(config, new GPT4oTokenizer())
+ .WithOllamaTextEmbeddingGeneration(config, new GPT4oTokenizer())
+ .Configure(builder => builder.Services.AddLogging(l =>
+ {
+ l.SetMinimumLevel(logLevel);
+ l.AddSimpleConsole(c => c.SingleLine = true);
+ }))
+ .Build();
+
+ // Import some text
+ await memory.ImportTextAsync("Today is October 32nd, 2476");
+
+ // Generate an answer - this uses Ollama both for the embeddings used to find relevant data and for generating the answer
+ var answer = await memory.AskAsync("What's the current date (don't check for validity)?");
+ Console.WriteLine(answer.Question);
+ Console.WriteLine(answer.Result);
+
+ /*
+
+ -- Output using phi3:medium-128k:
+
+ What's the current date (don't check for validity)?
+ The given fact states that "Today is October 32nd, 2476." However, it appears to be an incorrect statement as
+ there are never more than 31 days in any month. If we consider this date without checking its validity and accept
+ the stated day of October as being 32, then the current date would be "October 32nd, 2476." However, it is important
+ to note that this date does not align with our calendar system.
+
+ */
+ }
+}
diff --git a/examples/README.md b/examples/README.md
index c365608bd..d9874ca30 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -6,29 +6,32 @@ Some examples about how to use Kernel Memory.
2. [Using Kernel Memory web service to upload documents and answer questions](001-dotnet-WebClient)
3. [Importing files and asking question without running the service (serverless mode)](002-dotnet-Serverless)
4. [Using KM Plugin for Semantic Kernel](003-dotnet-SemanticKernel-plugin)
-5. [Processing files with custom logic (custom handlers) in serverless mode](004-dotnet-serverless-custom-pipeline)
-6. [Processing files with custom logic (custom handlers) in asynchronous mode](005-dotnet-AsyncMemoryCustomPipeline)
+5. Customizations
+ * [Processing files with custom logic (custom handlers) in serverless mode](004-dotnet-serverless-custom-pipeline)
+ * [Processing files with custom logic (custom handlers) in asynchronous mode](005-dotnet-AsyncMemoryCustomPipeline)
+ * [Customizing RAG and summarization prompts](101-dotnet-custom-Prompts)
+ * [Custom partitioning/text chunking options](102-dotnet-custom-partitioning-options)
+ * [Using a custom embedding/vector generator](103-dotnet-custom-EmbeddingGenerator)
+ * [Using custom content decoders](108-dotnet-custom-content-decoders)
+ * [Using a custom web scraper to fetch web pages](109-dotnet-custom-webscraper)
+ * [Writing and using a custom ingestion handler](201-dotnet-serverless-custom-handler)
+ * [Using Context Parameters to customize RAG prompt during a request](209-dotnet-using-context-overrides)
+6. 
Local models and external connectors + * [Using custom LLMs](104-dotnet-custom-LLM) + * [Using local LLMs with Ollama](212-dotnet-ollama) + * [Using local LLMs with llama.cpp via LlamaSharp](105-dotnet-serverless-llamasharp) + * [Using local models with LM Studio](208-dotnet-lmstudio) + * [Using Semantic Kernel LLM connectors](107-dotnet-SemanticKernel-TextCompletion) + * [Generating answers with Anthropic LLMs](110-dotnet-anthropic) 7. [Upload files and ask questions from command line using curl](006-curl-calling-webservice) -8. [Customizing RAG and summarization prompts](101-dotnet-custom-Prompts) -9. [Custom partitioning/text chunking options](102-dotnet-custom-partitioning-options) -10. [Using a custom embedding/vector generator](103-dotnet-custom-EmbeddingGenerator) -11. [Using custom LLMs](104-dotnet-custom-LLM) -12. [Using LLama](105-dotnet-serverless-llamasharp) -13. [Summarizing documents, using synthetic memories](106-dotnet-retrieve-synthetics) -14. [Using Semantic Kernel LLM connectors](107-dotnet-SemanticKernel-TextCompletion) -15. [Using custom content decoders](108-dotnet-custom-content-decoders) -16. [Using a custom web scraper to fetch web pages](109-dotnet-custom-webscraper) -17. [Generating answers with Anthropic LLMs](110-dotnet-anthropic) -18. [Hybrid Search with Azure AI Search](111-dotnet-azure-ai-hybrid-search) -19. [Writing and using a custom ingestion handler](201-dotnet-serverless-custom-handler) -20. [Running a single asynchronous pipeline handler as a standalone service](202-dotnet-custom-handler-as-a-service) -21. [Test project using KM package from nuget.org](203-dotnet-using-core-nuget) -22. [Integrating Memory with ASP.NET applications and controllers](204-dotnet-ASP.NET-MVC-integration) -23. [Sample code showing how to extract text from files](205-dotnet-extract-text-from-docs) -24. [.NET configuration and logging](206-dotnet-configuration-and-logging) -25. [Expanding chunks retrieving adjacent partitions](207-dotnet-expanding-chunks-on-retrieval) -26. [Using local models via LM Studio](208-dotnet-lmstudio) -27. [Using Context Parameters to customize RAG prompt during a request](209-dotnet-using-context-overrides) -28. [Creating a Memory instance without KernelMemoryBuilder](210-KM-without-builder) -29. [Intent Detection](211-dotnet-WebClient-Intent-Detection) -30. [Fetching data from Discord](301-discord-test-application) +8. [Summarizing documents, using synthetic memories](106-dotnet-retrieve-synthetics) +9. [Hybrid Search with Azure AI Search](111-dotnet-azure-ai-hybrid-search) +10. [Running a single asynchronous pipeline handler as a standalone service](202-dotnet-custom-handler-as-a-service) +11. [Integrating Memory with ASP.NET applications and controllers](204-dotnet-ASP.NET-MVC-integration) +12. [Sample code showing how to extract text from files](205-dotnet-extract-text-from-docs) +13. [.NET configuration and logging](206-dotnet-configuration-and-logging) +14. [Expanding chunks retrieving adjacent partitions](207-dotnet-expanding-chunks-on-retrieval) +15. [Creating a Memory instance without KernelMemoryBuilder](210-KM-without-builder) +16. [Intent Detection](211-dotnet-WebClient-Intent-Detection) +17. [Fetching data from Discord](301-discord-test-application) +18. 
[Test project using KM package from nuget.org](203-dotnet-using-core-nuget) diff --git a/extensions/Anthropic/AnthropicTextGeneration.cs b/extensions/Anthropic/AnthropicTextGeneration.cs index c257d2aec..4d8597b95 100644 --- a/extensions/Anthropic/AnthropicTextGeneration.cs +++ b/extensions/Anthropic/AnthropicTextGeneration.cs @@ -68,8 +68,8 @@ public AnthropicTextGeneration( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; diff --git a/extensions/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs b/extensions/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs index 4e1ca1e5c..b57fe2ac6 100644 --- a/extensions/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs +++ b/extensions/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs @@ -36,8 +36,8 @@ public AzureOpenAITextEmbeddingGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; diff --git a/extensions/AzureOpenAI/AzureOpenAITextGenerator.cs b/extensions/AzureOpenAI/AzureOpenAITextGenerator.cs index bd0a1b529..313b46e3d 100644 --- a/extensions/AzureOpenAI/AzureOpenAITextGenerator.cs +++ b/extensions/AzureOpenAI/AzureOpenAITextGenerator.cs @@ -39,8 +39,8 @@ public AzureOpenAITextGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; diff --git a/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs index b906ebc06..fcb4fa3d3 100644 --- a/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs +++ b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs @@ -47,8 +47,8 @@ public LlamaSharpTextGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; diff --git a/extensions/Ollama/Ollama/DependencyInjection.cs b/extensions/Ollama/Ollama/DependencyInjection.cs new file mode 100644 index 000000000..618fc3497 --- /dev/null +++ b/extensions/Ollama/Ollama/DependencyInjection.cs @@ -0,0 +1,120 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.KernelMemory.AI; +using Microsoft.KernelMemory.AI.Ollama; +using OllamaSharp; + +#pragma warning disable IDE0130 // reduce number of "using" statements +// ReSharper disable once CheckNamespace - reduce number of "using" statements +namespace Microsoft.KernelMemory; + +/// +/// Kernel Memory builder extensions +/// +public static partial class KernelMemoryBuilderExtensions +{ + public static IKernelMemoryBuilder WithOllamaTextGeneration( + this IKernelMemoryBuilder builder, + OllamaConfig config, + ITextTokenizer? 
textTokenizer = null) + { + builder.Services.AddOllamaTextGeneration(config, textTokenizer); + return builder; + } + + public static IKernelMemoryBuilder WithOllamaTextGeneration( + this IKernelMemoryBuilder builder, + string modelName, + string endpoint = "http://localhost:11434", + ITextTokenizer? textTokenizer = null) + { + builder.Services.AddOllamaTextGeneration(modelName, endpoint, textTokenizer); + return builder; + } + + public static IKernelMemoryBuilder WithOllamaTextEmbeddingGeneration( + this IKernelMemoryBuilder builder, + OllamaConfig config, + ITextTokenizer? textTokenizer = null) + { + builder.Services.AddOllamaTextEmbeddingGeneration(config, textTokenizer); + return builder; + } + + public static IKernelMemoryBuilder WithOllamaTextEmbeddingGeneration( + this IKernelMemoryBuilder builder, + string modelName, + string endpoint = "http://localhost:11434", + ITextTokenizer? textTokenizer = null) + { + builder.Services.AddOllamaTextEmbeddingGeneration(modelName, endpoint, textTokenizer); + return builder; + } +} + +/// +/// .NET IServiceCollection dependency injection extensions. +/// +public static partial class DependencyInjection +{ + public static IServiceCollection AddOllamaTextGeneration( + this IServiceCollection services, + string modelName, + string endpoint = "http://localhost:11434", + ITextTokenizer? textTokenizer = null) + { + return services + .AddSingleton( + serviceProvider => new OllamaTextGenerator( + new OllamaApiClient(new Uri(endpoint), modelName), + new OllamaModelConfig { ModelName = modelName }, + textTokenizer, + serviceProvider.GetService())); + } + + public static IServiceCollection AddOllamaTextGeneration( + this IServiceCollection services, + OllamaConfig config, + ITextTokenizer? textTokenizer = null) + { + return services + .AddSingleton( + serviceProvider => new OllamaTextGenerator( + new OllamaApiClient(new Uri(config.Endpoint), config.TextModel.ModelName), + config.TextModel, + textTokenizer, + serviceProvider.GetService())); + } + + public static IServiceCollection AddOllamaTextEmbeddingGeneration( + this IServiceCollection services, + string modelName, + string endpoint = "http://localhost:11434", + ITextTokenizer? textTokenizer = null) + { + return services + .AddSingleton( + serviceProvider => new OllamaTextEmbeddingGenerator( + new OllamaApiClient(new Uri(endpoint), modelName), + new OllamaModelConfig { ModelName = modelName }, + textTokenizer, + serviceProvider.GetService())); + } + + public static IServiceCollection AddOllamaTextEmbeddingGeneration( + this IServiceCollection services, + OllamaConfig config, + ITextTokenizer? 
textTokenizer = null) + { + return services + .AddSingleton( + serviceProvider => new OllamaTextEmbeddingGenerator( + new OllamaApiClient(new Uri(config.Endpoint), config.EmbeddingModel.ModelName), + config.EmbeddingModel, + textTokenizer, + serviceProvider.GetService())); + } +} diff --git a/extensions/Ollama/Ollama/Ollama.csproj b/extensions/Ollama/Ollama/Ollama.csproj new file mode 100644 index 000000000..e26c65d47 --- /dev/null +++ b/extensions/Ollama/Ollama/Ollama.csproj @@ -0,0 +1,33 @@ + + + + net8.0 + LatestMajor + Microsoft.KernelMemory.AI.Ollama + Microsoft.KernelMemory.AI.Ollama + $(NoWarn);KMEXP00;KMEXP01;CA1724; + + + + true + Microsoft.KernelMemory.AI.Ollama + Ollama LLM connector for Kernel Memory + Provide access to Ollama LLM models in Kernel Memory to generate embeddings and text + Ollama, Memory, RAG, Kernel Memory, Semantic Memory, Episodic Memory, Declarative Memory, AI, Artificial Intelligence, Embeddings, Vector DB, Vector Search, Semantic Search, Memory DB, ETL + bin/$(Configuration)/$(TargetFramework)/$(AssemblyName).xml + + + + + + + + + + + + + + + + diff --git a/extensions/Ollama/Ollama/OllamaConfig.cs b/extensions/Ollama/Ollama/OllamaConfig.cs new file mode 100644 index 000000000..6afa24f0e --- /dev/null +++ b/extensions/Ollama/Ollama/OllamaConfig.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.KernelMemory.AI.Ollama; + +public class OllamaConfig +{ + /// + /// Ollama HTTP endpoint. + /// + public string Endpoint { get; set; } = "http://localhost:11434"; + + /// + /// Settings for the model used for text generation. Chat models can be used too. + /// + public OllamaModelConfig TextModel { get; set; } = new OllamaModelConfig(); + + /// + /// Settings for the model used for text embedding generation. + /// + public OllamaModelConfig EmbeddingModel { get; set; } = new OllamaModelConfig(); +} diff --git a/extensions/Ollama/Ollama/OllamaModelConfig.cs b/extensions/Ollama/Ollama/OllamaModelConfig.cs new file mode 100644 index 000000000..c1c3af561 --- /dev/null +++ b/extensions/Ollama/Ollama/OllamaModelConfig.cs @@ -0,0 +1,125 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.KernelMemory.AI.Ollama; + +public class OllamaModelConfig +{ + /// + /// Model used for text generation. Chat models can be used too. + /// + public string ModelName { get; set; } = string.Empty; + + /// + /// The max number of tokens supported by the model. + /// Default to 4096 for text and 8192 for embeddings. + /// + public int? MaxTokenTotal { get; set; } + + /// + /// Enable Mirostat sampling for controlling perplexity. + /// (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) + /// + public int? MiroStat { get; set; } + + /// + /// Influences how quickly the algorithm responds to feedback from the + /// generated text. A lower learning rate will result in slower adjustments, + /// while a higher learning rate will make the algorithm more responsive. + /// (Default: 0.1) + /// + public float? MiroStatEta { get; set; } + + /// + /// Controls the balance between coherence and diversity of the output. + /// A lower value will result in more focused and coherent text. + /// (Default: 5.0) + /// + public float? MiroStatTau { get; set; } + + /// + /// Sets the size of the context window used to generate the next token. + /// (Default: 2048) + /// + public int? NumCtx { get; set; } + + /// + /// The number of GQA groups in the transformer layer. 
Required for some + /// models, for example it is 8 for llama2:70b + /// + public int? NumGqa { get; set; } + + /// + /// The number of layers to send to the GPU(s). On macOS it defaults to + /// 1 to enable metal support, 0 to disable. + /// + public int? NumGpu { get; set; } + + /// + /// Sets the number of threads to use during computation. By default, + /// Ollama will detect this for optimal performance. + /// It is recommended to set this value to the number of physical CPU cores + /// your system has (as opposed to the logical number of cores). + /// + public int? NumThread { get; set; } + + /// + /// Sets how far back for the model to look back to prevent repetition. + /// (Default: 64, 0 = disabled, -1 = num_ctx) + /// + public int? RepeatLastN { get; set; } + + /// + /// Sets the random number seed to use for generation. + /// Setting this to a specific number will make the model generate the same + /// text for the same prompt. (Default: 0) + /// + public int? Seed { get; set; } + + /// + /// Tail free sampling is used to reduce the impact of less probable + /// tokens from the output. A higher value (e.g., 2.0) will reduce the + /// impact more, while a value of 1.0 disables this setting. (default: 1) + /// + public float? TfsZ { get; set; } + + /// + /// Maximum number of tokens to predict when generating text. + /// (Default: 128, -1 = infinite generation, -2 = fill context) + /// + public int? NumPredict { get; set; } + + /// + /// Reduces the probability of generating nonsense. A higher value + /// (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) + /// will be more conservative. (Default: 40) + /// + public int? TopK { get; set; } + + /// + /// Alternative to the top_p, and aims to ensure a balance of quality and variety.min_p represents the minimum + /// probability for a token to be considered, relative to the probability of the most likely token.For + /// example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less + /// than 0.05*0.9=0.045 are filtered out. (Default: 0.0) + /// + public float? MinP { get; set; } + + /// + /// How many requests can be processed in parallel + /// + public int MaxBatchSize { get; set; } = 1; + + public OllamaModelConfig() + { + } + + public OllamaModelConfig(string modelName) + { + this.ModelName = modelName; + } + + public OllamaModelConfig(string modelName, int maxToken) + { + this.ModelName = modelName; + this.MaxTokenTotal = maxToken; + } +} diff --git a/extensions/Ollama/Ollama/OllamaTextEmbeddingGenerator.cs b/extensions/Ollama/Ollama/OllamaTextEmbeddingGenerator.cs new file mode 100644 index 000000000..9719648bc --- /dev/null +++ b/extensions/Ollama/Ollama/OllamaTextEmbeddingGenerator.cs @@ -0,0 +1,139 @@ +// Copyright (c) Microsoft. All rights reserved. 
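Before the embedding generator that follows, a minimal, hypothetical sketch of how the optional OllamaModelConfig settings documented above could be combined with the builder extensions added earlier in this change. Endpoint, model names, token limits and option values are illustrative placeholders taken from or modeled on the 212-dotnet-ollama example, not recommendations.

```csharp
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI.Ollama;
using Microsoft.KernelMemory.AI.OpenAI;

// Illustrative values only.
var config = new OllamaConfig
{
    Endpoint = "http://localhost:11434",
    TextModel = new OllamaModelConfig("phi3:medium-128k", 131072)
    {
        NumCtx = 4096, // size of the context window used to generate the next token
        Seed = 42,     // fixed seed: same prompt produces the same text
        TopK = 40      // sampling cut-off
    },
    EmbeddingModel = new OllamaModelConfig("nomic-embed-text", 2048)
    {
        MaxBatchSize = 4 // how many requests can be processed in parallel
    }
};

var memory = new KernelMemoryBuilder()
    .WithOllamaTextGeneration(config, new GPT4oTokenizer())
    .WithOllamaTextEmbeddingGeneration(config, new GPT4oTokenizer())
    .Build();

// memory is now ready for ImportTextAsync / AskAsync, as in the 212-dotnet-ollama example.
```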
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.KernelMemory.AI.OpenAI; +using Microsoft.KernelMemory.Diagnostics; +using OllamaSharp; +using OllamaSharp.Models; + +namespace Microsoft.KernelMemory.AI.Ollama; + +public class OllamaTextEmbeddingGenerator : ITextEmbeddingGenerator, ITextEmbeddingBatchGenerator +{ + private const int MaxTokensIfUndefined = 8192; + + private readonly IOllamaApiClient _client; + private readonly OllamaModelConfig _modelConfig; + private readonly ILogger _log; + private readonly ITextTokenizer _textTokenizer; + + public int MaxTokens { get; } + + public int MaxBatchSize { get; } + + public OllamaTextEmbeddingGenerator( + IOllamaApiClient ollamaClient, + OllamaModelConfig modelConfig, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + { + this._client = ollamaClient; + this._modelConfig = modelConfig; + this.MaxBatchSize = modelConfig.MaxBatchSize; + this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger(); + + if (textTokenizer == null) + { + this._log.LogWarning( + "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); + } + + this._textTokenizer = textTokenizer; + + this.MaxTokens = modelConfig.MaxTokenTotal ?? MaxTokensIfUndefined; + } + + public OllamaTextEmbeddingGenerator( + OllamaConfig config, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + : this( + new OllamaApiClient(new Uri(config.Endpoint), config.EmbeddingModel.ModelName), + config.EmbeddingModel, + textTokenizer, + loggerFactory) + { + } + + public OllamaTextEmbeddingGenerator( + HttpClient httpClient, + OllamaConfig config, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? 
loggerFactory = null) + : this( + new OllamaApiClient(httpClient, config.EmbeddingModel.ModelName), + config.EmbeddingModel, + textTokenizer, + loggerFactory) + { + } + + public int CountTokens(string text) + { + return this._textTokenizer.CountTokens(text); + } + + public IReadOnlyList GetTokens(string text) + { + return this._textTokenizer.GetTokens(text); + } + + public async Task GenerateEmbeddingAsync( + string text, + CancellationToken cancellationToken = default) + { + this._log.LogTrace("Generating embedding, text length {0} chars", text.Length); + + Embedding[] result = await this.GenerateEmbeddingBatchAsync([text], cancellationToken).ConfigureAwait(false); + var embeddding = result.First(); + this._log.LogTrace("Embedding ready, vector length {0}", embeddding.Length); + + return embeddding; + } + + public async Task GenerateEmbeddingBatchAsync( + IEnumerable textList, + CancellationToken cancellationToken = default) + { + var list = textList.ToList(); + this._log.LogTrace("Generating embeddings batch, size {0} texts", list.Count); + + var request = new EmbedRequest + { + Model = this._client.SelectedModel, + Input = list, + Options = new RequestOptions + { + // Global settings + MiroStat = this._modelConfig.MiroStat, + MiroStatEta = this._modelConfig.MiroStatEta, + MiroStatTau = this._modelConfig.MiroStatTau, + NumCtx = this._modelConfig.NumCtx, + NumGqa = this._modelConfig.NumGqa, + NumGpu = this._modelConfig.NumGpu, + NumThread = this._modelConfig.NumThread, + RepeatLastN = this._modelConfig.RepeatLastN, + Seed = this._modelConfig.Seed, + TfsZ = this._modelConfig.TfsZ, + NumPredict = this._modelConfig.NumPredict, + TopK = this._modelConfig.TopK, + MinP = this._modelConfig.MinP, + } + }; + + EmbedResponse response = await this._client.Embed(request, cancellationToken).ConfigureAwait(false); + Embedding[] result = response.Embeddings.Select(Embedding.FromDoubles).ToArray(); + + this._log.LogTrace("Embeddings batch ready, size {0} texts", result.Length); + + return result; + } +} diff --git a/extensions/Ollama/Ollama/OllamaTextGenerator.cs b/extensions/Ollama/Ollama/OllamaTextGenerator.cs new file mode 100644 index 000000000..c5bf02eb7 --- /dev/null +++ b/extensions/Ollama/Ollama/OllamaTextGenerator.cs @@ -0,0 +1,140 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Net.Http; +using System.Runtime.CompilerServices; +using System.Threading; +using Microsoft.Extensions.Logging; +using Microsoft.KernelMemory.AI.OpenAI; +using Microsoft.KernelMemory.Diagnostics; +using OllamaSharp; +using OllamaSharp.Models; + +namespace Microsoft.KernelMemory.AI.Ollama; + +public class OllamaTextGenerator : ITextGenerator +{ + private const int MaxTokensIfUndefined = 4096; + + private readonly IOllamaApiClient _client; + private readonly OllamaModelConfig _modelConfig; + private readonly ILogger _log; + private readonly ITextTokenizer _textTokenizer; + + public int MaxTokenTotal { get; } + + public OllamaTextGenerator( + IOllamaApiClient ollamaClient, + OllamaModelConfig modelConfig, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + { + this._client = ollamaClient; + this._modelConfig = modelConfig; + this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger(); + + if (textTokenizer == null) + { + this._log.LogWarning( + "Tokenizer not specified, will use {0}. 
The token count might be incorrect, causing unexpected errors", + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); + } + + this._textTokenizer = textTokenizer; + + this.MaxTokenTotal = modelConfig.MaxTokenTotal ?? MaxTokensIfUndefined; + } + + public OllamaTextGenerator( + OllamaConfig config, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + : this( + new OllamaApiClient(new Uri(config.Endpoint), config.TextModel.ModelName), + config.TextModel, + textTokenizer, + loggerFactory) + { + } + + public OllamaTextGenerator( + HttpClient httpClient, + OllamaConfig config, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + : this( + new OllamaApiClient(httpClient, config.TextModel.ModelName), + config.TextModel, + textTokenizer, + loggerFactory) + { + } + + public int CountTokens(string text) + { + return this._textTokenizer.CountTokens(text); + } + + public IReadOnlyList GetTokens(string text) + { + return this._textTokenizer.GetTokens(text); + } + + public async IAsyncEnumerable GenerateTextAsync( + string prompt, + TextGenerationOptions options, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var request = new GenerateRequest + { + Model = this._client.SelectedModel, + Prompt = prompt, + Stream = true, + Options = new RequestOptions + { + // Use case specific + Temperature = (float)options.Temperature, + TopP = (float)options.NucleusSampling, + RepeatPenalty = (float)options.FrequencyPenalty, + + // Global settings + MiroStat = this._modelConfig.MiroStat, + MiroStatEta = this._modelConfig.MiroStatEta, + MiroStatTau = this._modelConfig.MiroStatTau, + NumCtx = this._modelConfig.NumCtx, + NumGqa = this._modelConfig.NumGqa, + NumGpu = this._modelConfig.NumGpu, + NumThread = this._modelConfig.NumThread, + RepeatLastN = this._modelConfig.RepeatLastN, + Seed = this._modelConfig.Seed, + TfsZ = this._modelConfig.TfsZ, + NumPredict = this._modelConfig.NumPredict, + TopK = this._modelConfig.TopK, + MinP = this._modelConfig.MinP, + } + }; + + if (options.StopSequences is { Count: > 0 }) + { + var stop = new List(); + foreach (var s in options.StopSequences) { stop.Add(s); } + + request.Options.Stop = stop.ToArray(); + } + + // IAsyncEnumerable stream = this._client.Generate(request, cancellationToken); + // await foreach (GenerateResponseStream? token in stream) + // { + // if (token != null) { yield return token.Response; } + // } + + var chat = new Chat(this._client); + IAsyncEnumerable stream = chat.Send(prompt, cancellationToken); + await foreach (string? token in stream) + { + if (token != null) { yield return token; } + } + } +} diff --git a/extensions/Ollama/README.md b/extensions/Ollama/README.md new file mode 100644 index 000000000..d678c047e --- /dev/null +++ b/extensions/Ollama/README.md @@ -0,0 +1,29 @@ +# Kernel Memory with Ollama + +[![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.AI.Ollama)](https://www.nuget.org/packages/Microsoft.KernelMemory.AI.Ollama/) +[![Discord](https://img.shields.io/discord/1063152441819942922?label=Discord&logo=discord&logoColor=white&color=d82679)](https://aka.ms/KMdiscord) + +This project contains the +[Ollama](https://ollama.com) +LLM connector to access to LLM models via Ollama service to generate text and +text embeddings. 
+ +Sample code: + +```csharp +var config = new OllamaConfig +{ + Endpoint = "http://localhost:11434", + TextModel = new OllamaModelConfig("phi3:medium-128k", 131072), + EmbeddingModel = new OllamaModelConfig("nomic-embed-text", 2048) +}; + +var memory = new KernelMemoryBuilder() + .WithOllamaTextGeneration(config) + .WithOllamaTextEmbeddingGeneration(config) + .Build(); + +await memory.ImportTextAsync("Today is October 32nd, 2476"); + +var answer = await memory.AskAsync("What's the current date (don't check for validity)?"); +``` diff --git a/extensions/OpenAI/OpenAI.UnitTests/GPTTokenizersTests.cs b/extensions/OpenAI/OpenAI.UnitTests/GPTTokenizersTests.cs index c1c01f0b5..0d93d1336 100644 --- a/extensions/OpenAI/OpenAI.UnitTests/GPTTokenizersTests.cs +++ b/extensions/OpenAI/OpenAI.UnitTests/GPTTokenizersTests.cs @@ -26,6 +26,10 @@ public void CanTokenize() var gpt4 = new GPT4Tokenizer(); tokens = gpt4.GetTokens(helloWorld); Assert.Equal(["hello", " world"], tokens); + + var gpt4o = new GPT4oTokenizer(); + tokens = gpt4o.GetTokens(helloWorld); + Assert.Equal(["hello", " world"], tokens); } [Fact] @@ -38,5 +42,6 @@ public void TheyCountTokens() Assert.Equal(29, new GPT2Tokenizer().CountTokens(text)); Assert.Equal(29, new GPT3Tokenizer().CountTokens(text)); Assert.Equal(21, new GPT4Tokenizer().CountTokens(text)); + Assert.Equal(22, new GPT4oTokenizer().CountTokens(text)); } } diff --git a/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs b/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs index 6cc1c3104..5872c8670 100644 --- a/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs +++ b/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs @@ -137,8 +137,8 @@ private OpenAITextEmbeddingGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; diff --git a/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs b/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs index 7251bbcbd..f2c981e8d 100644 --- a/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs +++ b/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs @@ -86,8 +86,8 @@ public OpenAITextGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; @@ -96,13 +96,13 @@ public OpenAITextGenerator( /// public int CountTokens(string text) { - return this._textTokenizer!.CountTokens(text); + return this._textTokenizer.CountTokens(text); } /// public IReadOnlyList GetTokens(string text) { - return this._textTokenizer!.GetTokens(text); + return this._textTokenizer.GetTokens(text); } /// diff --git a/service/Abstractions/AI/Embedding.cs b/service/Abstractions/AI/Embedding.cs index e44526722..aa2566868 100644 --- a/service/Abstractions/AI/Embedding.cs +++ b/service/Abstractions/AI/Embedding.cs @@ -38,6 +38,18 @@ public Embedding(float[] vector) this.Data = vector; } + /// + /// This is not a ctor on purpose so we can use collections syntax with + /// the main ctor, and surface the extra casting cost when not using floats. 
+ ///
+ public static Embedding FromDoubles(double[] vector)
+ {
+ float[] f = new float[vector.Length];
+ for (int i = 0; i < vector.Length; i++) { f[i] = (float)vector[i]; }
+
+ return new Embedding(f);
+ }
+
public Embedding(ReadOnlyMemory vector)
{
this.Data = vector;
diff --git a/service/Abstractions/Diagnostics/SensitiveDataLogger.cs b/service/Abstractions/Diagnostics/SensitiveDataLogger.cs
new file mode 100644
index 000000000..1262cf510
--- /dev/null
+++ b/service/Abstractions/Diagnostics/SensitiveDataLogger.cs
@@ -0,0 +1,75 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using Microsoft.Extensions.Logging;
+
+namespace Microsoft.KernelMemory.Diagnostics;
+
+#pragma warning disable CA2254 // by design
+public static class SensitiveDataLogger
+{
+ private static bool s_enabled = false;
+
+ public static bool Enabled
+ {
+ get
+ {
+ return s_enabled;
+ }
+ set
+ {
+ var env = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT");
+ if (!string.Equals(env, "Development", StringComparison.OrdinalIgnoreCase))
+ {
+#pragma warning disable CA2201
+ throw new ApplicationException("Sensitive data logging can be enabled only in a development environment. Check ASPNETCORE_ENVIRONMENT env var.");
+#pragma warning restore CA2201
+ }
+
+ s_enabled = value && string.Equals(env, "Development", StringComparison.OrdinalIgnoreCase);
+ }
+ }
+
+ public static LogLevel LoggingLevel { get; set; } = LogLevel.Information;
+
+ public static void LogSensitive(this ILogger logger, string? message, params object?[] args)
+ {
+ if (!Enabled) { return; }
+
+ logger.Log(LoggingLevel, $"[PII] {message}", args);
+ }
+
+ public static void LogSensitive(
+ this ILogger logger,
+ Exception? exception,
+ string? message,
+ params object?[] args)
+ {
+ if (!Enabled) { return; }
+
+ logger.Log(LoggingLevel, exception, message, args);
+ }
+
+ public static void LogSensitive(
+ this ILogger logger,
+ EventId eventId,
+ Exception? exception,
+ string? message,
+ params object?[] args)
+ {
+ if (!Enabled) { return; }
+
+ logger.Log(LoggingLevel, eventId, exception, message, args);
+ }
+
+ public static void LogSensitive(
+ this ILogger logger,
+ EventId eventId,
+ string? 
message, + params object?[] args) + { + if (!Enabled) { return; } + + logger.Log(LoggingLevel, eventId, message, args); + } +} diff --git a/service/Core/Core.csproj b/service/Core/Core.csproj index b6ce2f27f..e60ba40a7 100644 --- a/service/Core/Core.csproj +++ b/service/Core/Core.csproj @@ -20,6 +20,7 @@ + diff --git a/service/Core/Search/SearchClient.cs b/service/Core/Search/SearchClient.cs index 4a229f22e..231ec9df5 100644 --- a/service/Core/Search/SearchClient.cs +++ b/service/Core/Search/SearchClient.cs @@ -324,7 +324,7 @@ public async Task AskAsync( if (factsUsedCount == 0) { - this._log.LogWarning("No memories available"); + this._log.LogWarning("No memories available (min relevance: {0})", minRelevance); noAnswerFound.NoResultReason = "No memories available"; return noAnswerFound; } @@ -347,6 +347,7 @@ public async Task AskAsync( watch.Stop(); answer.Result = text.ToString(); + this._log.LogSensitive("Answer: {0}", answer.Result); answer.NoResult = ValueIsEquivalentTo(answer.Result, this._config.EmptyAnswer); if (answer.NoResult) { @@ -391,6 +392,8 @@ private IAsyncEnumerable GenerateAnswer(string question, string facts, I this._log.LogDebug("Running RAG prompt, size: {0} tokens, requesting max {1} tokens", this._textGenerator.CountTokens(prompt), this._config.AnswerTokens); + + this._log.LogSensitive("Prompt: {0}", prompt); } return this._textGenerator.GenerateTextAsync(prompt, options, token); diff --git a/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs b/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs index fbcb29a72..533f0e7d7 100644 --- a/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs +++ b/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs @@ -44,8 +44,8 @@ public SemanticKernelTextEmbeddingGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._tokenizer = textTokenizer; diff --git a/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs b/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs index 10fcbf2cf..3d1d48982 100644 --- a/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs +++ b/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs @@ -45,8 +45,8 @@ public SemanticKernelTextGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. 
The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._tokenizer = textTokenizer; diff --git a/service/Service/ServiceConfiguration.cs b/service/Service/ServiceConfiguration.cs index 0d7b63ba9..e3b6c27a1 100644 --- a/service/Service/ServiceConfiguration.cs +++ b/service/Service/ServiceConfiguration.cs @@ -6,6 +6,7 @@ using Microsoft.Extensions.DependencyInjection; using Microsoft.KernelMemory.AI; using Microsoft.KernelMemory.AI.Anthropic; +using Microsoft.KernelMemory.AI.Ollama; using Microsoft.KernelMemory.AI.OpenAI; using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.MemoryDb.SQLServer; @@ -215,7 +216,7 @@ private void ConfigureIngestionEmbeddingGenerators(IKernelMemoryBuilder builder) var instance = this.GetServiceInstance(builder, s => s.AddAzureOpenAIEmbeddingGeneration( config: this.GetServiceConfig("AzureOpenAIEmbedding"), - textTokenizer: new GPT4Tokenizer())); + textTokenizer: new GPT4oTokenizer())); builder.AddIngestionEmbeddingGenerator(instance); break; } @@ -225,7 +226,17 @@ private void ConfigureIngestionEmbeddingGenerators(IKernelMemoryBuilder builder) var instance = this.GetServiceInstance(builder, s => s.AddOpenAITextEmbeddingGeneration( config: this.GetServiceConfig("OpenAI"), - textTokenizer: new GPT4Tokenizer())); + textTokenizer: new GPT4oTokenizer())); + builder.AddIngestionEmbeddingGenerator(instance); + break; + } + + case string x when x.Equals("Ollama", StringComparison.OrdinalIgnoreCase): + { + var instance = this.GetServiceInstance(builder, + s => s.AddOllamaTextEmbeddingGeneration( + config: this.GetServiceConfig("Ollama"), + textTokenizer: new GPT4oTokenizer())); builder.AddIngestionEmbeddingGenerator(instance); break; } @@ -352,13 +363,19 @@ private void ConfigureRetrievalEmbeddingGenerator(IKernelMemoryBuilder builder) case string y when y.Equals("AzureOpenAIEmbedding", StringComparison.OrdinalIgnoreCase): builder.Services.AddAzureOpenAIEmbeddingGeneration( config: this.GetServiceConfig("AzureOpenAIEmbedding"), - textTokenizer: new GPT4Tokenizer()); + textTokenizer: new GPT4oTokenizer()); break; case string x when x.Equals("OpenAI", StringComparison.OrdinalIgnoreCase): builder.Services.AddOpenAITextEmbeddingGeneration( config: this.GetServiceConfig("OpenAI"), - textTokenizer: new GPT4Tokenizer()); + textTokenizer: new GPT4oTokenizer()); + break; + + case string x when x.Equals("Ollama", StringComparison.OrdinalIgnoreCase): + builder.Services.AddOllamaTextEmbeddingGeneration( + config: this.GetServiceConfig("Ollama"), + textTokenizer: new GPT4oTokenizer()); break; default: @@ -423,17 +440,25 @@ private void ConfigureTextGenerator(IKernelMemoryBuilder builder) case string y when y.Equals("AzureOpenAIText", StringComparison.OrdinalIgnoreCase): builder.Services.AddAzureOpenAITextGeneration( config: this.GetServiceConfig("AzureOpenAIText"), - textTokenizer: new GPT4Tokenizer()); + textTokenizer: new GPT4oTokenizer()); break; case string x when x.Equals("OpenAI", StringComparison.OrdinalIgnoreCase): builder.Services.AddOpenAITextGeneration( config: this.GetServiceConfig("OpenAI"), - textTokenizer: new GPT4Tokenizer()); + textTokenizer: new GPT4oTokenizer()); break; case string x when x.Equals("Anthropic", StringComparison.OrdinalIgnoreCase): - builder.Services.AddAnthropicTextGeneration(this.GetServiceConfig("Anthropic")); + builder.Services.AddAnthropicTextGeneration( + config: 
this.GetServiceConfig("Anthropic"), + textTokenizer: new GPT4oTokenizer()); + break; + + case string x when x.Equals("Ollama", StringComparison.OrdinalIgnoreCase): + builder.Services.AddOllamaTextGeneration( + config: this.GetServiceConfig("Ollama"), + textTokenizer: new GPT4oTokenizer()); break; case string x when x.Equals("LlamaSharp", StringComparison.OrdinalIgnoreCase): diff --git a/service/Service/appsettings.json b/service/Service/appsettings.json index 4dc741c18..e4c5bd707 100644 --- a/service/Service/appsettings.json +++ b/service/Service/appsettings.json @@ -375,6 +375,119 @@ "DatabaseName": "KernelMemory", "UseSingleCollectionForVectorSearch": false }, + "Ollama": { + "Endpoint": "http://localhost:11434", + "TextModel": { + "ModelName": "phi3:medium-128k", + "MaxTokenTotal": 131072, + // How many requests can be processed in parallel + "MaxBatchSize": 1 + //// Enable Mirostat sampling for controlling perplexity. + //// (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) + //"MiroStat": 0, + //// Influences how quickly the algorithm responds to feedback from the + //// generated text. A lower learning rate will result in slower adjustments, + //// while a higher learning rate will make the algorithm more responsive. + //// (Default: 0.1) + //"MiroStatEta": 0.1, + //// Controls the balance between coherence and diversity of the output. + //// A lower value will result in more focused and coherent text. + //// (Default: 5.0) + //"MiroStatTau": 5.0, + //// Sets the size of the context window used to generate the next token. + //// (Default: 2048) + //"NumCtx": 2048, + //// The number of GQA groups in the transformer layer. Required for some + //// models, for example it is 8 for llama2:70b + //"NumGqa": null, + //// The number of layers to send to the GPU(s). On macOS it defaults to + //// 1 to enable metal support, 0 to disable. + //"NumGpu": null, + //// Sets the number of threads to use during computation. By default, + //// Ollama will detect this for optimal performance. + //// It is recommended to set this value to the number of physical CPU cores + //// your system has (as opposed to the logical number of cores). + //"NumThread": null, + //// Sets how far back for the model to look back to prevent repetition. + //// (Default: 64, 0 = disabled, -1 = num_ctx) + //"RepeatLastN": null, + //// Sets the random number seed to use for generation. + //// Setting this to a specific number will make the model generate the same + //// text for the same prompt. (Default: 0) + //"Seed": 0, + //// Tail free sampling is used to reduce the impact of less probable + //// tokens from the output. A higher value (e.g., 2.0) will reduce the + //// impact more, while a value of 1.0 disables this setting. (default: 1) + //"TfsZ": 1.0, + //// Maximum number of tokens to predict when generating text. + //// (Default: 128, -1 = infinite generation, -2 = fill context) + //"NumPredict": 128, + //// Reduces the probability of generating nonsense. A higher value + //// (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) + //// will be more conservative. (Default: 40) + //"TopK": 40, + //// Alternative to the top_p, and aims to ensure a balance of quality and variety.min_p represents the minimum + //// probability for a token to be considered, relative to the probability of the most likely token.For + //// example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less + //// than 0.05*0.9=0.045 are filtered out. 
(Default: 0.0) + //"MinP": 0.0 + }, + "EmbeddingModel": { + "ModelName": "nomic-embed-text", + "MaxTokenTotal": 2048, + // How many requests can be processed in parallel + "MaxBatchSize": 1 + //// Enable Mirostat sampling for controlling perplexity. + //// (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) + //"MiroStat": 0, + //// Influences how quickly the algorithm responds to feedback from the + //// generated text. A lower learning rate will result in slower adjustments, + //// while a higher learning rate will make the algorithm more responsive. + //// (Default: 0.1) + //"MiroStatEta": 0.1, + //// Controls the balance between coherence and diversity of the output. + //// A lower value will result in more focused and coherent text. + //// (Default: 5.0) + //"MiroStatTau": 5.0, + //// Sets the size of the context window used to generate the next token. + //// (Default: 2048) + //"NumCtx": 2048, + //// The number of GQA groups in the transformer layer. Required for some + //// models, for example it is 8 for llama2:70b + //"NumGqa": null, + //// The number of layers to send to the GPU(s). On macOS it defaults to + //// 1 to enable metal support, 0 to disable. + //"NumGpu": null, + //// Sets the number of threads to use during computation. By default, + //// Ollama will detect this for optimal performance. + //// It is recommended to set this value to the number of physical CPU cores + //// your system has (as opposed to the logical number of cores). + //"NumThread": null, + //// Sets how far back for the model to look back to prevent repetition. + //// (Default: 64, 0 = disabled, -1 = num_ctx) + //"RepeatLastN": null, + //// Sets the random number seed to use for generation. + //// Setting this to a specific number will make the model generate the same + //// text for the same prompt. (Default: 0) + //"Seed": 0, + //// Tail free sampling is used to reduce the impact of less probable + //// tokens from the output. A higher value (e.g., 2.0) will reduce the + //// impact more, while a value of 1.0 disables this setting. (default: 1) + //"TfsZ": 1.0, + //// Maximum number of tokens to predict when generating text. + //// (Default: 128, -1 = infinite generation, -2 = fill context) + //"NumPredict": 128, + //// Reduces the probability of generating nonsense. A higher value + //// (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) + //// will be more conservative. (Default: 40) + //"TopK": 40, + //// Alternative to the top_p, and aims to ensure a balance of quality and variety.min_p represents the minimum + //// probability for a token to be considered, relative to the probability of the most likely token.For + //// example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less + //// than 0.05*0.9=0.045 are filtered out. 
(Default: 0.0) + //"MinP": 0.0 + } + }, "OpenAI": { // Name of the model used to generate text (text completion or chat completion) "TextModel": "gpt-3.5-turbo-16k", diff --git a/tools/InteractiveSetup/Context.cs b/tools/InteractiveSetup/Context.cs index d2b4b713b..6f93e92a6 100644 --- a/tools/InteractiveSetup/Context.cs +++ b/tools/InteractiveSetup/Context.cs @@ -26,6 +26,7 @@ internal sealed class Context public BoundedBoolean CfgAzureOpenAIEmbedding = new(); public BoundedBoolean CfgOpenAI = new(); public BoundedBoolean CfgLlamaSharp = new(); + public BoundedBoolean CfgOllama = new(); public BoundedBoolean CfgAzureAIDocIntel = new(); // Vectors diff --git a/tools/InteractiveSetup/Main.cs b/tools/InteractiveSetup/Main.cs index bb9341041..9a2defd0b 100644 --- a/tools/InteractiveSetup/Main.cs +++ b/tools/InteractiveSetup/Main.cs @@ -62,6 +62,7 @@ public static void InteractiveSetup(string[] args) AzureOpenAIText.Setup(ctx); OpenAI.Setup(ctx); LlamaSharp.Setup(ctx); + Ollama.Setup(ctx); Logger.Setup(); @@ -198,6 +199,18 @@ private static void EmbeddingGeneratorSetup(Context ctx) ctx.CfgOpenAI.Value = true; }), + new("Ollama service", config.Retrieval.EmbeddingGeneratorType == "Ollama", () => + { + AppSettings.Change(x => + { + x.Retrieval.EmbeddingGeneratorType = "Ollama"; + x.DataIngestion.EmbeddingGeneratorTypes = ctx.CfgEmbeddingGenerationEnabled.Value + ? new List { x.Retrieval.EmbeddingGeneratorType } + : new List { }; + }); + ctx.CfgOllama.Value = true; + }), + new("None/Custom (manually set with code)", string.IsNullOrEmpty(config.Retrieval.EmbeddingGeneratorType), () => { AppSettings.Change(x => @@ -233,7 +246,13 @@ private static void TextGeneratorTypeSetup(Context ctx) ctx.CfgOpenAI.Value = true; }), - new("LLama model", config.TextGeneratorType == "LlamaSharp", () => + new("Ollama service", config.TextGeneratorType == "Ollama", () => + { + AppSettings.Change(x => { x.TextGeneratorType = "Ollama"; }); + ctx.CfgOllama.Value = true; + }), + + new("LlamaSharp library", config.TextGeneratorType == "LlamaSharp", () => { AppSettings.Change(x => { x.TextGeneratorType = "LlamaSharp"; }); ctx.CfgLlamaSharp.Value = true; diff --git a/tools/InteractiveSetup/Services/Ollama.cs b/tools/InteractiveSetup/Services/Ollama.cs new file mode 100644 index 000000000..59b656540 --- /dev/null +++ b/tools/InteractiveSetup/Services/Ollama.cs @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft. All rights reserved. 
+
+using System.Collections.Generic;
+using System.Globalization;
+using Microsoft.KernelMemory.InteractiveSetup.UI;
+
+namespace Microsoft.KernelMemory.InteractiveSetup.Services;
+
+internal static class Ollama
+{
+    public static void Setup(Context ctx, bool force = false)
+    {
+        if (!ctx.CfgOllama.Value && !force) { return; }
+
+        ctx.CfgOllama.Value = false;
+        const string ServiceName = "Ollama";
+
+        Dictionary<string, object> textModel = new();
+        Dictionary<string, object> embeddingModel = new();
+
+        if (!AppSettings.GetCurrentConfig().Services.TryGetValue(ServiceName, out var config))
+        {
+            textModel = new Dictionary<string, object>
+            {
+                { "ModelName", "phi3:medium-128k" },
+                { "MaxTokenTotal", 131072 },
+            };
+
+            embeddingModel = new Dictionary<string, object>
+            {
+                { "ModelName", "nomic-embed-text" },
+                { "MaxTokenTotal", 2048 },
+            };
+
+            config = new Dictionary<string, object>
+            {
+                { "Endpoint", "http://localhost:11434" },
+                { "TextModel", textModel },
+                { "EmbeddingModel", embeddingModel },
+            };
+        }
+
+        AppSettings.Change(x => x.Services[ServiceName] = new Dictionary<string, object>
+        {
+            { "Endpoint", SetupUI.AskOpenQuestion("Ollama endpoint", config.TryGet("Endpoint")) }
+        });
+
+        AppSettings.Change(x => x.Services[ServiceName]["TextModel"] = new Dictionary<string, object>
+        {
+            { "ModelName", SetupUI.AskOpenQuestion("Ollama text model name", textModel.TryGet("ModelName")) },
+            { "MaxTokenTotal", SetupUI.AskOpenQuestionInt("Ollama text model max tokens", StrToInt(textModel.TryGet("MaxTokenTotal"))) },
+        });
+
+        AppSettings.Change(x => x.Services[ServiceName]["EmbeddingModel"] = new Dictionary<string, object>
+        {
+            { "ModelName", SetupUI.AskOpenQuestion("Ollama text embedding model name", embeddingModel.TryGet("ModelName")) },
+            { "MaxTokenTotal", SetupUI.AskOpenQuestionInt("Ollama text embedding model max tokens", StrToInt(embeddingModel.TryGet("MaxTokenTotal"))) },
+        });
+    }
+
+    private static int StrToInt(string s)
+    {
+        return int.Parse(s, NumberStyles.Integer, NumberFormatInfo.InvariantInfo);
+    }
+}
diff --git a/tools/InteractiveSetup/UI/SetupUI.cs b/tools/InteractiveSetup/UI/SetupUI.cs
index 1c4c8ddee..b2113261e 100644
--- a/tools/InteractiveSetup/UI/SetupUI.cs
+++ b/tools/InteractiveSetup/UI/SetupUI.cs
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft. All rights reserved.
 
 using System;
+using System.Globalization;
 using System.Linq;
 
 namespace Microsoft.KernelMemory.InteractiveSetup.UI;
@@ -30,6 +31,12 @@ public static string AskOptionalOpenQuestion(string question, string? defaultVal
         return AskOpenQuestion(question: question, defaultValue: defaultValue, optional: true);
     }
 
+    public static int AskOpenQuestionInt(string question, int defaultValue, bool optional = false)
+    {
+        string value = AskOpenQuestion(question: question, defaultValue: $"{defaultValue}", trim: true, optional: optional, isPassword: false);
+        return int.Parse(value, NumberStyles.Integer, NumberFormatInfo.InvariantInfo);
+    }
+
     public static string AskOpenQuestion(string question, string? defaultValue, bool trim = true, bool optional = false, bool isPassword = false)
     {
         if (!string.IsNullOrEmpty(defaultValue))
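
Two notes on the changes above; neither sketch below is part of the patch. First, SearchClient now logs the full RAG prompt and the generated answer through a LogSensitive helper; the tail of that extension is visible at the top of this diff (it returns immediately unless an Enabled flag is set, then forwards to ILogger.Log at a configurable level). A minimal sketch of that opt-in pattern follows; the class name and the placement of the static flags are assumptions, and the patch's version also threads an EventId through, which is omitted here for brevity.

// Sketch only, not part of the patch. Only the Enabled/LoggingLevel guard and the
// LogSensitive call sites appear in the diff; the class name is an assumption.
using Microsoft.Extensions.Logging;

public static class SensitiveLogging
{
    // Off by default, so prompts and answers never reach the logs unless explicitly enabled.
    public static bool Enabled { get; set; }

    // Level used when sensitive logging is enabled.
    public static LogLevel LoggingLevel { get; set; } = LogLevel.Information;

    public static void LogSensitive(this ILogger logger, string? message, params object?[] args)
    {
        if (!Enabled) { return; }

        logger.Log(LoggingLevel, message, args);
    }
}

With that guard in place, call sites such as this._log.LogSensitive("Prompt: {0}", prompt) cost a single boolean check when the feature is off.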
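
Second, the new Ollama backend is selected purely through configuration: setting TextGeneratorType / EmbeddingGeneratorType to "Ollama" (in appsettings.json or via the interactive setup) makes ServiceConfiguration call AddOllamaTextGeneration and AddOllamaTextEmbeddingGeneration with a GPT4oTokenizer. The same services can be wired up in code for serverless use. The sketch below assumes the extension exposes OllamaConfig/OllamaModelConfig types and WithOllama* builder extensions mirroring the "Ollama" appsettings section and the Add* service-collection methods in this diff; those names are assumptions, not confirmed by the patch.

// Sketch only, not part of the patch. OllamaConfig/OllamaModelConfig and the
// WithOllama* builder extensions are assumed; only the AddOllama* methods and the
// config shape (Endpoint, TextModel, EmbeddingModel) appear in the diff.
using System;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI.Ollama;

var config = new OllamaConfig
{
    Endpoint = "http://localhost:11434",
    TextModel = new OllamaModelConfig { ModelName = "phi3:medium-128k", MaxTokenTotal = 131072 },
    EmbeddingModel = new OllamaModelConfig { ModelName = "nomic-embed-text", MaxTokenTotal = 2048 }
};

var memory = new KernelMemoryBuilder()
    .WithOllamaTextGeneration(config)          // assumed wrapper over AddOllamaTextGeneration
    .WithOllamaTextEmbeddingGeneration(config) // assumed wrapper over AddOllamaTextEmbeddingGeneration
    .Build<MemoryServerless>();

await memory.ImportTextAsync("Kernel Memory can run fully locally using Ollama models.");
var answer = await memory.AskAsync("Which runtime serves the local models?");
Console.WriteLine(answer.Result);

The models referenced by the default configuration have to be available locally first, e.g. "ollama pull phi3:medium-128k" and "ollama pull nomic-embed-text".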