Ollama connector (#770)
## Motivation and Context (Why the change? What's the scenario?)

Although the Ollama service protocol seemingly follows OpenAI behavior, the
Azure OpenAI/OpenAI/SK OpenAI C# connectors are not fully functional when
pointed at an Ollama service, leading to various errors.

This PR introduces a new connector dedicated to
[Ollama](https://ollama.com), enabling its use for Text Generation and
Text Embedding generation.

The Ollama connector can be used in both service and serverless mode. The PR
includes an example (212); I've also manually tested the service with
the new connector, using "phi3:medium-128k" and "nomic-embed-text" on
Apple Silicon.

## High level description (Approach, Design)

* New Ollama connector based on
[OllamaSharp](https://github.com/awaescher/OllamaSharp)
* Bump version to 0.72
* Other minor changes:
  * Switch from the gpt4 to the gpt4o tokenizer when no tokenizer is specified.
  * Add SensitiveDataLogger, usable only in dev environments, to log data
    that potentially includes PII.
  * Fix a warning in service logs when using Anthropic
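For reference, the new connector is wired into a `KernelMemoryBuilder` roughly as shown below. This is a minimal sketch based on the example (212) added by this PR; the endpoint and model names are simply the values used in that example, not defaults you must keep:

```csharp
// Minimal sketch based on examples/212-dotnet-ollama/Program.cs from this PR.
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI.Ollama;
using Microsoft.KernelMemory.AI.OpenAI;

var config = new OllamaConfig
{
    Endpoint = "http://localhost:11434", // default local Ollama endpoint
    TextModel = new OllamaModelConfig("phi3:medium-128k", 131072),
    EmbeddingModel = new OllamaModelConfig("nomic-embed-text", 2048)
};

var memory = new KernelMemoryBuilder()
    .WithOllamaTextGeneration(config, new GPT4oTokenizer())
    .WithOllamaTextEmbeddingGeneration(config, new GPT4oTokenizer())
    .Build();

await memory.ImportTextAsync("Some text to memorize");
var answer = await memory.AskAsync("A question about the imported text");
Console.WriteLine(answer.Result);
```

Note that a tokenizer is passed explicitly here (`GPT4oTokenizer`, matching the new default); the models must first be pulled locally with `ollama pull`.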
dluc authored Sep 5, 2024
1 parent ac5248d commit b06084a
Showing 37 changed files with 1,136 additions and 102 deletions.
2 changes: 1 addition & 1 deletion Directory.Build.props
@@ -2,7 +2,7 @@
<Project>
<PropertyGroup>
<!-- Central version prefix - applies to all nuget packages. -->
<Version>0.71.0</Version>
<Version>0.72.0</Version>

<!-- C# lang version, https://learn.microsoft.com/dotnet/csharp/whats-new -->
<LangVersion>12</LangVersion>
1 change: 1 addition & 0 deletions Directory.Packages.props
@@ -39,6 +39,7 @@
<PackageVersion Include="MongoDB.Driver.GridFS" Version="2.28.0" />
<PackageVersion Include="Moq" Version="4.20.70" />
<PackageVersion Include="Newtonsoft.Json" Version="13.0.3" />
<PackageVersion Include="OllamaSharp" Version="3.0.1" />
<PackageVersion Include="PdfPig" Version="0.1.8" />
<PackageVersion Include="Pgvector" Version="0.3.0" />
<PackageVersion Include="Polly.Core" Version="8.4.1" />
13 changes: 13 additions & 0 deletions KernelMemory.sln
@@ -317,6 +317,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "how-to", "how-to", "{6B992E
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "211-dotnet-WebClient-Intent-Detection", "examples\211-dotnet-WebClient-Intent-Detection\211-dotnet-WebClient-Intent-Detection.csproj", "{84AEC1DD-CBAE-400A-949C-91BA373C587D}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "212-dotnet-ollama", "examples\212-dotnet-ollama\212-dotnet-ollama.csproj", "{B303885D-F64F-4EEB-B085-0014E863AF61}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ollama", "extensions\Ollama\Ollama\Ollama.csproj", "{F192513B-265B-4943-A2A9-44E23B15BA18}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -578,6 +582,13 @@ Global
{84AEC1DD-CBAE-400A-949C-91BA373C587D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{84AEC1DD-CBAE-400A-949C-91BA373C587D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{84AEC1DD-CBAE-400A-949C-91BA373C587D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B303885D-F64F-4EEB-B085-0014E863AF61}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{B303885D-F64F-4EEB-B085-0014E863AF61}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B303885D-F64F-4EEB-B085-0014E863AF61}.Release|Any CPU.ActiveCfg = Release|Any CPU
{F192513B-265B-4943-A2A9-44E23B15BA18}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{F192513B-265B-4943-A2A9-44E23B15BA18}.Debug|Any CPU.Build.0 = Debug|Any CPU
{F192513B-265B-4943-A2A9-44E23B15BA18}.Release|Any CPU.ActiveCfg = Release|Any CPU
{F192513B-265B-4943-A2A9-44E23B15BA18}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -670,6 +681,8 @@ Global
{795CD089-05A9-4800-B6FF-3243CAD7D41B} = {7BA7F1B2-19E2-46EB-B000-513EE2F65769}
{6B992EFC-81B0-4E52-925F-41420BDC40B6} = {7BA7F1B2-19E2-46EB-B000-513EE2F65769}
{84AEC1DD-CBAE-400A-949C-91BA373C587D} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
{B303885D-F64F-4EEB-B085-0014E863AF61} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
{F192513B-265B-4943-A2A9-44E23B15BA18} = {155DA079-E267-49AF-973A-D1D44681970F}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {CC136C62-115C-41D1-B414-F9473EFF6EA8}
91 changes: 48 additions & 43 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/index.md
@@ -112,7 +112,7 @@ Here's comparison table:
| Custom storage schema | some DBs | - |
| Vector DBs with internal embedding | Yes | - |
| Concurrent write to multiple vector DBs | Yes | - |
| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [Anthropic](https://www.anthropic.com), [LLamaSharp](https://github.com/SciSharp/LLamaSharp) via [llama.cpp](https://github.com/ggerganov/llama.cpp), [LM Studio](https://lmstudio.ai/), Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. |
| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [Anthropic](https://www.anthropic.com), [Ollama](https://ollama.com), [LLamaSharp](https://github.com/SciSharp/LLamaSharp), [LM Studio](https://lmstudio.ai), Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. |
| LLMs with dedicated tokenization | Yes | No |
| Cloud deployment | Yes | - |
| Web service with OpenAPI | Yes | - |
4 changes: 2 additions & 2 deletions docs/quickstart.md
@@ -18,8 +18,8 @@ we will set up the service and demonstrate how to use the Memory API from Python
* [.NET 6](https://dotnet.microsoft.com/download) or higher
* Either an [OpenAI API Key](https://platform.openai.com/api-keys) or
[Azure OpenAI deployment](https://azure.microsoft.com/products/ai-services/openai-service). If you are familiar
with llama.cpp or LLamaSharp you can also use a LLama model. However, this may result in slower AI code execution,
depending on your device.
with [Ollama](https://ollama.com) you can also use a local model such as [Microsoft phi3](https://azure.microsoft.com/products/phi-3) and [Meta Llama](https://llama.meta.com).
However, this may result in slower AI code execution, depending on your device.
* A vector database, such as Azure AI Search, Qdrant, or Postgres+pgvector. For basic tests, you can use KM
SimpleVectorDb.
* A copy of the [KM repository](https://github.com/microsoft/kernel-memory).
4 changes: 2 additions & 2 deletions examples/001-dotnet-WebClient/Program.cs
@@ -253,7 +253,7 @@ private static async Task AskSimpleQuestion()
var question = "What's E = m*c^2?";
Console.WriteLine($"Question: {question}");

var answer = await s_memory.AskAsync(question, minRelevance: 0.76);
var answer = await s_memory.AskAsync(question, minRelevance: 0.66);
Console.WriteLine($"\nAnswer: {answer.Result}");

Console.WriteLine("\n====================================\n");
@@ -278,7 +278,7 @@ private static async Task AskSimpleQuestionAndShowSources()
var question = "What's Kernel Memory?";
Console.WriteLine($"Question: {question}");

var answer = await s_memory.AskAsync(question, minRelevance: 0);
var answer = await s_memory.AskAsync(question, minRelevance: 0.5);
Console.WriteLine($"\nAnswer: {answer.Result}\n\n Sources:\n");

// Show sources / citations
2 changes: 1 addition & 1 deletion examples/210-KM-without-builder/Program.cs
@@ -73,7 +73,7 @@ public static async Task Main()
var promptProvider = new EmbeddedPromptProvider();

// AI dependencies
var tokenizer = new GPT4Tokenizer();
var tokenizer = new GPT4oTokenizer();
var embeddingGeneratorHttpClient = new HttpClient();
var embeddingGenerator = new AzureOpenAITextEmbeddingGenerator(azureOpenAIEmbeddingConfig, tokenizer, loggerFactory, embeddingGeneratorHttpClient);
var textGeneratorHttpClient = new HttpClient();
12 changes: 12 additions & 0 deletions examples/212-dotnet-ollama/212-dotnet-ollama.csproj
@@ -0,0 +1,12 @@
<Project Sdk="Microsoft.NET.Sdk.Web">

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\..\service\Core\Core.csproj" />
</ItemGroup>

</Project>
67 changes: 67 additions & 0 deletions examples/212-dotnet-ollama/Program.cs
@@ -0,0 +1,67 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI.Ollama;
using Microsoft.KernelMemory.AI.OpenAI;
using Microsoft.KernelMemory.Diagnostics;

/* This example shows how to use KM with Ollama
*
 * 1. Install and launch Ollama. You should see an icon for the app running in the background.
*
* 2. Download your preferred models, e.g.
* - ollama pull nomic-embed-text
* - ollama pull phi3:medium-128k
*
* 3. Run the code below
*
* 4. Other things
 *    Run "ollama show phi3:medium-128k" to see the model's properties
* Run "ollama list" to see the list of models you have on your system
* Run "ollama serve" if you prefer running Ollama from the command line
*/
public static class Program
{
public static async Task Main()
{
var logLevel = LogLevel.Warning;
SensitiveDataLogger.Enabled = false;

var config = new OllamaConfig
{
Endpoint = "http://localhost:11434",
TextModel = new OllamaModelConfig("phi3:medium-128k", 131072),
EmbeddingModel = new OllamaModelConfig("nomic-embed-text", 2048)
};

var memory = new KernelMemoryBuilder()
.WithOllamaTextGeneration(config, new GPT4oTokenizer())
.WithOllamaTextEmbeddingGeneration(config, new GPT4oTokenizer())
.Configure(builder => builder.Services.AddLogging(l =>
{
l.SetMinimumLevel(logLevel);
l.AddSimpleConsole(c => c.SingleLine = true);
}))
.Build();

// Import some text
await memory.ImportTextAsync("Today is October 32nd, 2476");

        // Generate an answer - This uses Ollama both for the embeddings used to find relevant data and to generate the answer
var answer = await memory.AskAsync("What's the current date (don't check for validity)?");
Console.WriteLine(answer.Question);
Console.WriteLine(answer.Result);

/*
-- Output using phi3:medium-128k:
What's the current date (don't check for validity)?
The given fact states that "Today is October 32nd, 2476." However, it appears to be an incorrect statement as
there are never more than 31 days in any month. If we consider this date without checking its validity and accept
the stated day of October as being 32, then the current date would be "October 32nd, 2476." However, it is important
to note that this date does not align with our calendar system.
*/
}
}
53 changes: 28 additions & 25 deletions examples/README.md
@@ -6,29 +6,32 @@ Some examples about how to use Kernel Memory.
2. [Using Kernel Memory web service to upload documents and answer questions](001-dotnet-WebClient)
3. [Importing files and asking question without running the service (serverless mode)](002-dotnet-Serverless)
4. [Using KM Plugin for Semantic Kernel](003-dotnet-SemanticKernel-plugin)
5. [Processing files with custom logic (custom handlers) in serverless mode](004-dotnet-serverless-custom-pipeline)
6. [Processing files with custom logic (custom handlers) in asynchronous mode](005-dotnet-AsyncMemoryCustomPipeline)
5. Customizations
* [Processing files with custom logic (custom handlers) in serverless mode](004-dotnet-serverless-custom-pipeline)
* [Processing files with custom logic (custom handlers) in asynchronous mode](005-dotnet-AsyncMemoryCustomPipeline)
* [Customizing RAG and summarization prompts](101-dotnet-custom-Prompts)
* [Custom partitioning/text chunking options](102-dotnet-custom-partitioning-options)
* [Using a custom embedding/vector generator](103-dotnet-custom-EmbeddingGenerator)
* [Using custom content decoders](108-dotnet-custom-content-decoders)
* [Using a custom web scraper to fetch web pages](109-dotnet-custom-webscraper)
* [Writing and using a custom ingestion handler](201-dotnet-serverless-custom-handler)
* [Using Context Parameters to customize RAG prompt during a request](209-dotnet-using-context-overrides)
6. Local models and external connectors
* [Using custom LLMs](104-dotnet-custom-LLM)
* [Using local LLMs with Ollama](212-dotnet-ollama)
* [Using local LLMs with llama.cpp via LlamaSharp](105-dotnet-serverless-llamasharp)
* [Using local models with LM Studio](208-dotnet-lmstudio)
* [Using Semantic Kernel LLM connectors](107-dotnet-SemanticKernel-TextCompletion)
* [Generating answers with Anthropic LLMs](110-dotnet-anthropic)
7. [Upload files and ask questions from command line using curl](006-curl-calling-webservice)
8. [Customizing RAG and summarization prompts](101-dotnet-custom-Prompts)
9. [Custom partitioning/text chunking options](102-dotnet-custom-partitioning-options)
10. [Using a custom embedding/vector generator](103-dotnet-custom-EmbeddingGenerator)
11. [Using custom LLMs](104-dotnet-custom-LLM)
12. [Using LLama](105-dotnet-serverless-llamasharp)
13. [Summarizing documents, using synthetic memories](106-dotnet-retrieve-synthetics)
14. [Using Semantic Kernel LLM connectors](107-dotnet-SemanticKernel-TextCompletion)
15. [Using custom content decoders](108-dotnet-custom-content-decoders)
16. [Using a custom web scraper to fetch web pages](109-dotnet-custom-webscraper)
17. [Generating answers with Anthropic LLMs](110-dotnet-anthropic)
18. [Hybrid Search with Azure AI Search](111-dotnet-azure-ai-hybrid-search)
19. [Writing and using a custom ingestion handler](201-dotnet-serverless-custom-handler)
20. [Running a single asynchronous pipeline handler as a standalone service](202-dotnet-custom-handler-as-a-service)
21. [Test project using KM package from nuget.org](203-dotnet-using-core-nuget)
22. [Integrating Memory with ASP.NET applications and controllers](204-dotnet-ASP.NET-MVC-integration)
23. [Sample code showing how to extract text from files](205-dotnet-extract-text-from-docs)
24. [.NET configuration and logging](206-dotnet-configuration-and-logging)
25. [Expanding chunks retrieving adjacent partitions](207-dotnet-expanding-chunks-on-retrieval)
26. [Using local models via LM Studio](208-dotnet-lmstudio)
27. [Using Context Parameters to customize RAG prompt during a request](209-dotnet-using-context-overrides)
28. [Creating a Memory instance without KernelMemoryBuilder](210-KM-without-builder)
29. [Intent Detection](211-dotnet-WebClient-Intent-Detection)
30. [Fetching data from Discord](301-discord-test-application)
8. [Summarizing documents, using synthetic memories](106-dotnet-retrieve-synthetics)
9. [Hybrid Search with Azure AI Search](111-dotnet-azure-ai-hybrid-search)
10. [Running a single asynchronous pipeline handler as a standalone service](202-dotnet-custom-handler-as-a-service)
11. [Integrating Memory with ASP.NET applications and controllers](204-dotnet-ASP.NET-MVC-integration)
12. [Sample code showing how to extract text from files](205-dotnet-extract-text-from-docs)
13. [.NET configuration and logging](206-dotnet-configuration-and-logging)
14. [Expanding chunks retrieving adjacent partitions](207-dotnet-expanding-chunks-on-retrieval)
15. [Creating a Memory instance without KernelMemoryBuilder](210-KM-without-builder)
16. [Intent Detection](211-dotnet-WebClient-Intent-Detection)
17. [Fetching data from Discord](301-discord-test-application)
18. [Test project using KM package from nuget.org](203-dotnet-using-core-nuget)
4 changes: 2 additions & 2 deletions extensions/Anthropic/AnthropicTextGeneration.cs
@@ -68,8 +68,8 @@ public AnthropicTextGeneration(
{
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
nameof(GPT4Tokenizer));
textTokenizer = new GPT4Tokenizer();
nameof(GPT4oTokenizer));
textTokenizer = new GPT4oTokenizer();
}

this._textTokenizer = textTokenizer;
4 changes: 2 additions & 2 deletions extensions/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs
@@ -36,8 +36,8 @@ public AzureOpenAITextEmbeddingGenerator(
{
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
nameof(GPT4Tokenizer));
textTokenizer = new GPT4Tokenizer();
nameof(GPT4oTokenizer));
textTokenizer = new GPT4oTokenizer();
}

this._textTokenizer = textTokenizer;
4 changes: 2 additions & 2 deletions extensions/AzureOpenAI/AzureOpenAITextGenerator.cs
@@ -39,8 +39,8 @@ public AzureOpenAITextGenerator(
{
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
nameof(GPT4Tokenizer));
textTokenizer = new GPT4Tokenizer();
nameof(GPT4oTokenizer));
textTokenizer = new GPT4oTokenizer();
}

this._textTokenizer = textTokenizer;
4 changes: 2 additions & 2 deletions extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs
@@ -47,8 +47,8 @@ public LlamaSharpTextGenerator(
{
this._log.LogWarning(
"Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors",
nameof(GPT4Tokenizer));
textTokenizer = new GPT4Tokenizer();
nameof(GPT4oTokenizer));
textTokenizer = new GPT4oTokenizer();
}

this._textTokenizer = textTokenizer;