diff --git a/Directory.Build.props b/Directory.Build.props index f44861a1e..443c9ccfd 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -2,7 +2,7 @@ - 0.71.0 + 0.72.0 12 diff --git a/Directory.Packages.props b/Directory.Packages.props index 97b15ba98..95feb4712 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -39,6 +39,7 @@ + diff --git a/KernelMemory.sln b/KernelMemory.sln index 7e07e78e8..875b417d2 100644 --- a/KernelMemory.sln +++ b/KernelMemory.sln @@ -317,6 +317,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "how-to", "how-to", "{6B992E EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "211-dotnet-WebClient-Intent-Detection", "examples\211-dotnet-WebClient-Intent-Detection\211-dotnet-WebClient-Intent-Detection.csproj", "{84AEC1DD-CBAE-400A-949C-91BA373C587D}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "212-dotnet-ollama", "examples\212-dotnet-ollama\212-dotnet-ollama.csproj", "{B303885D-F64F-4EEB-B085-0014E863AF61}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ollama", "extensions\Ollama\Ollama\Ollama.csproj", "{F192513B-265B-4943-A2A9-44E23B15BA18}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -578,6 +582,13 @@ Global {84AEC1DD-CBAE-400A-949C-91BA373C587D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {84AEC1DD-CBAE-400A-949C-91BA373C587D}.Debug|Any CPU.Build.0 = Debug|Any CPU {84AEC1DD-CBAE-400A-949C-91BA373C587D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B303885D-F64F-4EEB-B085-0014E863AF61}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B303885D-F64F-4EEB-B085-0014E863AF61}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B303885D-F64F-4EEB-B085-0014E863AF61}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F192513B-265B-4943-A2A9-44E23B15BA18}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F192513B-265B-4943-A2A9-44E23B15BA18}.Debug|Any CPU.Build.0 = Debug|Any CPU + {F192513B-265B-4943-A2A9-44E23B15BA18}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F192513B-265B-4943-A2A9-44E23B15BA18}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -670,6 +681,8 @@ Global {795CD089-05A9-4800-B6FF-3243CAD7D41B} = {7BA7F1B2-19E2-46EB-B000-513EE2F65769} {6B992EFC-81B0-4E52-925F-41420BDC40B6} = {7BA7F1B2-19E2-46EB-B000-513EE2F65769} {84AEC1DD-CBAE-400A-949C-91BA373C587D} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841} + {B303885D-F64F-4EEB-B085-0014E863AF61} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841} + {F192513B-265B-4943-A2A9-44E23B15BA18} = {155DA079-E267-49AF-973A-D1D44681970F} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {CC136C62-115C-41D1-B414-F9473EFF6EA8} diff --git a/README.md b/README.md index 56d0e4779..eb43f499f 100644 --- a/README.md +++ b/README.md @@ -167,26 +167,26 @@ storage engines (known as "connectors") varies across languages. 
Here's comparison table: -| Feature | Kernel Memory | Semantic Memory | -| --------------------------------------- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ------------------------------------------------------------------------------------------------------------ | -| Data formats | Web pages, PDF, Images, Word, PowerPoint, Excel, Markdown, Text, JSON, HTML | Text only | -| Search | Cosine similarity, Hybrid search with filters (AND/OR conditions) | Cosine similarity | -| Language support | Any language, command line tools, browser extensions, low-code/no-code apps, chatbots, assistants, etc. | C#, Python, Java | +| Feature | Kernel Memory | Semantic Memory | +| --------------------------------------- |-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ------------------------------------------------------------------------------------------------------------ | +| Data formats | Web pages, PDF, Images, Word, PowerPoint, Excel, Markdown, Text, JSON, HTML | Text only | +| Search | Cosine similarity, Hybrid search with filters (AND/OR conditions) | Cosine similarity | +| Language support | Any language, command line tools, browser extensions, low-code/no-code apps, chatbots, assistants, etc. | C#, Python, Java | | Storage engines | [Azure AI Search](https://azure.microsoft.com/products/ai-services/ai-search), [Elasticsearch](https://www.nuget.org/packages/FreeMindLabs.KernelMemory.Elasticsearch), [MongoDB Atlas](https://www.mongodb.com/atlas/database), [Postgres+pgvector](https://github.com/microsoft/kernel-memory/extensions/postgres), [Qdrant](https://qdrant.tech), [Redis](https://redis.io), [SQL Server](https://www.nuget.org/packages/Microsoft.KernelMemory.MemoryDb.SQLServer/), In memory KNN, On disk KNN. 
| Azure AI Search, Chroma, DuckDB, Kusto, Milvus, MongoDB, Pinecone, Postgres, Qdrant, Redis, SQLite, Weaviate | -| File storage | Disk, [Azure Blobs](https://learn.microsoft.com/azure/storage/blobs/storage-blobs-introduction), [AWS S3](https://aws.amazon.com/s3), [MongoDB Atlas](https://www.mongodb.com/atlas/database), In memory (volatile) | - | -| RAG | Yes, with sources lookup | - | -| Summarization | Yes | - | -| OCR | Yes via [Azure Document Intelligence](https://azure.microsoft.com/products/ai-services/ai-document-intelligence) | - | -| Security Filters | Yes | - | -| Large document ingestion | Yes, including async processing using queues ([Azure Queues](https://learn.microsoft.com/azure/storage/queues/storage-queues-introduction), [RabbitMQ](https://www.rabbitmq.com), File based or In memory queues) | - | -| Document storage | Yes | - | -| Custom storage schema | some DBs | - | -| Vector DBs with internal embedding | Yes | - | -| Concurrent write to multiple vector DBs | Yes | - | -| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [Anthropic](https://www.anthropic.com), [LLamaSharp](https://github.com/SciSharp/LLamaSharp) via [llama.cpp](https://github.com/ggerganov/llama.cpp), [LM Studio](https://lmstudio.ai/), Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. | -| LLMs with dedicated tokenization | Yes | No | -| Cloud deployment | Yes | - | -| Web service with OpenAPI | Yes | - | +| File storage | Disk, [Azure Blobs](https://learn.microsoft.com/azure/storage/blobs/storage-blobs-introduction), [AWS S3](https://aws.amazon.com/s3), [MongoDB Atlas](https://www.mongodb.com/atlas/database), In memory (volatile) | - | +| RAG | Yes, with sources lookup | - | +| Summarization | Yes | - | +| OCR | Yes via [Azure Document Intelligence](https://azure.microsoft.com/products/ai-services/ai-document-intelligence) | - | +| Security Filters | Yes | - | +| Large document ingestion | Yes, including async processing using queues ([Azure Queues](https://learn.microsoft.com/azure/storage/queues/storage-queues-introduction), [RabbitMQ](https://www.rabbitmq.com), File based or In memory queues) | - | +| Document storage | Yes | - | +| Custom storage schema | some DBs | - | +| Vector DBs with internal embedding | Yes | - | +| Concurrent write to multiple vector DBs | Yes | - | +| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [Anthropic](https://www.anthropic.com), [Ollama](https://ollama.com), [LLamaSharp](https://github.com/SciSharp/LLamaSharp), [LM Studio](https://lmstudio.ai), Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. | +| LLMs with dedicated tokenization | Yes | No | +| Cloud deployment | Yes | - | +| Web service with OpenAPI | Yes | - | ## Quick test using the Docker image @@ -303,30 +303,35 @@ running the service locally with OpenAPI enabled. 2. [Using Kernel Memory web service to upload documents and answer questions](examples/001-dotnet-WebClient) 3. [Importing files and asking question without running the service (serverless mode)](examples/002-dotnet-Serverless) 4. [Using KM Plugin for Semantic Kernel](examples/003-dotnet-SemanticKernel-plugin) -5. [Processing files with custom logic (custom handlers) in serverless mode](examples/004-dotnet-serverless-custom-pipeline) -6. 
[Processing files with custom logic (custom handlers) in asynchronous mode](examples/005-dotnet-AsyncMemoryCustomPipeline) -7. [Upload files and ask questions from command line using curl](examples/006-curl-calling-webservice) -8. [Customizing RAG and summarization prompts](examples/101-dotnet-custom-Prompts) -9. [Custom partitioning/text chunking options](examples/102-dotnet-custom-partitioning-options) -10. [Using a custom embedding/vector generator](examples/103-dotnet-custom-EmbeddingGenerator) -11. [Using custom LLMs](examples/104-dotnet-custom-LLM) -12. [Using LLama](examples/105-dotnet-serverless-llamasharp) -13. [Summarizing documents, using synthetic memories](examples/106-dotnet-retrieve-synthetics) -14. [Using Semantic Kernel LLM connectors](examples/107-dotnet-SemanticKernel-TextCompletion) -15. [Using custom content decoders](examples/108-dotnet-custom-content-decoders) -16. [Using a custom web scraper to fetch web pages](examples/109-dotnet-custom-webscraper) -17. [Generating answers with Anthropic LLMs](examples/110-dotnet-anthropic) -18. [Hybrid Search with Azure AI Search](examples/111-dotnet-azure-ai-hybrid-search) -19. [Writing and using a custom ingestion handler](examples/201-dotnet-serverless-custom-handler) -20. [Running a single asynchronous pipeline handler as a standalone service](examples/202-dotnet-custom-handler-as-a-service) -21. [Test project using KM package from nuget.org](examples/203-dotnet-using-core-nuget) -22. [Integrating Memory with ASP.NET applications and controllers](examples/204-dotnet-ASP.NET-MVC-integration) -23. [Sample code showing how to extract text from files](examples/205-dotnet-extract-text-from-docs) -24. [.NET configuration and logging](examples/206-dotnet-configuration-and-logging) -25. [Expanding chunks retrieving adjacent partitions](examples/207-dotnet-expanding-chunks-on-retrieval) -26. [Using local models via LM Studio](examples/208-dotnet-lmstudio) -27. [Using Context Parameters to customize RAG prompt during a request](examples/209-dotnet-using-context-overrides) -28. [Creating a Memory instance without KernelMemoryBuilder](examples/210-KM-without-builder) +5. Customizations + * [Processing files with custom logic (custom handlers) in serverless mode](examples/004-dotnet-serverless-custom-pipeline) + * [Processing files with custom logic (custom handlers) in asynchronous mode](examples/005-dotnet-AsyncMemoryCustomPipeline) + * [Customizing RAG and summarization prompts](examples/101-dotnet-custom-Prompts) + * [Custom partitioning/text chunking options](examples/102-dotnet-custom-partitioning-options) + * [Using a custom embedding/vector generator](examples/103-dotnet-custom-EmbeddingGenerator) + * [Using custom content decoders](examples/108-dotnet-custom-content-decoders) + * [Using a custom web scraper to fetch web pages](examples/109-dotnet-custom-webscraper) + * [Writing and using a custom ingestion handler](examples/201-dotnet-serverless-custom-handler) + * [Using Context Parameters to customize RAG prompt during a request](examples/209-dotnet-using-context-overrides) +6. 
Local models and external connectors + * [Using custom LLMs](examples/104-dotnet-custom-LLM) + * [Using local LLMs with Ollama](212-dotnet-ollama) + * [Using local LLMs with llama.cpp via LlamaSharp](examples/105-dotnet-serverless-llamasharp) + * [Using local models with LM Studio](examples/208-dotnet-lmstudio) + * [Using Semantic Kernel LLM connectors](examples/107-dotnet-SemanticKernel-TextCompletion) + * [Generating answers with Anthropic LLMs](examples/110-dotnet-anthropic) +7. [Upload files and ask questions from command line using curl](examples/006-curl-calling-webservice) +8. [Summarizing documents, using synthetic memories](examples/106-dotnet-retrieve-synthetics) +9. [Hybrid Search with Azure AI Search](examples/111-dotnet-azure-ai-hybrid-search) +10. [Running a single asynchronous pipeline handler as a standalone service](examples/202-dotnet-custom-handler-as-a-service) +11. [Integrating Memory with ASP.NET applications and controllers](examples/204-dotnet-ASP.NET-MVC-integration) +12. [Sample code showing how to extract text from files](examples/205-dotnet-extract-text-from-docs) +13. [.NET configuration and logging](examples/206-dotnet-configuration-and-logging) +14. [Expanding chunks retrieving adjacent partitions](examples/207-dotnet-expanding-chunks-on-retrieval) +15. [Creating a Memory instance without KernelMemoryBuilder](examples/210-KM-without-builder) +16. [Intent Detection](examples/211-dotnet-WebClient-Intent-Detection) +17. [Fetching data from Discord](examples/301-discord-test-application) +18. [Test project using KM package from nuget.org](examples/203-dotnet-using-core-nuget) ## Tools diff --git a/docs/index.md b/docs/index.md index c1f2be10b..7bc5dc6f8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -112,7 +112,7 @@ Here's comparison table: | Custom storage schema | some DBs | - | | Vector DBs with internal embedding | Yes | - | | Concurrent write to multiple vector DBs | Yes | - | -| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [Anthropic](https://www.anthropic.com), [LLamaSharp](https://github.com/SciSharp/LLamaSharp) via [llama.cpp](https://github.com/ggerganov/llama.cpp), [LM Studio](https://lmstudio.ai/), Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. | +| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [Anthropic](https://www.anthropic.com), [Ollama](https://ollama.com), [LLamaSharp](https://github.com/SciSharp/LLamaSharp), [LM Studio](https://lmstudio.ai), Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. | | LLMs with dedicated tokenization | Yes | No | | Cloud deployment | Yes | - | | Web service with OpenAPI | Yes | - | diff --git a/docs/quickstart.md b/docs/quickstart.md index d96ac1e79..776f0026c 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -18,8 +18,8 @@ we will set up the service and demonstrate how to use the Memory API from Python * [.NET 6](https://dotnet.microsoft.com/download) or higher * Either an [OpenAI API Key](https://platform.openai.com/api-keys) or [Azure OpenAI deployment](https://azure.microsoft.com/products/ai-services/openai-service). If you are familiar - with llama.cpp or LLamaSharp you can also use a LLama model. However, this may result in slower AI code execution, - depending on your device. 
+ with [Ollama](https://ollama.com) you can also use a local model such as [Microsoft phi3](https://azure.microsoft.com/products/phi-3) and [Meta LLama](https://llama.meta.com). + However, this may result in slower AI code execution, depending on your device. * A vector database, such as Azure AI Search, Qdrant, or Postgres+pgvector. For basic tests, you can use KM SimpleVectorDb. * A copy of the [KM repository](https://github.com/microsoft/kernel-memory). diff --git a/examples/001-dotnet-WebClient/Program.cs b/examples/001-dotnet-WebClient/Program.cs index 437dee4e4..3ad5f8ad7 100644 --- a/examples/001-dotnet-WebClient/Program.cs +++ b/examples/001-dotnet-WebClient/Program.cs @@ -253,7 +253,7 @@ private static async Task AskSimpleQuestion() var question = "What's E = m*c^2?"; Console.WriteLine($"Question: {question}"); - var answer = await s_memory.AskAsync(question, minRelevance: 0.76); + var answer = await s_memory.AskAsync(question, minRelevance: 0.66); Console.WriteLine($"\nAnswer: {answer.Result}"); Console.WriteLine("\n====================================\n"); @@ -278,7 +278,7 @@ private static async Task AskSimpleQuestionAndShowSources() var question = "What's Kernel Memory?"; Console.WriteLine($"Question: {question}"); - var answer = await s_memory.AskAsync(question, minRelevance: 0); + var answer = await s_memory.AskAsync(question, minRelevance: 0.5); Console.WriteLine($"\nAnswer: {answer.Result}\n\n Sources:\n"); // Show sources / citations diff --git a/examples/210-KM-without-builder/Program.cs b/examples/210-KM-without-builder/Program.cs index 6c1439617..b68a06f44 100644 --- a/examples/210-KM-without-builder/Program.cs +++ b/examples/210-KM-without-builder/Program.cs @@ -73,7 +73,7 @@ public static async Task Main() var promptProvider = new EmbeddedPromptProvider(); // AI dependencies - var tokenizer = new GPT4Tokenizer(); + var tokenizer = new GPT4oTokenizer(); var embeddingGeneratorHttpClient = new HttpClient(); var embeddingGenerator = new AzureOpenAITextEmbeddingGenerator(azureOpenAIEmbeddingConfig, tokenizer, loggerFactory, embeddingGeneratorHttpClient); var textGeneratorHttpClient = new HttpClient(); diff --git a/examples/212-dotnet-ollama/212-dotnet-ollama.csproj b/examples/212-dotnet-ollama/212-dotnet-ollama.csproj new file mode 100644 index 000000000..d90b61733 --- /dev/null +++ b/examples/212-dotnet-ollama/212-dotnet-ollama.csproj @@ -0,0 +1,12 @@ + + + + net8.0 + enable + + + + + + + diff --git a/examples/212-dotnet-ollama/Program.cs b/examples/212-dotnet-ollama/Program.cs new file mode 100644 index 000000000..271ef9eb1 --- /dev/null +++ b/examples/212-dotnet-ollama/Program.cs @@ -0,0 +1,67 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.KernelMemory; +using Microsoft.KernelMemory.AI.Ollama; +using Microsoft.KernelMemory.AI.OpenAI; +using Microsoft.KernelMemory.Diagnostics; + +/* This example shows how to use KM with Ollama + * + * 1. Install and launch Ollama. You should see an icon for the app running the background. + * + * 2. Download your preferred models, e.g. + * - ollama pull nomic-embed-text + * - ollama pull phi3:medium-128k + * + * 3. Run the code below + * + * 4. 
Other things
+ * Run "ollama show phi3:medium-128k" to see the model's properties
+ * Run "ollama list" to see the list of models you have on your system
+ * Run "ollama serve" if you prefer running Ollama from the command line
+ */
+public static class Program
+{
+ public static async Task Main()
+ {
+ var logLevel = LogLevel.Warning;
+ SensitiveDataLogger.Enabled = false;
+
+ var config = new OllamaConfig
+ {
+ Endpoint = "http://localhost:11434",
+ TextModel = new OllamaModelConfig("phi3:medium-128k", 131072),
+ EmbeddingModel = new OllamaModelConfig("nomic-embed-text", 2048)
+ };
+
+ var memory = new KernelMemoryBuilder()
+ .WithOllamaTextGeneration(config, new GPT4oTokenizer())
+ .WithOllamaTextEmbeddingGeneration(config, new GPT4oTokenizer())
+ .Configure(builder => builder.Services.AddLogging(l =>
+ {
+ l.SetMinimumLevel(logLevel);
+ l.AddSimpleConsole(c => c.SingleLine = true);
+ }))
+ .Build();
+
+ // Import some text
+ await memory.ImportTextAsync("Today is October 32nd, 2476");
+
+ // Generate an answer - this uses Ollama both for the embeddings used to find relevant data and for generating the answer
+ var answer = await memory.AskAsync("What's the current date (don't check for validity)?");
+ Console.WriteLine(answer.Question);
+ Console.WriteLine(answer.Result);
+
+ /*
+
+ -- Output using phi3:medium-128k:
+
+ What's the current date (don't check for validity)?
+ The given fact states that "Today is October 32nd, 2476." However, it appears to be an incorrect statement as
+ there are never more than 31 days in any month. If we consider this date without checking its validity and accept
+ the stated day of October as being 32, then the current date would be "October 32nd, 2476." However, it is important
+ to note that this date does not align with our calendar system.
+
+ */
+ }
+}
diff --git a/examples/README.md b/examples/README.md
index c365608bd..d9874ca30 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -6,29 +6,32 @@ Some examples about how to use Kernel Memory.
2. [Using Kernel Memory web service to upload documents and answer questions](001-dotnet-WebClient)
3. [Importing files and asking question without running the service (serverless mode)](002-dotnet-Serverless)
4. [Using KM Plugin for Semantic Kernel](003-dotnet-SemanticKernel-plugin)
-5. [Processing files with custom logic (custom handlers) in serverless mode](004-dotnet-serverless-custom-pipeline)
-6. [Processing files with custom logic (custom handlers) in asynchronous mode](005-dotnet-AsyncMemoryCustomPipeline)
+5. Customizations
+ * [Processing files with custom logic (custom handlers) in serverless mode](004-dotnet-serverless-custom-pipeline)
+ * [Processing files with custom logic (custom handlers) in asynchronous mode](005-dotnet-AsyncMemoryCustomPipeline)
+ * [Customizing RAG and summarization prompts](101-dotnet-custom-Prompts)
+ * [Custom partitioning/text chunking options](102-dotnet-custom-partitioning-options)
+ * [Using a custom embedding/vector generator](103-dotnet-custom-EmbeddingGenerator)
+ * [Using custom content decoders](108-dotnet-custom-content-decoders)
+ * [Using a custom web scraper to fetch web pages](109-dotnet-custom-webscraper)
+ * [Writing and using a custom ingestion handler](201-dotnet-serverless-custom-handler)
+ * [Using Context Parameters to customize RAG prompt during a request](209-dotnet-using-context-overrides)
+6. 
Local models and external connectors + * [Using custom LLMs](104-dotnet-custom-LLM) + * [Using local LLMs with Ollama](212-dotnet-ollama) + * [Using local LLMs with llama.cpp via LlamaSharp](105-dotnet-serverless-llamasharp) + * [Using local models with LM Studio](208-dotnet-lmstudio) + * [Using Semantic Kernel LLM connectors](107-dotnet-SemanticKernel-TextCompletion) + * [Generating answers with Anthropic LLMs](110-dotnet-anthropic) 7. [Upload files and ask questions from command line using curl](006-curl-calling-webservice) -8. [Customizing RAG and summarization prompts](101-dotnet-custom-Prompts) -9. [Custom partitioning/text chunking options](102-dotnet-custom-partitioning-options) -10. [Using a custom embedding/vector generator](103-dotnet-custom-EmbeddingGenerator) -11. [Using custom LLMs](104-dotnet-custom-LLM) -12. [Using LLama](105-dotnet-serverless-llamasharp) -13. [Summarizing documents, using synthetic memories](106-dotnet-retrieve-synthetics) -14. [Using Semantic Kernel LLM connectors](107-dotnet-SemanticKernel-TextCompletion) -15. [Using custom content decoders](108-dotnet-custom-content-decoders) -16. [Using a custom web scraper to fetch web pages](109-dotnet-custom-webscraper) -17. [Generating answers with Anthropic LLMs](110-dotnet-anthropic) -18. [Hybrid Search with Azure AI Search](111-dotnet-azure-ai-hybrid-search) -19. [Writing and using a custom ingestion handler](201-dotnet-serverless-custom-handler) -20. [Running a single asynchronous pipeline handler as a standalone service](202-dotnet-custom-handler-as-a-service) -21. [Test project using KM package from nuget.org](203-dotnet-using-core-nuget) -22. [Integrating Memory with ASP.NET applications and controllers](204-dotnet-ASP.NET-MVC-integration) -23. [Sample code showing how to extract text from files](205-dotnet-extract-text-from-docs) -24. [.NET configuration and logging](206-dotnet-configuration-and-logging) -25. [Expanding chunks retrieving adjacent partitions](207-dotnet-expanding-chunks-on-retrieval) -26. [Using local models via LM Studio](208-dotnet-lmstudio) -27. [Using Context Parameters to customize RAG prompt during a request](209-dotnet-using-context-overrides) -28. [Creating a Memory instance without KernelMemoryBuilder](210-KM-without-builder) -29. [Intent Detection](211-dotnet-WebClient-Intent-Detection) -30. [Fetching data from Discord](301-discord-test-application) +8. [Summarizing documents, using synthetic memories](106-dotnet-retrieve-synthetics) +9. [Hybrid Search with Azure AI Search](111-dotnet-azure-ai-hybrid-search) +10. [Running a single asynchronous pipeline handler as a standalone service](202-dotnet-custom-handler-as-a-service) +11. [Integrating Memory with ASP.NET applications and controllers](204-dotnet-ASP.NET-MVC-integration) +12. [Sample code showing how to extract text from files](205-dotnet-extract-text-from-docs) +13. [.NET configuration and logging](206-dotnet-configuration-and-logging) +14. [Expanding chunks retrieving adjacent partitions](207-dotnet-expanding-chunks-on-retrieval) +15. [Creating a Memory instance without KernelMemoryBuilder](210-KM-without-builder) +16. [Intent Detection](211-dotnet-WebClient-Intent-Detection) +17. [Fetching data from Discord](301-discord-test-application) +18. 
[Test project using KM package from nuget.org](203-dotnet-using-core-nuget) diff --git a/extensions/Anthropic/AnthropicTextGeneration.cs b/extensions/Anthropic/AnthropicTextGeneration.cs index c257d2aec..4d8597b95 100644 --- a/extensions/Anthropic/AnthropicTextGeneration.cs +++ b/extensions/Anthropic/AnthropicTextGeneration.cs @@ -68,8 +68,8 @@ public AnthropicTextGeneration( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; diff --git a/extensions/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs b/extensions/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs index 4e1ca1e5c..b57fe2ac6 100644 --- a/extensions/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs +++ b/extensions/AzureOpenAI/AzureOpenAITextEmbeddingGenerator.cs @@ -36,8 +36,8 @@ public AzureOpenAITextEmbeddingGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; diff --git a/extensions/AzureOpenAI/AzureOpenAITextGenerator.cs b/extensions/AzureOpenAI/AzureOpenAITextGenerator.cs index bd0a1b529..313b46e3d 100644 --- a/extensions/AzureOpenAI/AzureOpenAITextGenerator.cs +++ b/extensions/AzureOpenAI/AzureOpenAITextGenerator.cs @@ -39,8 +39,8 @@ public AzureOpenAITextGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; diff --git a/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs index b906ebc06..fcb4fa3d3 100644 --- a/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs +++ b/extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs @@ -47,8 +47,8 @@ public LlamaSharpTextGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; diff --git a/extensions/Ollama/Ollama/DependencyInjection.cs b/extensions/Ollama/Ollama/DependencyInjection.cs new file mode 100644 index 000000000..618fc3497 --- /dev/null +++ b/extensions/Ollama/Ollama/DependencyInjection.cs @@ -0,0 +1,120 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.KernelMemory.AI; +using Microsoft.KernelMemory.AI.Ollama; +using OllamaSharp; + +#pragma warning disable IDE0130 // reduce number of "using" statements +// ReSharper disable once CheckNamespace - reduce number of "using" statements +namespace Microsoft.KernelMemory; + +/// +/// Kernel Memory builder extensions +/// +public static partial class KernelMemoryBuilderExtensions +{ + public static IKernelMemoryBuilder WithOllamaTextGeneration( + this IKernelMemoryBuilder builder, + OllamaConfig config, + ITextTokenizer? 
textTokenizer = null) + { + builder.Services.AddOllamaTextGeneration(config, textTokenizer); + return builder; + } + + public static IKernelMemoryBuilder WithOllamaTextGeneration( + this IKernelMemoryBuilder builder, + string modelName, + string endpoint = "http://localhost:11434", + ITextTokenizer? textTokenizer = null) + { + builder.Services.AddOllamaTextGeneration(modelName, endpoint, textTokenizer); + return builder; + } + + public static IKernelMemoryBuilder WithOllamaTextEmbeddingGeneration( + this IKernelMemoryBuilder builder, + OllamaConfig config, + ITextTokenizer? textTokenizer = null) + { + builder.Services.AddOllamaTextEmbeddingGeneration(config, textTokenizer); + return builder; + } + + public static IKernelMemoryBuilder WithOllamaTextEmbeddingGeneration( + this IKernelMemoryBuilder builder, + string modelName, + string endpoint = "http://localhost:11434", + ITextTokenizer? textTokenizer = null) + { + builder.Services.AddOllamaTextEmbeddingGeneration(modelName, endpoint, textTokenizer); + return builder; + } +} + +/// +/// .NET IServiceCollection dependency injection extensions. +/// +public static partial class DependencyInjection +{ + public static IServiceCollection AddOllamaTextGeneration( + this IServiceCollection services, + string modelName, + string endpoint = "http://localhost:11434", + ITextTokenizer? textTokenizer = null) + { + return services + .AddSingleton( + serviceProvider => new OllamaTextGenerator( + new OllamaApiClient(new Uri(endpoint), modelName), + new OllamaModelConfig { ModelName = modelName }, + textTokenizer, + serviceProvider.GetService())); + } + + public static IServiceCollection AddOllamaTextGeneration( + this IServiceCollection services, + OllamaConfig config, + ITextTokenizer? textTokenizer = null) + { + return services + .AddSingleton( + serviceProvider => new OllamaTextGenerator( + new OllamaApiClient(new Uri(config.Endpoint), config.TextModel.ModelName), + config.TextModel, + textTokenizer, + serviceProvider.GetService())); + } + + public static IServiceCollection AddOllamaTextEmbeddingGeneration( + this IServiceCollection services, + string modelName, + string endpoint = "http://localhost:11434", + ITextTokenizer? textTokenizer = null) + { + return services + .AddSingleton( + serviceProvider => new OllamaTextEmbeddingGenerator( + new OllamaApiClient(new Uri(endpoint), modelName), + new OllamaModelConfig { ModelName = modelName }, + textTokenizer, + serviceProvider.GetService())); + } + + public static IServiceCollection AddOllamaTextEmbeddingGeneration( + this IServiceCollection services, + OllamaConfig config, + ITextTokenizer? 
textTokenizer = null) + { + return services + .AddSingleton( + serviceProvider => new OllamaTextEmbeddingGenerator( + new OllamaApiClient(new Uri(config.Endpoint), config.EmbeddingModel.ModelName), + config.EmbeddingModel, + textTokenizer, + serviceProvider.GetService())); + } +} diff --git a/extensions/Ollama/Ollama/Ollama.csproj b/extensions/Ollama/Ollama/Ollama.csproj new file mode 100644 index 000000000..e26c65d47 --- /dev/null +++ b/extensions/Ollama/Ollama/Ollama.csproj @@ -0,0 +1,33 @@ + + + + net8.0 + LatestMajor + Microsoft.KernelMemory.AI.Ollama + Microsoft.KernelMemory.AI.Ollama + $(NoWarn);KMEXP00;KMEXP01;CA1724; + + + + true + Microsoft.KernelMemory.AI.Ollama + Ollama LLM connector for Kernel Memory + Provide access to Ollama LLM models in Kernel Memory to generate embeddings and text + Ollama, Memory, RAG, Kernel Memory, Semantic Memory, Episodic Memory, Declarative Memory, AI, Artificial Intelligence, Embeddings, Vector DB, Vector Search, Semantic Search, Memory DB, ETL + bin/$(Configuration)/$(TargetFramework)/$(AssemblyName).xml + + + + + + + + + + + + + + + + diff --git a/extensions/Ollama/Ollama/OllamaConfig.cs b/extensions/Ollama/Ollama/OllamaConfig.cs new file mode 100644 index 000000000..6afa24f0e --- /dev/null +++ b/extensions/Ollama/Ollama/OllamaConfig.cs @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.KernelMemory.AI.Ollama; + +public class OllamaConfig +{ + /// + /// Ollama HTTP endpoint. + /// + public string Endpoint { get; set; } = "http://localhost:11434"; + + /// + /// Settings for the model used for text generation. Chat models can be used too. + /// + public OllamaModelConfig TextModel { get; set; } = new OllamaModelConfig(); + + /// + /// Settings for the model used for text embedding generation. + /// + public OllamaModelConfig EmbeddingModel { get; set; } = new OllamaModelConfig(); +} diff --git a/extensions/Ollama/Ollama/OllamaModelConfig.cs b/extensions/Ollama/Ollama/OllamaModelConfig.cs new file mode 100644 index 000000000..c1c3af561 --- /dev/null +++ b/extensions/Ollama/Ollama/OllamaModelConfig.cs @@ -0,0 +1,125 @@ +// Copyright (c) Microsoft. All rights reserved. + +namespace Microsoft.KernelMemory.AI.Ollama; + +public class OllamaModelConfig +{ + /// + /// Model used for text generation. Chat models can be used too. + /// + public string ModelName { get; set; } = string.Empty; + + /// + /// The max number of tokens supported by the model. + /// Default to 4096 for text and 8192 for embeddings. + /// + public int? MaxTokenTotal { get; set; } + + /// + /// Enable Mirostat sampling for controlling perplexity. + /// (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) + /// + public int? MiroStat { get; set; } + + /// + /// Influences how quickly the algorithm responds to feedback from the + /// generated text. A lower learning rate will result in slower adjustments, + /// while a higher learning rate will make the algorithm more responsive. + /// (Default: 0.1) + /// + public float? MiroStatEta { get; set; } + + /// + /// Controls the balance between coherence and diversity of the output. + /// A lower value will result in more focused and coherent text. + /// (Default: 5.0) + /// + public float? MiroStatTau { get; set; } + + /// + /// Sets the size of the context window used to generate the next token. + /// (Default: 2048) + /// + public int? NumCtx { get; set; } + + /// + /// The number of GQA groups in the transformer layer. 
Required for some + /// models, for example it is 8 for llama2:70b + /// + public int? NumGqa { get; set; } + + /// + /// The number of layers to send to the GPU(s). On macOS it defaults to + /// 1 to enable metal support, 0 to disable. + /// + public int? NumGpu { get; set; } + + /// + /// Sets the number of threads to use during computation. By default, + /// Ollama will detect this for optimal performance. + /// It is recommended to set this value to the number of physical CPU cores + /// your system has (as opposed to the logical number of cores). + /// + public int? NumThread { get; set; } + + /// + /// Sets how far back for the model to look back to prevent repetition. + /// (Default: 64, 0 = disabled, -1 = num_ctx) + /// + public int? RepeatLastN { get; set; } + + /// + /// Sets the random number seed to use for generation. + /// Setting this to a specific number will make the model generate the same + /// text for the same prompt. (Default: 0) + /// + public int? Seed { get; set; } + + /// + /// Tail free sampling is used to reduce the impact of less probable + /// tokens from the output. A higher value (e.g., 2.0) will reduce the + /// impact more, while a value of 1.0 disables this setting. (default: 1) + /// + public float? TfsZ { get; set; } + + /// + /// Maximum number of tokens to predict when generating text. + /// (Default: 128, -1 = infinite generation, -2 = fill context) + /// + public int? NumPredict { get; set; } + + /// + /// Reduces the probability of generating nonsense. A higher value + /// (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) + /// will be more conservative. (Default: 40) + /// + public int? TopK { get; set; } + + /// + /// Alternative to the top_p, and aims to ensure a balance of quality and variety.min_p represents the minimum + /// probability for a token to be considered, relative to the probability of the most likely token.For + /// example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less + /// than 0.05*0.9=0.045 are filtered out. (Default: 0.0) + /// + public float? MinP { get; set; } + + /// + /// How many requests can be processed in parallel + /// + public int MaxBatchSize { get; set; } = 1; + + public OllamaModelConfig() + { + } + + public OllamaModelConfig(string modelName) + { + this.ModelName = modelName; + } + + public OllamaModelConfig(string modelName, int maxToken) + { + this.ModelName = modelName; + this.MaxTokenTotal = maxToken; + } +} diff --git a/extensions/Ollama/Ollama/OllamaTextEmbeddingGenerator.cs b/extensions/Ollama/Ollama/OllamaTextEmbeddingGenerator.cs new file mode 100644 index 000000000..9719648bc --- /dev/null +++ b/extensions/Ollama/Ollama/OllamaTextEmbeddingGenerator.cs @@ -0,0 +1,139 @@ +// Copyright (c) Microsoft. All rights reserved. 
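Before the embedding generator that follows, a minimal, hypothetical sketch of how the optional OllamaModelConfig settings documented above could be combined with the builder extensions added earlier in this change. Endpoint, model names, token limits and option values are illustrative placeholders taken from or modeled on the 212-dotnet-ollama example, not recommendations.

```csharp
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI.Ollama;
using Microsoft.KernelMemory.AI.OpenAI;

// Illustrative values only.
var config = new OllamaConfig
{
    Endpoint = "http://localhost:11434",
    TextModel = new OllamaModelConfig("phi3:medium-128k", 131072)
    {
        NumCtx = 4096, // size of the context window used to generate the next token
        Seed = 42,     // fixed seed: same prompt produces the same text
        TopK = 40      // sampling cut-off
    },
    EmbeddingModel = new OllamaModelConfig("nomic-embed-text", 2048)
    {
        MaxBatchSize = 4 // how many requests can be processed in parallel
    }
};

var memory = new KernelMemoryBuilder()
    .WithOllamaTextGeneration(config, new GPT4oTokenizer())
    .WithOllamaTextEmbeddingGeneration(config, new GPT4oTokenizer())
    .Build();

// memory is now ready for ImportTextAsync / AskAsync, as in the 212-dotnet-ollama example.
```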
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.KernelMemory.AI.OpenAI; +using Microsoft.KernelMemory.Diagnostics; +using OllamaSharp; +using OllamaSharp.Models; + +namespace Microsoft.KernelMemory.AI.Ollama; + +public class OllamaTextEmbeddingGenerator : ITextEmbeddingGenerator, ITextEmbeddingBatchGenerator +{ + private const int MaxTokensIfUndefined = 8192; + + private readonly IOllamaApiClient _client; + private readonly OllamaModelConfig _modelConfig; + private readonly ILogger _log; + private readonly ITextTokenizer _textTokenizer; + + public int MaxTokens { get; } + + public int MaxBatchSize { get; } + + public OllamaTextEmbeddingGenerator( + IOllamaApiClient ollamaClient, + OllamaModelConfig modelConfig, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + { + this._client = ollamaClient; + this._modelConfig = modelConfig; + this.MaxBatchSize = modelConfig.MaxBatchSize; + this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger(); + + if (textTokenizer == null) + { + this._log.LogWarning( + "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); + } + + this._textTokenizer = textTokenizer; + + this.MaxTokens = modelConfig.MaxTokenTotal ?? MaxTokensIfUndefined; + } + + public OllamaTextEmbeddingGenerator( + OllamaConfig config, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + : this( + new OllamaApiClient(new Uri(config.Endpoint), config.EmbeddingModel.ModelName), + config.EmbeddingModel, + textTokenizer, + loggerFactory) + { + } + + public OllamaTextEmbeddingGenerator( + HttpClient httpClient, + OllamaConfig config, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? 
loggerFactory = null) + : this( + new OllamaApiClient(httpClient, config.EmbeddingModel.ModelName), + config.EmbeddingModel, + textTokenizer, + loggerFactory) + { + } + + public int CountTokens(string text) + { + return this._textTokenizer.CountTokens(text); + } + + public IReadOnlyList GetTokens(string text) + { + return this._textTokenizer.GetTokens(text); + } + + public async Task GenerateEmbeddingAsync( + string text, + CancellationToken cancellationToken = default) + { + this._log.LogTrace("Generating embedding, text length {0} chars", text.Length); + + Embedding[] result = await this.GenerateEmbeddingBatchAsync([text], cancellationToken).ConfigureAwait(false); + var embeddding = result.First(); + this._log.LogTrace("Embedding ready, vector length {0}", embeddding.Length); + + return embeddding; + } + + public async Task GenerateEmbeddingBatchAsync( + IEnumerable textList, + CancellationToken cancellationToken = default) + { + var list = textList.ToList(); + this._log.LogTrace("Generating embeddings batch, size {0} texts", list.Count); + + var request = new EmbedRequest + { + Model = this._client.SelectedModel, + Input = list, + Options = new RequestOptions + { + // Global settings + MiroStat = this._modelConfig.MiroStat, + MiroStatEta = this._modelConfig.MiroStatEta, + MiroStatTau = this._modelConfig.MiroStatTau, + NumCtx = this._modelConfig.NumCtx, + NumGqa = this._modelConfig.NumGqa, + NumGpu = this._modelConfig.NumGpu, + NumThread = this._modelConfig.NumThread, + RepeatLastN = this._modelConfig.RepeatLastN, + Seed = this._modelConfig.Seed, + TfsZ = this._modelConfig.TfsZ, + NumPredict = this._modelConfig.NumPredict, + TopK = this._modelConfig.TopK, + MinP = this._modelConfig.MinP, + } + }; + + EmbedResponse response = await this._client.Embed(request, cancellationToken).ConfigureAwait(false); + Embedding[] result = response.Embeddings.Select(Embedding.FromDoubles).ToArray(); + + this._log.LogTrace("Embeddings batch ready, size {0} texts", result.Length); + + return result; + } +} diff --git a/extensions/Ollama/Ollama/OllamaTextGenerator.cs b/extensions/Ollama/Ollama/OllamaTextGenerator.cs new file mode 100644 index 000000000..c5bf02eb7 --- /dev/null +++ b/extensions/Ollama/Ollama/OllamaTextGenerator.cs @@ -0,0 +1,140 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Net.Http; +using System.Runtime.CompilerServices; +using System.Threading; +using Microsoft.Extensions.Logging; +using Microsoft.KernelMemory.AI.OpenAI; +using Microsoft.KernelMemory.Diagnostics; +using OllamaSharp; +using OllamaSharp.Models; + +namespace Microsoft.KernelMemory.AI.Ollama; + +public class OllamaTextGenerator : ITextGenerator +{ + private const int MaxTokensIfUndefined = 4096; + + private readonly IOllamaApiClient _client; + private readonly OllamaModelConfig _modelConfig; + private readonly ILogger _log; + private readonly ITextTokenizer _textTokenizer; + + public int MaxTokenTotal { get; } + + public OllamaTextGenerator( + IOllamaApiClient ollamaClient, + OllamaModelConfig modelConfig, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + { + this._client = ollamaClient; + this._modelConfig = modelConfig; + this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger(); + + if (textTokenizer == null) + { + this._log.LogWarning( + "Tokenizer not specified, will use {0}. 
The token count might be incorrect, causing unexpected errors", + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); + } + + this._textTokenizer = textTokenizer; + + this.MaxTokenTotal = modelConfig.MaxTokenTotal ?? MaxTokensIfUndefined; + } + + public OllamaTextGenerator( + OllamaConfig config, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + : this( + new OllamaApiClient(new Uri(config.Endpoint), config.TextModel.ModelName), + config.TextModel, + textTokenizer, + loggerFactory) + { + } + + public OllamaTextGenerator( + HttpClient httpClient, + OllamaConfig config, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + : this( + new OllamaApiClient(httpClient, config.TextModel.ModelName), + config.TextModel, + textTokenizer, + loggerFactory) + { + } + + public int CountTokens(string text) + { + return this._textTokenizer.CountTokens(text); + } + + public IReadOnlyList GetTokens(string text) + { + return this._textTokenizer.GetTokens(text); + } + + public async IAsyncEnumerable GenerateTextAsync( + string prompt, + TextGenerationOptions options, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var request = new GenerateRequest + { + Model = this._client.SelectedModel, + Prompt = prompt, + Stream = true, + Options = new RequestOptions + { + // Use case specific + Temperature = (float)options.Temperature, + TopP = (float)options.NucleusSampling, + RepeatPenalty = (float)options.FrequencyPenalty, + + // Global settings + MiroStat = this._modelConfig.MiroStat, + MiroStatEta = this._modelConfig.MiroStatEta, + MiroStatTau = this._modelConfig.MiroStatTau, + NumCtx = this._modelConfig.NumCtx, + NumGqa = this._modelConfig.NumGqa, + NumGpu = this._modelConfig.NumGpu, + NumThread = this._modelConfig.NumThread, + RepeatLastN = this._modelConfig.RepeatLastN, + Seed = this._modelConfig.Seed, + TfsZ = this._modelConfig.TfsZ, + NumPredict = this._modelConfig.NumPredict, + TopK = this._modelConfig.TopK, + MinP = this._modelConfig.MinP, + } + }; + + if (options.StopSequences is { Count: > 0 }) + { + var stop = new List(); + foreach (var s in options.StopSequences) { stop.Add(s); } + + request.Options.Stop = stop.ToArray(); + } + + // IAsyncEnumerable stream = this._client.Generate(request, cancellationToken); + // await foreach (GenerateResponseStream? token in stream) + // { + // if (token != null) { yield return token.Response; } + // } + + var chat = new Chat(this._client); + IAsyncEnumerable stream = chat.Send(prompt, cancellationToken); + await foreach (string? token in stream) + { + if (token != null) { yield return token; } + } + } +} diff --git a/extensions/Ollama/README.md b/extensions/Ollama/README.md new file mode 100644 index 000000000..d678c047e --- /dev/null +++ b/extensions/Ollama/README.md @@ -0,0 +1,29 @@ +# Kernel Memory with Ollama + +[![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.AI.Ollama)](https://www.nuget.org/packages/Microsoft.KernelMemory.AI.Ollama/) +[![Discord](https://img.shields.io/discord/1063152441819942922?label=Discord&logo=discord&logoColor=white&color=d82679)](https://aka.ms/KMdiscord) + +This project contains the +[Ollama](https://ollama.com) +LLM connector to access to LLM models via Ollama service to generate text and +text embeddings. 
+ +Sample code: + +```csharp +var config = new OllamaConfig +{ + Endpoint = "http://localhost:11434", + TextModel = new OllamaModelConfig("phi3:medium-128k", 131072), + EmbeddingModel = new OllamaModelConfig("nomic-embed-text", 2048) +}; + +var memory = new KernelMemoryBuilder() + .WithOllamaTextGeneration(config) + .WithOllamaTextEmbeddingGeneration(config) + .Build(); + +await memory.ImportTextAsync("Today is October 32nd, 2476"); + +var answer = await memory.AskAsync("What's the current date (don't check for validity)?"); +``` diff --git a/extensions/OpenAI/OpenAI.UnitTests/GPTTokenizersTests.cs b/extensions/OpenAI/OpenAI.UnitTests/GPTTokenizersTests.cs index c1c01f0b5..0d93d1336 100644 --- a/extensions/OpenAI/OpenAI.UnitTests/GPTTokenizersTests.cs +++ b/extensions/OpenAI/OpenAI.UnitTests/GPTTokenizersTests.cs @@ -26,6 +26,10 @@ public void CanTokenize() var gpt4 = new GPT4Tokenizer(); tokens = gpt4.GetTokens(helloWorld); Assert.Equal(["hello", " world"], tokens); + + var gpt4o = new GPT4oTokenizer(); + tokens = gpt4o.GetTokens(helloWorld); + Assert.Equal(["hello", " world"], tokens); } [Fact] @@ -38,5 +42,6 @@ public void TheyCountTokens() Assert.Equal(29, new GPT2Tokenizer().CountTokens(text)); Assert.Equal(29, new GPT3Tokenizer().CountTokens(text)); Assert.Equal(21, new GPT4Tokenizer().CountTokens(text)); + Assert.Equal(22, new GPT4oTokenizer().CountTokens(text)); } } diff --git a/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs b/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs index 6cc1c3104..5872c8670 100644 --- a/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs +++ b/extensions/OpenAI/OpenAI/OpenAITextEmbeddingGenerator.cs @@ -137,8 +137,8 @@ private OpenAITextEmbeddingGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; diff --git a/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs b/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs index 7251bbcbd..f2c981e8d 100644 --- a/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs +++ b/extensions/OpenAI/OpenAI/OpenAITextGenerator.cs @@ -86,8 +86,8 @@ public OpenAITextGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._textTokenizer = textTokenizer; @@ -96,13 +96,13 @@ public OpenAITextGenerator( /// public int CountTokens(string text) { - return this._textTokenizer!.CountTokens(text); + return this._textTokenizer.CountTokens(text); } /// public IReadOnlyList GetTokens(string text) { - return this._textTokenizer!.GetTokens(text); + return this._textTokenizer.GetTokens(text); } /// diff --git a/service/Abstractions/AI/Embedding.cs b/service/Abstractions/AI/Embedding.cs index e44526722..aa2566868 100644 --- a/service/Abstractions/AI/Embedding.cs +++ b/service/Abstractions/AI/Embedding.cs @@ -38,6 +38,18 @@ public Embedding(float[] vector) this.Data = vector; } + /// + /// This is not a ctor on purpose so we can use collections syntax with + /// the main ctor, and surface the extra casting cost when not using floats. 
+ ///
+ public static Embedding FromDoubles(double[] vector)
+ {
+ float[] f = new float[vector.Length];
+ for (int i = 0; i < vector.Length; i++) { f[i] = (float)vector[i]; }
+
+ return new Embedding(f);
+ }
+
public Embedding(ReadOnlyMemory vector)
{
this.Data = vector;
diff --git a/service/Abstractions/Diagnostics/SensitiveDataLogger.cs b/service/Abstractions/Diagnostics/SensitiveDataLogger.cs
new file mode 100644
index 000000000..1262cf510
--- /dev/null
+++ b/service/Abstractions/Diagnostics/SensitiveDataLogger.cs
@@ -0,0 +1,75 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using Microsoft.Extensions.Logging;
+
+namespace Microsoft.KernelMemory.Diagnostics;
+
+#pragma warning disable CA2254 // by design
+public static class SensitiveDataLogger
+{
+ private static bool s_enabled = false;
+
+ public static bool Enabled
+ {
+ get
+ {
+ return s_enabled;
+ }
+ set
+ {
+ var env = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT");
+ if (!string.Equals(env, "Development", StringComparison.OrdinalIgnoreCase))
+ {
+#pragma warning disable CA2201
+ throw new ApplicationException("Sensitive data logging can be enabled only in a development environment. Check ASPNETCORE_ENVIRONMENT env var.");
+#pragma warning restore CA2201
+ }
+
+ s_enabled = value && string.Equals(env, "Development", StringComparison.OrdinalIgnoreCase);
+ }
+ }
+
+ public static LogLevel LoggingLevel { get; set; } = LogLevel.Information;
+
+ public static void LogSensitive(this ILogger logger, string? message, params object?[] args)
+ {
+ if (!Enabled) { return; }
+
+ logger.Log(LoggingLevel, $"[PII] {message}", args);
+ }
+
+ public static void LogSensitive(
+ this ILogger logger,
+ Exception? exception,
+ string? message,
+ params object?[] args)
+ {
+ if (!Enabled) { return; }
+
+ logger.Log(LoggingLevel, exception, message, args);
+ }
+
+ public static void LogSensitive(
+ this ILogger logger,
+ EventId eventId,
+ Exception? exception,
+ string? message,
+ params object?[] args)
+ {
+ if (!Enabled) { return; }
+
+ logger.Log(LoggingLevel, eventId, exception, message, args);
+ }
+
+ public static void LogSensitive(
+ this ILogger logger,
+ EventId eventId,
+ string? 
message, + params object?[] args) + { + if (!Enabled) { return; } + + logger.Log(LoggingLevel, eventId, message, args); + } +} diff --git a/service/Core/Core.csproj b/service/Core/Core.csproj index b6ce2f27f..e60ba40a7 100644 --- a/service/Core/Core.csproj +++ b/service/Core/Core.csproj @@ -20,6 +20,7 @@ + diff --git a/service/Core/Search/SearchClient.cs b/service/Core/Search/SearchClient.cs index 4a229f22e..231ec9df5 100644 --- a/service/Core/Search/SearchClient.cs +++ b/service/Core/Search/SearchClient.cs @@ -324,7 +324,7 @@ public async Task AskAsync( if (factsUsedCount == 0) { - this._log.LogWarning("No memories available"); + this._log.LogWarning("No memories available (min relevance: {0})", minRelevance); noAnswerFound.NoResultReason = "No memories available"; return noAnswerFound; } @@ -347,6 +347,7 @@ public async Task AskAsync( watch.Stop(); answer.Result = text.ToString(); + this._log.LogSensitive("Answer: {0}", answer.Result); answer.NoResult = ValueIsEquivalentTo(answer.Result, this._config.EmptyAnswer); if (answer.NoResult) { @@ -391,6 +392,8 @@ private IAsyncEnumerable GenerateAnswer(string question, string facts, I this._log.LogDebug("Running RAG prompt, size: {0} tokens, requesting max {1} tokens", this._textGenerator.CountTokens(prompt), this._config.AnswerTokens); + + this._log.LogSensitive("Prompt: {0}", prompt); } return this._textGenerator.GenerateTextAsync(prompt, options, token); diff --git a/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs b/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs index fbcb29a72..533f0e7d7 100644 --- a/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs +++ b/service/Core/SemanticKernel/SemanticKernelTextEmbeddingGenerator.cs @@ -44,8 +44,8 @@ public SemanticKernelTextEmbeddingGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._tokenizer = textTokenizer; diff --git a/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs b/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs index 10fcbf2cf..3d1d48982 100644 --- a/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs +++ b/service/Core/SemanticKernel/SemanticKernelTextGenerator.cs @@ -45,8 +45,8 @@ public SemanticKernelTextGenerator( { this._log.LogWarning( "Tokenizer not specified, will use {0}. 
The token count might be incorrect, causing unexpected errors", - nameof(GPT4Tokenizer)); - textTokenizer = new GPT4Tokenizer(); + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); } this._tokenizer = textTokenizer; diff --git a/service/Service/ServiceConfiguration.cs b/service/Service/ServiceConfiguration.cs index 0d7b63ba9..e3b6c27a1 100644 --- a/service/Service/ServiceConfiguration.cs +++ b/service/Service/ServiceConfiguration.cs @@ -6,6 +6,7 @@ using Microsoft.Extensions.DependencyInjection; using Microsoft.KernelMemory.AI; using Microsoft.KernelMemory.AI.Anthropic; +using Microsoft.KernelMemory.AI.Ollama; using Microsoft.KernelMemory.AI.OpenAI; using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.MemoryDb.SQLServer; @@ -215,7 +216,7 @@ private void ConfigureIngestionEmbeddingGenerators(IKernelMemoryBuilder builder) var instance = this.GetServiceInstance(builder, s => s.AddAzureOpenAIEmbeddingGeneration( config: this.GetServiceConfig("AzureOpenAIEmbedding"), - textTokenizer: new GPT4Tokenizer())); + textTokenizer: new GPT4oTokenizer())); builder.AddIngestionEmbeddingGenerator(instance); break; } @@ -225,7 +226,17 @@ private void ConfigureIngestionEmbeddingGenerators(IKernelMemoryBuilder builder) var instance = this.GetServiceInstance(builder, s => s.AddOpenAITextEmbeddingGeneration( config: this.GetServiceConfig("OpenAI"), - textTokenizer: new GPT4Tokenizer())); + textTokenizer: new GPT4oTokenizer())); + builder.AddIngestionEmbeddingGenerator(instance); + break; + } + + case string x when x.Equals("Ollama", StringComparison.OrdinalIgnoreCase): + { + var instance = this.GetServiceInstance(builder, + s => s.AddOllamaTextEmbeddingGeneration( + config: this.GetServiceConfig("Ollama"), + textTokenizer: new GPT4oTokenizer())); builder.AddIngestionEmbeddingGenerator(instance); break; } @@ -352,13 +363,19 @@ private void ConfigureRetrievalEmbeddingGenerator(IKernelMemoryBuilder builder) case string y when y.Equals("AzureOpenAIEmbedding", StringComparison.OrdinalIgnoreCase): builder.Services.AddAzureOpenAIEmbeddingGeneration( config: this.GetServiceConfig("AzureOpenAIEmbedding"), - textTokenizer: new GPT4Tokenizer()); + textTokenizer: new GPT4oTokenizer()); break; case string x when x.Equals("OpenAI", StringComparison.OrdinalIgnoreCase): builder.Services.AddOpenAITextEmbeddingGeneration( config: this.GetServiceConfig("OpenAI"), - textTokenizer: new GPT4Tokenizer()); + textTokenizer: new GPT4oTokenizer()); + break; + + case string x when x.Equals("Ollama", StringComparison.OrdinalIgnoreCase): + builder.Services.AddOllamaTextEmbeddingGeneration( + config: this.GetServiceConfig("Ollama"), + textTokenizer: new GPT4oTokenizer()); break; default: @@ -423,17 +440,25 @@ private void ConfigureTextGenerator(IKernelMemoryBuilder builder) case string y when y.Equals("AzureOpenAIText", StringComparison.OrdinalIgnoreCase): builder.Services.AddAzureOpenAITextGeneration( config: this.GetServiceConfig("AzureOpenAIText"), - textTokenizer: new GPT4Tokenizer()); + textTokenizer: new GPT4oTokenizer()); break; case string x when x.Equals("OpenAI", StringComparison.OrdinalIgnoreCase): builder.Services.AddOpenAITextGeneration( config: this.GetServiceConfig("OpenAI"), - textTokenizer: new GPT4Tokenizer()); + textTokenizer: new GPT4oTokenizer()); break; case string x when x.Equals("Anthropic", StringComparison.OrdinalIgnoreCase): - builder.Services.AddAnthropicTextGeneration(this.GetServiceConfig("Anthropic")); + builder.Services.AddAnthropicTextGeneration( + config: 
this.GetServiceConfig("Anthropic"), + textTokenizer: new GPT4oTokenizer()); + break; + + case string x when x.Equals("Ollama", StringComparison.OrdinalIgnoreCase): + builder.Services.AddOllamaTextGeneration( + config: this.GetServiceConfig("Ollama"), + textTokenizer: new GPT4oTokenizer()); break; case string x when x.Equals("LlamaSharp", StringComparison.OrdinalIgnoreCase): diff --git a/service/Service/appsettings.json b/service/Service/appsettings.json index 4dc741c18..e4c5bd707 100644 --- a/service/Service/appsettings.json +++ b/service/Service/appsettings.json @@ -375,6 +375,119 @@ "DatabaseName": "KernelMemory", "UseSingleCollectionForVectorSearch": false }, + "Ollama": { + "Endpoint": "http://localhost:11434", + "TextModel": { + "ModelName": "phi3:medium-128k", + "MaxTokenTotal": 131072, + // How many requests can be processed in parallel + "MaxBatchSize": 1 + //// Enable Mirostat sampling for controlling perplexity. + //// (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) + //"MiroStat": 0, + //// Influences how quickly the algorithm responds to feedback from the + //// generated text. A lower learning rate will result in slower adjustments, + //// while a higher learning rate will make the algorithm more responsive. + //// (Default: 0.1) + //"MiroStatEta": 0.1, + //// Controls the balance between coherence and diversity of the output. + //// A lower value will result in more focused and coherent text. + //// (Default: 5.0) + //"MiroStatTau": 5.0, + //// Sets the size of the context window used to generate the next token. + //// (Default: 2048) + //"NumCtx": 2048, + //// The number of GQA groups in the transformer layer. Required for some + //// models, for example it is 8 for llama2:70b + //"NumGqa": null, + //// The number of layers to send to the GPU(s). On macOS it defaults to + //// 1 to enable metal support, 0 to disable. + //"NumGpu": null, + //// Sets the number of threads to use during computation. By default, + //// Ollama will detect this for optimal performance. + //// It is recommended to set this value to the number of physical CPU cores + //// your system has (as opposed to the logical number of cores). + //"NumThread": null, + //// Sets how far back for the model to look back to prevent repetition. + //// (Default: 64, 0 = disabled, -1 = num_ctx) + //"RepeatLastN": null, + //// Sets the random number seed to use for generation. + //// Setting this to a specific number will make the model generate the same + //// text for the same prompt. (Default: 0) + //"Seed": 0, + //// Tail free sampling is used to reduce the impact of less probable + //// tokens from the output. A higher value (e.g., 2.0) will reduce the + //// impact more, while a value of 1.0 disables this setting. (default: 1) + //"TfsZ": 1.0, + //// Maximum number of tokens to predict when generating text. + //// (Default: 128, -1 = infinite generation, -2 = fill context) + //"NumPredict": 128, + //// Reduces the probability of generating nonsense. A higher value + //// (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) + //// will be more conservative. (Default: 40) + //"TopK": 40, + //// Alternative to the top_p, and aims to ensure a balance of quality and variety.min_p represents the minimum + //// probability for a token to be considered, relative to the probability of the most likely token.For + //// example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less + //// than 0.05*0.9=0.045 are filtered out. 
(Default: 0.0) + //"MinP": 0.0 + }, + "EmbeddingModel": { + "ModelName": "nomic-embed-text", + "MaxTokenTotal": 2048, + // How many requests can be processed in parallel + "MaxBatchSize": 1 + //// Enable Mirostat sampling for controlling perplexity. + //// (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) + //"MiroStat": 0, + //// Influences how quickly the algorithm responds to feedback from the + //// generated text. A lower learning rate will result in slower adjustments, + //// while a higher learning rate will make the algorithm more responsive. + //// (Default: 0.1) + //"MiroStatEta": 0.1, + //// Controls the balance between coherence and diversity of the output. + //// A lower value will result in more focused and coherent text. + //// (Default: 5.0) + //"MiroStatTau": 5.0, + //// Sets the size of the context window used to generate the next token. + //// (Default: 2048) + //"NumCtx": 2048, + //// The number of GQA groups in the transformer layer. Required for some + //// models, for example it is 8 for llama2:70b + //"NumGqa": null, + //// The number of layers to send to the GPU(s). On macOS it defaults to + //// 1 to enable metal support, 0 to disable. + //"NumGpu": null, + //// Sets the number of threads to use during computation. By default, + //// Ollama will detect this for optimal performance. + //// It is recommended to set this value to the number of physical CPU cores + //// your system has (as opposed to the logical number of cores). + //"NumThread": null, + //// Sets how far back for the model to look back to prevent repetition. + //// (Default: 64, 0 = disabled, -1 = num_ctx) + //"RepeatLastN": null, + //// Sets the random number seed to use for generation. + //// Setting this to a specific number will make the model generate the same + //// text for the same prompt. (Default: 0) + //"Seed": 0, + //// Tail free sampling is used to reduce the impact of less probable + //// tokens from the output. A higher value (e.g., 2.0) will reduce the + //// impact more, while a value of 1.0 disables this setting. (default: 1) + //"TfsZ": 1.0, + //// Maximum number of tokens to predict when generating text. + //// (Default: 128, -1 = infinite generation, -2 = fill context) + //"NumPredict": 128, + //// Reduces the probability of generating nonsense. A higher value + //// (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) + //// will be more conservative. (Default: 40) + //"TopK": 40, + //// Alternative to the top_p, and aims to ensure a balance of quality and variety.min_p represents the minimum + //// probability for a token to be considered, relative to the probability of the most likely token.For + //// example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less + //// than 0.05*0.9=0.045 are filtered out. 
(Default: 0.0) + //"MinP": 0.0 + } + }, "OpenAI": { // Name of the model used to generate text (text completion or chat completion) "TextModel": "gpt-3.5-turbo-16k", diff --git a/tools/InteractiveSetup/Context.cs b/tools/InteractiveSetup/Context.cs index d2b4b713b..6f93e92a6 100644 --- a/tools/InteractiveSetup/Context.cs +++ b/tools/InteractiveSetup/Context.cs @@ -26,6 +26,7 @@ internal sealed class Context public BoundedBoolean CfgAzureOpenAIEmbedding = new(); public BoundedBoolean CfgOpenAI = new(); public BoundedBoolean CfgLlamaSharp = new(); + public BoundedBoolean CfgOllama = new(); public BoundedBoolean CfgAzureAIDocIntel = new(); // Vectors diff --git a/tools/InteractiveSetup/Main.cs b/tools/InteractiveSetup/Main.cs index bb9341041..9a2defd0b 100644 --- a/tools/InteractiveSetup/Main.cs +++ b/tools/InteractiveSetup/Main.cs @@ -62,6 +62,7 @@ public static void InteractiveSetup(string[] args) AzureOpenAIText.Setup(ctx); OpenAI.Setup(ctx); LlamaSharp.Setup(ctx); + Ollama.Setup(ctx); Logger.Setup(); @@ -198,6 +199,18 @@ private static void EmbeddingGeneratorSetup(Context ctx) ctx.CfgOpenAI.Value = true; }), + new("Ollama service", config.Retrieval.EmbeddingGeneratorType == "Ollama", () => + { + AppSettings.Change(x => + { + x.Retrieval.EmbeddingGeneratorType = "Ollama"; + x.DataIngestion.EmbeddingGeneratorTypes = ctx.CfgEmbeddingGenerationEnabled.Value + ? new List { x.Retrieval.EmbeddingGeneratorType } + : new List { }; + }); + ctx.CfgOllama.Value = true; + }), + new("None/Custom (manually set with code)", string.IsNullOrEmpty(config.Retrieval.EmbeddingGeneratorType), () => { AppSettings.Change(x => @@ -233,7 +246,13 @@ private static void TextGeneratorTypeSetup(Context ctx) ctx.CfgOpenAI.Value = true; }), - new("LLama model", config.TextGeneratorType == "LlamaSharp", () => + new("Ollama service", config.TextGeneratorType == "Ollama", () => + { + AppSettings.Change(x => { x.TextGeneratorType = "Ollama"; }); + ctx.CfgOllama.Value = true; + }), + + new("LlamaSharp library", config.TextGeneratorType == "LlamaSharp", () => { AppSettings.Change(x => { x.TextGeneratorType = "LlamaSharp"; }); ctx.CfgLlamaSharp.Value = true; diff --git a/tools/InteractiveSetup/Services/Ollama.cs b/tools/InteractiveSetup/Services/Ollama.cs new file mode 100644 index 000000000..59b656540 --- /dev/null +++ b/tools/InteractiveSetup/Services/Ollama.cs @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft. All rights reserved. 
+
+using System.Collections.Generic;
+using System.Globalization;
+using Microsoft.KernelMemory.InteractiveSetup.UI;
+
+namespace Microsoft.KernelMemory.InteractiveSetup.Services;
+
+internal static class Ollama
+{
+    public static void Setup(Context ctx, bool force = false)
+    {
+        if (!ctx.CfgOllama.Value && !force) { return; }
+
+        ctx.CfgOllama.Value = false;
+        const string ServiceName = "Ollama";
+
+        Dictionary<string, object> textModel = new();
+        Dictionary<string, object> embeddingModel = new();
+
+        if (!AppSettings.GetCurrentConfig().Services.TryGetValue(ServiceName, out var config))
+        {
+            textModel = new Dictionary<string, object>
+            {
+                { "ModelName", "phi3:medium-128k" },
+                { "MaxTokenTotal", 131072 },
+            };
+
+            embeddingModel = new Dictionary<string, object>
+            {
+                { "ModelName", "nomic-embed-text" },
+                { "MaxTokenTotal", 2048 },
+            };
+
+            config = new Dictionary<string, object>
+            {
+                { "Endpoint", "http://localhost:11434" },
+                { "TextModel", textModel },
+                { "EmbeddingModel", embeddingModel },
+            };
+        }
+
+        AppSettings.Change(x => x.Services[ServiceName] = new Dictionary<string, object>
+        {
+            { "Endpoint", SetupUI.AskOpenQuestion("Ollama endpoint", config.TryGet("Endpoint")) }
+        });
+
+        AppSettings.Change(x => x.Services[ServiceName]["TextModel"] = new Dictionary<string, object>
+        {
+            { "ModelName", SetupUI.AskOpenQuestion("Ollama text model name", textModel.TryGet("ModelName")) },
+            { "MaxTokenTotal", SetupUI.AskOpenQuestionInt("Ollama text model max tokens", StrToInt(textModel.TryGet("MaxTokenTotal"))) },
+        });
+
+        AppSettings.Change(x => x.Services[ServiceName]["EmbeddingModel"] = new Dictionary<string, object>
+        {
+            { "ModelName", SetupUI.AskOpenQuestion("Ollama text embedding model name", embeddingModel.TryGet("ModelName")) },
+            { "MaxTokenTotal", SetupUI.AskOpenQuestionInt("Ollama text embedding model max tokens", StrToInt(embeddingModel.TryGet("MaxTokenTotal"))) },
+        });
+    }
+
+    private static int StrToInt(string s)
+    {
+        return int.Parse(s, NumberStyles.Integer, NumberFormatInfo.InvariantInfo);
+    }
+}
diff --git a/tools/InteractiveSetup/UI/SetupUI.cs b/tools/InteractiveSetup/UI/SetupUI.cs
index 1c4c8ddee..b2113261e 100644
--- a/tools/InteractiveSetup/UI/SetupUI.cs
+++ b/tools/InteractiveSetup/UI/SetupUI.cs
@@ -1,6 +1,7 @@
 // Copyright (c) Microsoft. All rights reserved.
 
 using System;
+using System.Globalization;
 using System.Linq;
 
 namespace Microsoft.KernelMemory.InteractiveSetup.UI;
@@ -30,6 +31,12 @@ public static string AskOptionalOpenQuestion(string question, string? defaultVal
         return AskOpenQuestion(question: question, defaultValue: defaultValue, optional: true);
     }
 
+    public static int AskOpenQuestionInt(string question, int defaultValue, bool optional = false)
+    {
+        string value = AskOpenQuestion(question: question, defaultValue: $"{defaultValue}", trim: true, optional: optional, isPassword: false);
+        return int.Parse(value, NumberStyles.Integer, NumberFormatInfo.InvariantInfo);
+    }
+
     public static string AskOpenQuestion(string question, string? defaultValue, bool trim = true, bool optional = false, bool isPassword = false)
     {
         if (!string.IsNullOrEmpty(defaultValue))
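
Two notes on the changes above; neither sketch below is part of the patch. First, SearchClient now logs the full RAG prompt and the generated answer through a LogSensitive helper; the tail of that extension is visible at the top of this diff (it returns immediately unless an Enabled flag is set, then forwards to ILogger.Log at a configurable level). A minimal sketch of that opt-in pattern follows; the class name and the placement of the static flags are assumptions, and the patch's version also threads an EventId through, which is omitted here for brevity.

// Sketch only, not part of the patch. Only the Enabled/LoggingLevel guard and the
// LogSensitive call sites appear in the diff; the class name is an assumption.
using Microsoft.Extensions.Logging;

public static class SensitiveLogging
{
    // Off by default, so prompts and answers never reach the logs unless explicitly enabled.
    public static bool Enabled { get; set; }

    // Level used when sensitive logging is enabled.
    public static LogLevel LoggingLevel { get; set; } = LogLevel.Information;

    public static void LogSensitive(this ILogger logger, string? message, params object?[] args)
    {
        if (!Enabled) { return; }

        logger.Log(LoggingLevel, message, args);
    }
}

With that guard in place, call sites such as this._log.LogSensitive("Prompt: {0}", prompt) cost a single boolean check when the feature is off.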
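
Second, the new Ollama backend is selected purely through configuration: setting TextGeneratorType / EmbeddingGeneratorType to "Ollama" (in appsettings.json or via the interactive setup) makes ServiceConfiguration call AddOllamaTextGeneration and AddOllamaTextEmbeddingGeneration with a GPT4oTokenizer. The same services can be wired up in code for serverless use. The sketch below assumes the extension exposes OllamaConfig/OllamaModelConfig types and WithOllama* builder extensions mirroring the "Ollama" appsettings section and the Add* service-collection methods in this diff; those names are assumptions, not confirmed by the patch.

// Sketch only, not part of the patch. OllamaConfig/OllamaModelConfig and the
// WithOllama* builder extensions are assumed; only the AddOllama* methods and the
// config shape (Endpoint, TextModel, EmbeddingModel) appear in the diff.
using System;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI.Ollama;

var config = new OllamaConfig
{
    Endpoint = "http://localhost:11434",
    TextModel = new OllamaModelConfig { ModelName = "phi3:medium-128k", MaxTokenTotal = 131072 },
    EmbeddingModel = new OllamaModelConfig { ModelName = "nomic-embed-text", MaxTokenTotal = 2048 }
};

var memory = new KernelMemoryBuilder()
    .WithOllamaTextGeneration(config)          // assumed wrapper over AddOllamaTextGeneration
    .WithOllamaTextEmbeddingGeneration(config) // assumed wrapper over AddOllamaTextEmbeddingGeneration
    .Build<MemoryServerless>();

await memory.ImportTextAsync("Kernel Memory can run fully locally using Ollama models.");
var answer = await memory.AskAsync("Which runtime serves the local models?");
Console.WriteLine(answer.Result);

The models referenced by the default configuration have to be available locally first, e.g. "ollama pull phi3:medium-128k" and "ollama pull nomic-embed-text".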