diff --git a/Directory.Packages.props b/Directory.Packages.props index 41efaafd0..2834bf71d 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -34,7 +34,7 @@ - + diff --git a/KernelMemory.sln b/KernelMemory.sln index 15db02fe8..7dc6a100e 100644 --- a/KernelMemory.sln +++ b/KernelMemory.sln @@ -7,6 +7,7 @@ EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docs", "docs", "{7BA7F1B2-19E2-46EB-B000-513EE2F65769}" ProjectSection(SolutionItems) = preProject docs\404.html = docs\404.html + docs\azure.md = docs\azure.md docs\concepts.md = docs\concepts.md docs\csharp.png = docs\csharp.png docs\extensions.md = docs\extensions.md @@ -31,7 +32,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "docs", "docs", "{7BA7F1B2-1 docs\service.md = docs\service.md docs\_config.local.yml = docs\_config.local.yml docs\_config.yml = docs\_config.yml - docs\azure.md = docs\azure.md EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "examples", "examples", "{0A43C65C-6007-4BB4-B3FE-8D439FC91841}" @@ -70,18 +70,18 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "root", "root", "{6EF76FD8-4 .editorconfig = .editorconfig .gitattributes = .gitattributes .gitignore = .gitignore + azure.yaml = azure.yaml CODE_OF_CONDUCT.md = CODE_OF_CONDUCT.md COMMUNITY.md = COMMUNITY.md CONTRIBUTING.md = CONTRIBUTING.md Directory.Build.props = Directory.Build.props Directory.Packages.props = Directory.Packages.props Dockerfile = Dockerfile + KernelMemory.sln.DotSettings = KernelMemory.sln.DotSettings LICENSE = LICENSE nuget.config = nuget.config README.md = README.md SECURITY.md = SECURITY.md - KernelMemory.sln.DotSettings = KernelMemory.sln.DotSettings - azure.yaml = azure.yaml EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".github", ".github", "{B8976338-7CDC-47AE-8502-C2FBAFBEBD68}" @@ -94,10 +94,10 @@ EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "workflows", "workflows", 
"{48E79819-1E9E-4075-90DA-BAEC761C89B2}" ProjectSection(SolutionItems) = preProject .github\workflows\docker-build-push.yml = .github\workflows\docker-build-push.yml + .github\workflows\dotnet-build.yml = .github\workflows\dotnet-build.yml + .github\workflows\dotnet-unit-tests.yml = .github\workflows\dotnet-unit-tests.yml .github\workflows\github-pages-jekyll.yml = .github\workflows\github-pages-jekyll.yml .github\workflows\spell-check-with-typos.yml = .github\workflows\spell-check-with-typos.yml - .github\workflows\dotnet-unit-tests.yml = .github\workflows\dotnet-unit-tests.yml - .github\workflows\dotnet-build.yml = .github\workflows\dotnet-build.yml EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "service", "service", "{87DEAE8D-138C-4FDD-B4C9-11C3A7817E8F}" @@ -116,9 +116,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools", "tools", "{CA49F1A1 tools\run-qdrant.sh = tools\run-qdrant.sh tools\run-rabbitmq.sh = tools\run-rabbitmq.sh tools\run-redis.sh = tools\run-redis.sh + tools\run-s3ninja.sh = tools\run-s3ninja.sh tools\search.sh = tools\search.sh tools\upload-file.sh = tools\upload-file.sh - tools\run-s3ninja.sh = tools\run-s3ninja.sh tools\dockerize-amd64.sh = tools\dockerize-amd64.sh tools\dockerize-arm64.sh = tools\dockerize-arm64.sh EndProjectSection @@ -243,12 +243,12 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "109-dotnet-custom-webscrape EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "infra", "infra", "{B488168B-AD86-4CC5-9D89-324B6EB743D9}" ProjectSection(SolutionItems) = preProject + infra\AZD.md = infra\AZD.md infra\build-main.json.sh = infra\build-main.json.sh infra\main.bicep = infra\main.bicep infra\main.json = infra\main.json - infra\README.md = infra\README.md - infra\AZD.md = infra\AZD.md infra\main.parameters.json = infra\main.parameters.json + infra\README.md = infra\README.md EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "modules", 
"modules", "{C2D3A947-B6F9-4306-BD42-21D8D1F42750}" @@ -323,6 +323,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "212-dotnet-ollama", "exampl EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ollama", "extensions\Ollama\Ollama\Ollama.csproj", "{F192513B-265B-4943-A2A9-44E23B15BA18}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Onnx.FunctionalTests", "extensions\ONNX\Onnx.FunctionalTests\Onnx.FunctionalTests.csproj", "{7BBD348E-CDD9-4462-B8C9-47613C5EC682}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Onnx", "extensions\ONNX\Onnx\Onnx.csproj", "{345DEF9B-6EE1-49DF-B46A-25E38CE9B151}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -591,6 +595,13 @@ Global {F192513B-265B-4943-A2A9-44E23B15BA18}.Debug|Any CPU.Build.0 = Debug|Any CPU {F192513B-265B-4943-A2A9-44E23B15BA18}.Release|Any CPU.ActiveCfg = Release|Any CPU {F192513B-265B-4943-A2A9-44E23B15BA18}.Release|Any CPU.Build.0 = Release|Any CPU + {7BBD348E-CDD9-4462-B8C9-47613C5EC682}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7BBD348E-CDD9-4462-B8C9-47613C5EC682}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7BBD348E-CDD9-4462-B8C9-47613C5EC682}.Release|Any CPU.ActiveCfg = Release|Any CPU + {345DEF9B-6EE1-49DF-B46A-25E38CE9B151}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {345DEF9B-6EE1-49DF-B46A-25E38CE9B151}.Debug|Any CPU.Build.0 = Debug|Any CPU + {345DEF9B-6EE1-49DF-B46A-25E38CE9B151}.Release|Any CPU.ActiveCfg = Release|Any CPU + {345DEF9B-6EE1-49DF-B46A-25E38CE9B151}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -685,6 +696,8 @@ Global {84AEC1DD-CBAE-400A-949C-91BA373C587D} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841} {B303885D-F64F-4EEB-B085-0014E863AF61} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841} {F192513B-265B-4943-A2A9-44E23B15BA18} = {155DA079-E267-49AF-973A-D1D44681970F} + 
{7BBD348E-CDD9-4462-B8C9-47613C5EC682} = {3C17F42B-CFC8-4900-8CFB-88936311E919} + {345DEF9B-6EE1-49DF-B46A-25E38CE9B151} = {155DA079-E267-49AF-973A-D1D44681970F} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {CC136C62-115C-41D1-B414-F9473EFF6EA8} diff --git a/extensions/ONNX/Onnx.FunctionalTests/Onnx.FunctionalTests.csproj b/extensions/ONNX/Onnx.FunctionalTests/Onnx.FunctionalTests.csproj new file mode 100644 index 000000000..6143d9130 --- /dev/null +++ b/extensions/ONNX/Onnx.FunctionalTests/Onnx.FunctionalTests.csproj @@ -0,0 +1,36 @@ + + + + Microsoft.Onnx.FunctionalTests + Microsoft.Onnx.FunctionalTests + net8.0 + LatestMajor + true + enable + enable + false + $(NoWarn);KMEXP01; + + + + + + + + + + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + diff --git a/extensions/ONNX/Onnx.FunctionalTests/OnnxTextGeneratorTest.cs b/extensions/ONNX/Onnx.FunctionalTests/OnnxTextGeneratorTest.cs new file mode 100644 index 000000000..a2e491b6d --- /dev/null +++ b/extensions/ONNX/Onnx.FunctionalTests/OnnxTextGeneratorTest.cs @@ -0,0 +1,67 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System.Diagnostics; +using System.Text; +using Microsoft.KernelMemory.AI; +using Microsoft.KernelMemory.AI.Onnx; +using Microsoft.KM.TestHelpers; +using Xunit.Abstractions; + +namespace Microsoft.Onnx.FunctionalTests; + +public sealed class OnnxTextGeneratorTest : BaseFunctionalTestCase +{ + private readonly OnnxTextGenerator _target; + private readonly Stopwatch _timer; + + public OnnxTextGeneratorTest( + IConfiguration cfg, + ITestOutputHelper output) : base(cfg, output) + { + this._timer = new Stopwatch(); + + this.OnnxConfig.Validate(); + this._target = new OnnxTextGenerator(this.OnnxConfig, loggerFactory: null); + + var modelDirectory = Path.GetFullPath(this.OnnxConfig.TextModelDir); + var modelFile = Directory.GetFiles(modelDirectory) + .FirstOrDefault(file => string.Equals(Path.GetExtension(file), ".ONNX", StringComparison.OrdinalIgnoreCase)); + + Console.WriteLine($"Using model {Path.GetFileNameWithoutExtension(modelFile)} from: {modelDirectory}"); + } + + [Fact] + [Trait("Category", "Onnx")] + public async Task ItGeneratesText() + { + var utcDate = DateTime.UtcNow.Date.ToString("MM/dd/yyyy"); + var systemPrompt = $"Following the format \"MM/dd/yyyy\", the current date is {utcDate}."; + var question = $"What is the current date?"; + var prompt = $"<|system|>{systemPrompt}<|end|><|user|>{question}<|end|><|assistant|>"; + + var options = new TextGenerationOptions(); + + // Act + this._timer.Restart(); + var tokens = this._target.GenerateTextAsync(prompt, options); + var result = new StringBuilder(); + await foreach (string token in tokens) + { + result.Append(token); + } + + this._timer.Stop(); + var answer = result.ToString(); + + // Assert + Console.WriteLine($"Model Output:\n=============================\n{answer}\n============================="); + Console.WriteLine($"Time: {this._timer.ElapsedMilliseconds / 1000} secs"); + Assert.Contains(utcDate.ToString(), answer, StringComparison.OrdinalIgnoreCase); + } + + protected override void 
Dispose(bool disposing) + { + base.Dispose(disposing); + this._target.Dispose(); + } +} diff --git a/extensions/ONNX/Onnx.FunctionalTests/Startup.cs b/extensions/ONNX/Onnx.FunctionalTests/Startup.cs new file mode 100644 index 000000000..fe9b40a95 --- /dev/null +++ b/extensions/ONNX/Onnx.FunctionalTests/Startup.cs @@ -0,0 +1,22 @@ +// Copyright (c) Microsoft. All rights reserved. + +/* IMPORTANT: the Startup class must be at the root of the namespace and + * the namespace must match exactly (required by Xunit.DependencyInjection) */ + +namespace Microsoft.Onnx.FunctionalTests; + +public class Startup +{ + public void ConfigureHost(IHostBuilder hostBuilder) + { + var config = new ConfigurationBuilder() + .AddJsonFile("appsettings.json") + .AddJsonFile("appsettings.development.json", optional: true) + .AddJsonFile("appsettings.Development.json", optional: true) + .AddUserSecrets() + .AddEnvironmentVariables() + .Build(); + + hostBuilder.ConfigureHostConfiguration(builder => builder.AddConfiguration(config)); + } +} diff --git a/extensions/ONNX/Onnx.FunctionalTests/Usings.cs b/extensions/ONNX/Onnx.FunctionalTests/Usings.cs new file mode 100644 index 000000000..38b0ca7bb --- /dev/null +++ b/extensions/ONNX/Onnx.FunctionalTests/Usings.cs @@ -0,0 +1,3 @@ +// Copyright (c) Microsoft. All rights reserved. + +global using Xunit; diff --git a/extensions/ONNX/Onnx.FunctionalTests/appsettings.json b/extensions/ONNX/Onnx.FunctionalTests/appsettings.json new file mode 100644 index 000000000..ec84442d4 --- /dev/null +++ b/extensions/ONNX/Onnx.FunctionalTests/appsettings.json @@ -0,0 +1,67 @@ +{ + "KernelMemory": { + "ServiceAuthorization": { + "Endpoint": "http://127.0.0.1:9001/", + "AccessKey": "" + }, + "Services": { + "Onnx": { + // Path to directory containing ONNX Model, e.g. 
"C:\\....\\Phi-3-mini-128k-instruct-onnx\\....\\cpu-int4-rtn-block-32" + "TextModelDir": "Z:\\tools\\LocalModels\\Phi-3-mini-128k-instruct-onnx\\cpu_and_mobile\\cpu-int4-rtn-block-32" + }, + "SimpleVectorDb": { + // Options: "Disk" or "Volatile". Volatile data is lost after each execution. + "StorageType": "Volatile", + // Directory where files are stored. + "Directory": "_vectors" + }, + "AzureAISearch": { + // "ApiKey" or "AzureIdentity". For other options see . + // AzureIdentity: use automatic AAD authentication mechanism. You can test locally + // using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET. + "Auth": "AzureIdentity", + "Endpoint": "https://<...>", + "APIKey": "" + }, + "Postgres": { + // Postgres instance connection string + "ConnectionString": "Host=localhost;Port=5432;Username=public;Password=;Database=public", + // Mandatory prefix to add to the name of table managed by KM, + // e.g. to exclude other tables in the same schema. + "TableNamePrefix": "tests-" + }, + "Qdrant": { + "Endpoint": "http://127.0.0.1:6333", + "APIKey": "" + }, + "OpenAI": { + // Name of the model used to generate text (text completion or chat completion) + "TextModel": "gpt-4o-mini", + // The max number of tokens supported by the text model. + "TextModelMaxTokenTotal": 16384, + // What type of text generation, by default autodetect using the model name. + // Possible values: "Auto", "TextCompletion", "Chat" + "TextGenerationType": "Auto", + // Name of the model used to generate text embeddings + "EmbeddingModel": "text-embedding-ada-002", + // The max number of tokens supported by the embedding model + // See https://platform.openai.com/docs/guides/embeddings/what-are-embeddings + "EmbeddingModelMaxTokenTotal": 8191, + // OpenAI API Key + "APIKey": "", + // OpenAI Organization ID (usually empty, unless you have multiple accounts on different orgs) + "OrgId": "", + // Endpoint to use. By default the system uses 'https://api.openai.com/v1'. 
+ // Change this to use proxies or services compatible with OpenAI HTTP protocol like LM Studio. + "Endpoint": "", + // How many times to retry in case of throttling + "MaxRetries": 10 + } + } + }, + "Logging": { + "LogLevel": { + "Default": "Information" + } + } +} \ No newline at end of file diff --git a/extensions/ONNX/Onnx/DependencyInjection.cs b/extensions/ONNX/Onnx/DependencyInjection.cs new file mode 100644 index 000000000..55e3b5018 --- /dev/null +++ b/extensions/ONNX/Onnx/DependencyInjection.cs @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.KernelMemory.AI; +using Microsoft.KernelMemory.AI.Onnx; + +#pragma warning disable IDE0130 // reduce number of "using" statements +// ReSharper disable once CheckNamespace - reduce number of "using" statements +namespace Microsoft.KernelMemory; + +/// +/// Kernel Memory builder extensions +/// +public static partial class KernelMemoryBuilderExtensions +{ + public static IKernelMemoryBuilder WithOnnxTextGeneration( + this IKernelMemoryBuilder builder, + string modelPath, + uint maxTokenTotal, + ITextTokenizer? textTokenizer = null) + { + var config = new OnnxConfig + { + TextModelDir = modelPath + }; + + builder.Services.AddOnnxTextGeneration(config, textTokenizer); + + return builder; + } + + public static IKernelMemoryBuilder WithOnnxTextGeneration( + this IKernelMemoryBuilder builder, + OnnxConfig config, + ITextTokenizer? textTokenizer = null) + { + builder.Services.AddOnnxTextGeneration(config, textTokenizer); + return builder; + } +} + +/// +/// .NET IServiceCollection dependency injection extensions. +/// +public static partial class DependencyInjection +{ + public static IServiceCollection AddOnnxTextGeneration( + this IServiceCollection services, + OnnxConfig config, + ITextTokenizer? 
textTokenizer = null) + { + config.Validate(); + return services + .AddSingleton(serviceProvider => new OnnxTextGenerator( + config: config, + textTokenizer: textTokenizer, + loggerFactory: serviceProvider.GetService())); + } +} diff --git a/extensions/ONNX/Onnx/Onnx.csproj b/extensions/ONNX/Onnx/Onnx.csproj new file mode 100644 index 000000000..96ec73dff --- /dev/null +++ b/extensions/ONNX/Onnx/Onnx.csproj @@ -0,0 +1,32 @@ + + + + net8.0 + LatestMajor + Microsoft.KernelMemory.AI.Onnx + Microsoft.KernelMemory.AI.Onnx + $(NoWarn);KMEXP00;KMEXP01;CA1724; + + + + true + Microsoft.KernelMemory.AI.Onnx + ONNX LLM connector for Kernel Memory + Provide access to ONNX LLM models in Kernel Memory to generate text + ONNX, Memory, Kernel Memory, Semantic Memory, Episodic Memory, Declarative Memory, AI, Artificial Intelligence, Semantic Search, Memory DB + bin/$(Configuration)/$(TargetFramework)/$(AssemblyName).xml + + + + + + + + + + + + + + + diff --git a/extensions/ONNX/Onnx/OnnxConfig.cs b/extensions/ONNX/Onnx/OnnxConfig.cs new file mode 100644 index 000000000..4a0ce66fe --- /dev/null +++ b/extensions/ONNX/Onnx/OnnxConfig.cs @@ -0,0 +1,171 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.IO; +using System.Linq; + +#pragma warning disable IDE0130 // reduce number of "using" statements + +#pragma warning disable IDE0130 // reduce number of "using" statements +// ReSharper disable once CheckNamespace - reduce number of "using" statements +namespace Microsoft.KernelMemory; + +public class OnnxConfig +{ + /// + /// An enum representing the possible text generation search types used by OnnxTextGenerator. + /// See https://onnxruntime.ai/docs/genai/reference/config.html#search-combinations for more details. + /// + public enum OnnxSearchType + { + /// + /// A decoding algorithm that keeps track of the top K sequences at each step. It explores + /// multiple paths simultaneously, balancing exploration and exploitation. 
Often results in more + /// coherent and higher quality text generation than Greedy Search would. + /// + BeamSearch, + + /// + /// The default and simplest decoding algorithm. At each step, a token is selected with the highest + /// probability as the next word in the sequence. + /// + GreedySearch, + + /// + /// Combined Top-P (Nucleus) and Top-K Sampling: A decoding algorithm that samples from the top k tokens + /// with the highest probabilities, while also considering the smallest set of tokens whose cumulative + /// probability exceeds a threshold p. This approach dynamically balances diversity and coherence in + /// text generation by adjusting the sampling pool based on both fixed and cumulative probability criteria. + /// + TopN + } + + /// + /// Path to the directory containing the .ONNX file for Text Generation. + /// + public string TextModelDir { get; set; } = string.Empty; + + /// + /// The maximum length of the response that the model will generate. See https://onnxruntime.ai/docs/genai/reference/config.html + /// + public int MaxTokens { get; set; } = 2048; + + /// + /// The minimum length of the response that the model will generate. See https://onnxruntime.ai/docs/genai/reference/config.html + /// + public uint MinLength { get; set; } = 0; + + /// + /// The algorithm used in text generation. Defaults to GreedySearch. + /// + public OnnxSearchType SearchType { get; set; } = OnnxSearchType.GreedySearch; + + /// + /// The number of beams to apply when generating the output sequence using beam search. + /// If NumBeams=1, then generation is performed using greedy search. If NumBeans > 1, then + /// generation is performed using beam search. A null value implies using TopN search. + /// + public uint? NumBeams { get; set; } = 1; + + /// + /// Only includes the most probable tokens with probabilities that add up to P or higher. + /// Defaults to 1, which includes all of the tokens. Range is 0 to 1, exclusive of 0. 
+ /// + public double NucleusSampling { get; set; } = 1.0; + + /// + /// Whether to stop the beam search when at least NumBeams sentences are finished per batch or not. Defaults to false. + /// + public bool EarlyStopping { get; set; } = false; + + /// + /// The number of sequences (responses) to generate. Returns the sequences with the highest scores in order. + /// + public int ResultsPerPrompt { get; set; } = 1; + + /// + /// Only includes tokens that fall within the list of the K most probable tokens. Range is 1 to the vocabulary size. + /// Defaults to 50. + /// + public uint TopK { get; set; } = 50; + + /// + /// Discounts the scores of previously generated tokens if set to a value greater than 1. + /// Defaults to 1. + /// + public double RepetitionPenalty { get; set; } = 1.0; + + /// + /// Controls the length of the output generated. Value less than 1 encourages the generation + /// to produce shorter sequences. Values greater than 1 encourages longer sequences. Defaults to 1. + /// + public double LengthPenalty { get; set; } = 1.0; + + /// + /// Verify that the current state is valid. 
+ /// + public void Validate(bool allowIO = true) + { + if (string.IsNullOrEmpty(this.TextModelDir)) + { + throw new ConfigurationException($"Onnx: {nameof(this.TextModelDir)} is a required field."); + } + + var modelDir = Path.GetFullPath(this.TextModelDir); + + if (allowIO) + { + if (!Directory.Exists(modelDir)) + { + throw new ConfigurationException($"Onnx: {this.TextModelDir} does not exist."); + } + + if (Directory.GetFiles(modelDir).Length == 0) + { + throw new ConfigurationException($"Onnx: {this.TextModelDir} is an empty directory."); + } + + var modelFiles = Directory.GetFiles(modelDir) + .Where(file => string.Equals(Path.GetExtension(file), ".ONNX", StringComparison.OrdinalIgnoreCase)); + + if (modelFiles == null) + { + throw new ConfigurationException($"Onnx: {this.TextModelDir} does not contain a valid .ONNX model."); + } + } + + if (this.SearchType == OnnxSearchType.GreedySearch) + { + if (this.NumBeams != 1) + { + throw new ConfigurationException($"Onnx: {nameof(this.NumBeams)} is only used with Beam Search. Change {nameof(this.NumBeams)} to 1, or change {nameof(this.SearchType)} to BeamSearch."); + } + + if (this.EarlyStopping != false) + { + throw new ConfigurationException($"Onnx: {nameof(this.EarlyStopping)} is only used with Beam Search. Change {nameof(this.EarlyStopping)} to false, or change {nameof(this.SearchType)} to BeamSearch."); + } + } + + if (this.SearchType == OnnxSearchType.BeamSearch) + { + if (this.NumBeams == null) + { + throw new ConfigurationException($"Onnx: {nameof(this.NumBeams)} is required for Beam Search. Change {nameof(this.NumBeams)} to a value >= 1, or change the {nameof(this.SearchType)}."); + } + } + + if (this.SearchType == OnnxSearchType.TopN) + { + if (this.NumBeams != null) + { + throw new ConfigurationException($"Onnx: {nameof(this.NumBeams)} isn't required with TopN Search. 
Change {nameof(this.NumBeams)} to null, or change the {nameof(this.SearchType)}."); + } + + if (this.EarlyStopping != false) + { + throw new ConfigurationException($"Onnx: {nameof(this.EarlyStopping)} is only used with Beam Search. Change {nameof(this.EarlyStopping)} to false, or change {nameof(this.SearchType)} to BeamSearch."); + } + } + } +} diff --git a/extensions/ONNX/Onnx/OnnxTextGenerator.cs b/extensions/ONNX/Onnx/OnnxTextGenerator.cs new file mode 100644 index 000000000..f0dbab49c --- /dev/null +++ b/extensions/ONNX/Onnx/OnnxTextGenerator.cs @@ -0,0 +1,190 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.KernelMemory.AI.OpenAI; +using Microsoft.KernelMemory.Diagnostics; +using Microsoft.ML.OnnxRuntimeGenAI; +using static Microsoft.KernelMemory.OnnxConfig; + +namespace Microsoft.KernelMemory.AI.Onnx; + +/// +/// Text generator based on ONNX models, via OnnxRuntimeGenAi +/// See https://github.com/microsoft/onnxruntime-genai +/// +[Experimental("KMEXP01")] +public sealed class OnnxTextGenerator : ITextGenerator, IDisposable +{ + /// + /// The ONNX Model used for text generation + /// + private readonly Model _model; + + /// + /// Tokenizer used with the Onnx Generator and Model classes to produce tokens. + /// This has the potential to contain a null value, depending on the contents of the Model Directory. + /// + private readonly Tokenizer? 
_tokenizer = default; + + /// + /// Tokenizer used for GetTokens() and CountTokens() + /// + private readonly ITextTokenizer _textTokenizer; + + private readonly ILogger _log; + + private readonly OnnxConfig _config; + + /// + public int MaxTokenTotal { get; internal set; } + + /// + /// Create a new instance + /// + /// Configuration settings + /// Text Tokenizer + /// Application Logger instance + public OnnxTextGenerator( + OnnxConfig config, + ITextTokenizer? textTokenizer = null, + ILoggerFactory? loggerFactory = null) + { + this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger(); + if (textTokenizer == null) + { + this._log.LogWarning( + "Tokenizer not specified, will use {0}. The token count might be incorrect, causing unexpected errors", + nameof(GPT4oTokenizer)); + textTokenizer = new GPT4oTokenizer(); + } + + config.Validate(); + this._config = config; + this.MaxTokenTotal = (int)config.MaxTokens; + this._textTokenizer = textTokenizer; + + var modelDir = Path.GetFullPath(config.TextModelDir); + var modelFile = Directory.GetFiles(modelDir) + .FirstOrDefault(file => string.Equals(Path.GetExtension(file), ".ONNX", StringComparison.OrdinalIgnoreCase)); + + this._log.LogDebug("Loading Onnx model: {1} from directory {0}", modelDir, Path.GetFileNameWithoutExtension(modelFile)); + this._model = new Model(config.TextModelDir); + this._tokenizer = new Tokenizer(this._model); + this._log.LogDebug("Onnx model loaded"); + } + + /// + public async IAsyncEnumerable GenerateTextAsync( + string prompt, + TextGenerationOptions? 
options = null, + [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + var tokens = this._tokenizer?.Encode(prompt); + using var generatorParams = new GeneratorParams(this._model); + + generatorParams.SetSearchOption("max_length", this.MaxTokenTotal); + generatorParams.SetSearchOption("min_length", this._config.MinLength); + generatorParams.SetSearchOption("num_return_sequences", this._config.ResultsPerPrompt); + generatorParams.SetSearchOption("repetition_penalty", this._config.RepetitionPenalty); + generatorParams.SetSearchOption("length_penalty", this._config.LengthPenalty); + generatorParams.SetSearchOption("temperature", 0); + + if (options != null) + { + generatorParams.SetSearchOption("num_return_sequences", options.ResultsPerPrompt); + generatorParams.SetSearchOption("temperature", options.Temperature); + + if (options.MaxTokens > 0) + { + generatorParams.SetSearchOption("max_length", (int)options.MaxTokens); + } + } + + switch (this._config.SearchType) + { + case OnnxSearchType.BeamSearch: + generatorParams.SetSearchOption("do_sample", false); + generatorParams.SetSearchOption("early_stopping", this._config.EarlyStopping); + + if (this._config.NumBeams != null) + { + generatorParams.SetSearchOption("num_beams", (double)this._config.NumBeams); + } + + break; + + case OnnxSearchType.TopN: + generatorParams.SetSearchOption("do_sample", true); + generatorParams.SetSearchOption("top_k", this._config.TopK); + + generatorParams.SetSearchOption("top_p", options is { NucleusSampling: > 0 and <= 1 } + ? 
options.NucleusSampling + : this._config.NucleusSampling); + + break; + + default: + + generatorParams.SetSearchOption("do_sample", false); + + if (this._config.NumBeams != null) + { + generatorParams.SetSearchOption("num_beams", (double)this._config.NumBeams); + } + + break; + } + + generatorParams.SetInputSequences(tokens); + + using (var generator = new Generator(this._model, generatorParams)) + { + List outputTokens = new(); + + while (!generator.IsDone() && cancellationToken.IsCancellationRequested == false) + { + generator.ComputeLogits(); + generator.GenerateNextToken(); + + outputTokens.AddRange(generator.GetSequence(0)); + + if (outputTokens.Count > 0 && this._tokenizer != null) + { + var newToken = outputTokens[^1]; + yield return this._tokenizer.Decode(new int[] { newToken }); + } + } + } + + await Task.CompletedTask.ConfigureAwait(false); + } + + /// + public int CountTokens(string text) + { + // TODO: Implement with _tokenizer and remove _textTokenizer + return this._textTokenizer.CountTokens(text); + } + + /// + public IReadOnlyList GetTokens(string text) + { + // TODO: Implement with _tokenizer and remove _textTokenizer + return this._textTokenizer.GetTokens(text); + } + + /// + public void Dispose() + { + this._model?.Dispose(); + this._tokenizer?.Dispose(); + } +} diff --git a/extensions/ONNX/README.md b/extensions/ONNX/README.md new file mode 100644 index 000000000..2ab365705 --- /dev/null +++ b/extensions/ONNX/README.md @@ -0,0 +1,25 @@ +# Kernel Memory with ONNX + +[![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.AI.Onnx)](https://www.nuget.org/packages/Microsoft.KernelMemory.AI.Onnx/) +[![Discord](https://img.shields.io/discord/1063152441819942922?label=Discord&logo=discord&logoColor=white&color=d82679)](https://aka.ms/KMdiscord) + +This project contains the +[ONNX](https://onnxruntime.ai/docs/genai/) +LLM connector to access to LLM models via Onnx service to generate text. 
+ +Sample code: + +```csharp +var config = new OnnxConfig +{ + TextModelDir = "C:\\....\\Phi-3-mini-128k-instruct-onnx\\....\\cpu-int4-rtn-block-32" +}; + +var memory = new KernelMemoryBuilder() + .WithOnnxTextGeneration(config) + .Build(); + +await memory.ImportTextAsync("Today is October 32nd, 2476"); + +var answer = await memory.AskAsync("What's the current date (don't check for validity)?"); +``` diff --git a/service/tests/TestHelpers/BaseFunctionalTestCase.cs b/service/tests/TestHelpers/BaseFunctionalTestCase.cs index 856bde08c..d73110713 100644 --- a/service/tests/TestHelpers/BaseFunctionalTestCase.cs +++ b/service/tests/TestHelpers/BaseFunctionalTestCase.cs @@ -31,6 +31,7 @@ public abstract class BaseFunctionalTestCase : IDisposable protected readonly SimpleVectorDbConfig SimpleVectorDbConfig; protected readonly LlamaSharpConfig LlamaSharpConfig; protected readonly ElasticsearchConfig ElasticsearchConfig; + protected readonly OnnxConfig OnnxConfig; // IMPORTANT: install Xunit.DependencyInjection package protected BaseFunctionalTestCase(IConfiguration cfg, ITestOutputHelper output) @@ -50,6 +51,7 @@ protected BaseFunctionalTestCase(IConfiguration cfg, ITestOutputHelper output) this.SimpleVectorDbConfig = cfg.GetSection("KernelMemory:Services:SimpleVectorDb").Get() ?? new(); this.LlamaSharpConfig = cfg.GetSection("KernelMemory:Services:LlamaSharp").Get() ?? new(); this.ElasticsearchConfig = cfg.GetSection("KernelMemory:Services:Elasticsearch").Get() ?? new(); + this.OnnxConfig = cfg.GetSection("KernelMemory:Services:Onnx").Get() ?? new(); } protected IKernelMemory GetMemoryWebClient() diff --git a/service/tests/TestHelpers/TestHelpers.csproj b/service/tests/TestHelpers/TestHelpers.csproj index 3996f22f9..b0b991041 100644 --- a/service/tests/TestHelpers/TestHelpers.csproj +++ b/service/tests/TestHelpers/TestHelpers.csproj @@ -16,6 +16,7 @@ +