diff --git a/Directory.Build.props b/Directory.Build.props index d9a0b701a..a7a53de17 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -2,7 +2,7 @@ - 0.51.0 + 0.60.0 12 diff --git a/README.md b/README.md index d6e066f72..77e71699c 100644 --- a/README.md +++ b/README.md @@ -33,52 +33,17 @@ This repository presents best practices and a reference architecture for memory AI and LLMs application scenarios. Please note that **the provided code serves as a demonstration** and is **not an officially supported** Microsoft offering. -## Kernel Memory (KM) and Semantic Memory (SM) +# Synchronous Memory API (aka "serverless") -**Kernel Memory (KM) is a service** built on the feedback received and lessons learned -from developing Semantic Kernel (SK) and Semantic Memory (SM). It provides several -features that would otherwise have to be developed manually, such as storing files, -extracting text from files, providing a framework to secure users' data, etc. -The KM codebase is entirely in .NET, which eliminates the need to write and maintain -features in multiple languages. As a service, **KM can be used from any language, tool, -or platform, e.g. browser extensions and ChatGPT assistants.** - -**Semantic Memory (SM) is a library for C#, Python, and Java** that wraps direct calls -to databases and supports vector search. It was developed as part of the Semantic -Kernel (SK) project and serves as the first public iteration of long-term memory. -The core library is maintained in three languages, while the list of supported -storage engines (known as "connectors") varies across languages. - -Here's comparison table: - -| Feature | Kernel Memory | Semantic Memory | -|-----------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------| -| Data formats | Web pages, PDF, Images, Word, PowerPoint, Excel, Markdown, Text, JSON, HTML | Text only | -| Search | Cosine similarity, Hybrid search with filters (AND/OR conditions) | Cosine similarity | -| Language support | Any language, command line tools, browser extensions, low-code/no-code apps, chatbots, assistants, etc. | C#, Python, Java | -| Storage engines | [Azure AI Search](https://azure.microsoft.com/products/ai-services/ai-search), [Elasticsearch](https://www.nuget.org/packages/FreeMindLabs.KernelMemory.Elasticsearch), [MongoDB Atlas](https://www.mongodb.com/atlas/database), [Postgres+pgvector](https://github.com/microsoft/kernel-memory/extensions/postgres), [Qdrant](https://qdrant.tech), [Redis](https://redis.io), [MSSQL Server](https://www.nuget.org/packages/KernelMemory.MemoryStorage.SqlServer), In memory KNN, On disk KNN. 
In progress: Azure Cosmos DB for MongoDB vCore, [Chroma](https://www.trychroma.com) | Azure AI Search, Chroma, DuckDB, Kusto, Milvus, MongoDB, Pinecone, Postgres, Qdrant, Redis, SQLite, Weaviate | -| File storage | Disk, [Azure Blobs](https://learn.microsoft.com/azure/storage/blobs/storage-blobs-introduction), [MongoDB Atlas](https://www.mongodb.com/atlas/database), In memory (volatile) | - | -| RAG | Yes, with sources lookup | - | -| Summarization | Yes | - | -| OCR | Yes via [Azure Document Intelligence](https://azure.microsoft.com/products/ai-services/ai-document-intelligence) | - | -| Security Filters | Yes | - | -| Large document ingestion | Yes, including async processing using queues ([Azure Queues](https://learn.microsoft.com/azure/storage/queues/storage-queues-introduction), [RabbitMQ](https://www.rabbitmq.com), File based or In memory queues) | - | -| Document storage | Yes | - | -| Custom storage schema | some DBs | - | -| Vector DBs with internal embedding | Yes | - | -| Concurrent write to multiple vector DBs | Yes | - | -| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [LLamaSharp](https://github.com/SciSharp/LLamaSharp) via [llama.cpp](https://github.com/ggerganov/llama.cpp), [LM Studio](https://lmstudio.ai/), Anthropic, Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. -| LLMs with dedicated tokenization | Yes | No | -| Cloud deployment | Yes | - | -| Web service with OpenAPI | Yes | - | - -# Kernel Memory in serverless mode - -Kernel Memory works and scales at best when running as a **Web Service**, allowing to +Kernel Memory works and scales best when running as an asynchronous **Web Service**, allowing you to ingest thousands of documents and information without blocking your app. -However, you can use Kernel Memory also in serverless mode, embedding the `MemoryServerless` -class in your .NET app. +However, Kernel Memory can also run in serverless mode, embedding a `MemoryServerless` +class instance in .NET backend/console/desktop apps for synchronous processing. This approach +also works in ASP.NET Web APIs and Azure Functions. Each request is processed +immediately, although calling clients are responsible for handling transient errors. + +![image](docs/infra-sync.png) > ### Importing documents into your Kernel Memory can be as simple as this: > @@ -108,7 +73,7 @@ class in your .NET app. > var answer2 = await memory.AskAsync("what's the project timeline?", filter: new MemoryFilter().ByTag("user", "Blake")); > ``` -The code leverages the default documents ingestion pipeline: +The example leverages the default document ingestion pipeline: 1. Extract text: recognize the file format and extract the information 2. Partition the text in small chunks, to optimize search @@ -117,51 +82,41 @@ The code leverages the default documents ingestion pipeline: [Azure AI Search](https://learn.microsoft.com/azure/search/vector-search-overview), [Qdrant](https://qdrant.tech/) or other DBs. -Documents are organized by users, safeguarding their private information. -Furthermore, memories can be categorized and structured using **tags**, enabling -efficient search and retrieval through faceted navigation. - -# Data lineage, citations - -All memories and answers are fully correlated to the data provided.
When -producing an answer, Kernel Memory includes all the information needed -to verify its accuracy: - -```csharp -await memory.ImportFileAsync("NASA-news.pdf"); - -var answer = await memory.AskAsync("Any news from NASA about Orion?"); - -Console.WriteLine(answer.Result + "/n"); - -foreach (var x in answer.RelevantSources) -{ - Console.WriteLine($" * {x.SourceName} -- {x.Partitions.First().LastUpdate:D}"); -} -``` +In the example, memories are organized by user via **tags**, safeguarding private information. +Tags can also be used to categorize and structure memories, enabling efficient +search and retrieval through faceted navigation. -> Yes, there is news from NASA about the Orion spacecraft. NASA has invited the -> media to see a new test version of the Orion spacecraft and the hardware that -> will be used to recover the capsule and astronauts upon their return from -> space during the Artemis II mission. The event is scheduled to take place at -> Naval Base San Diego on Wednesday, August 2, at 11 a.m. PDT. Personnel from -> NASA, the U.S. Navy, and the U.S. Air Force will be available to speak with -> the media. Teams are currently conducting tests in the Pacific Ocean to -> demonstrate and evaluate the processes, procedures, and hardware for recovery -> operations for crewed Artemis missions. These tests will help prepare the -> team for Artemis II, which will be NASA's first crewed mission under the -> Artemis program. The Artemis II crew, consisting of NASA astronauts Reid -> Wiseman, Victor Glover, and Christina Koch, and Canadian Space Agency -> astronaut Jeremy Hansen, will participate in recovery testing at sea next -> year. For more information about the Artemis program, you can visit the NASA -> website. +> ### Data lineage, citations, referencing sources: +> +> All memories and answers are fully correlated to the data provided. When +> producing an answer, Kernel Memory includes all the information needed +> to verify its accuracy: +> +> ```csharp +> await memory.ImportFileAsync("NASA-news.pdf"); > -> - **NASA-news.pdf -- Tuesday, August 1, 2023** +> var answer = await memory.AskAsync("Any news from NASA about Orion?"); +> +> Console.WriteLine(answer.Result + "\n"); +> +> foreach (var x in answer.RelevantSources) +> { +> Console.WriteLine($" * {x.SourceName} -- {x.Partitions.First().LastUpdate:D}"); +> } +> ``` +> +> > Yes, there is news from NASA about the Orion spacecraft. NASA has invited the +> > media to see a new test version [......] For more information about the Artemis program, +> > you can visit the NASA website. +> > +> > - **NASA-news.pdf -- Tuesday, August 1, 2023** -## Using Kernel Memory Service +# Memory as a Service - Asynchronous API Depending on your scenarios, you might want to run all the code **locally -inside your process, or remotely through an asynchronous service.** +inside your process, or remotely through an asynchronous and scalable service.** + +![image](docs/infra-async.png) If you're importing small files, and need only C# and can block the process during the import, local-in-process execution can be fine, using @@ -169,16 +124,16 @@ the **MemoryServerless** seen above.
However, if you are in one of these scenarios: -* I'd just like a web service to import data and send queries to answer -* My app is written in **TypeScript, Java, Rust, or some other language** -* I want to define **custom pipelines mixing multiple languages** - like Python, TypeScript, etc -* I'm importing **big documents that can require minutes to process**, and +- I'd just like a web service to import data and send queries to answer +- My app is written in **TypeScript, Java, Rust, or some other language** +- I'm importing **big documents that can require minutes to process**, and I don't want to block the user interface -* I need memory import to **run independently, supporting failures and retry +- I need memory import to **run independently, supporting failures and retry logic** +- I want to define **custom pipelines mixing multiple languages** + like Python, TypeScript, etc -then you can deploy Kernel Memory as a service, plugging in the +then you can deploy Kernel Memory as a backend service, plugging in the default handlers, or your custom Python/TypeScript/Java/etc. handlers, and leveraging the asynchronous non-blocking memory encoding process, sending documents and asking questions using the **MemoryWebClient**. @@ -186,6 +141,45 @@ sending documents and asking questions using the **MemoryWebClient**. [Here](service/Service/README.md) you can find a complete set of instruction about [how to run the Kernel Memory service](service/Service/README.md). +# Kernel Memory (KM) and SK Semantic Memory (SM) + +**Kernel Memory (KM) is a service** built on the feedback received and lessons learned +from developing Semantic Kernel (SK) and Semantic Memory (SM). It provides several +features that would otherwise have to be developed manually, such as storing files, +extracting text from files, providing a framework to secure users' data, etc. +The KM codebase is entirely in .NET, which eliminates the need to write and maintain +features in multiple languages. As a service, **KM can be used from any language, tool, +or platform, e.g. browser extensions and ChatGPT assistants.** + +**Semantic Memory (SM) is a library for C#, Python, and Java** that wraps direct calls +to databases and supports vector search. It was developed as part of the Semantic +Kernel (SK) project and serves as the first public iteration of long-term memory. +The core library is maintained in three languages, while the list of supported +storage engines (known as "connectors") varies across languages. 
+ +Here's comparison table: + +| Feature | Kernel Memory | Semantic Memory | +| --------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ | +| Data formats | Web pages, PDF, Images, Word, PowerPoint, Excel, Markdown, Text, JSON, HTML | Text only | +| Search | Cosine similarity, Hybrid search with filters (AND/OR conditions) | Cosine similarity | +| Language support | Any language, command line tools, browser extensions, low-code/no-code apps, chatbots, assistants, etc. | C#, Python, Java | +| Storage engines | [Azure AI Search](https://azure.microsoft.com/products/ai-services/ai-search), [Elasticsearch](https://www.nuget.org/packages/FreeMindLabs.KernelMemory.Elasticsearch), [MongoDB Atlas](https://www.mongodb.com/atlas/database), [Postgres+pgvector](https://github.com/microsoft/kernel-memory/extensions/postgres), [Qdrant](https://qdrant.tech), [Redis](https://redis.io), [SQL Server](https://www.nuget.org/packages/KernelMemory.MemoryStorage.SqlServer), In memory KNN, On disk KNN. | Azure AI Search, Chroma, DuckDB, Kusto, Milvus, MongoDB, Pinecone, Postgres, Qdrant, Redis, SQLite, Weaviate | +| File storage | Disk, [Azure Blobs](https://learn.microsoft.com/azure/storage/blobs/storage-blobs-introduction), [MongoDB Atlas](https://www.mongodb.com/atlas/database), In memory (volatile) | - | +| RAG | Yes, with sources lookup | - | +| Summarization | Yes | - | +| OCR | Yes via [Azure Document Intelligence](https://azure.microsoft.com/products/ai-services/ai-document-intelligence) | - | +| Security Filters | Yes | - | +| Large document ingestion | Yes, including async processing using queues ([Azure Queues](https://learn.microsoft.com/azure/storage/queues/storage-queues-introduction), [RabbitMQ](https://www.rabbitmq.com), File based or In memory queues) | - | +| Document storage | Yes | - | +| Custom storage schema | some DBs | - | +| Vector DBs with internal embedding | Yes | - | +| Concurrent write to multiple vector DBs | Yes | - | +| LLMs | [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/concepts/models), [OpenAI](https://platform.openai.com/docs/models), [LLamaSharp](https://github.com/SciSharp/LLamaSharp) via [llama.cpp](https://github.com/ggerganov/llama.cpp), [LM Studio](https://lmstudio.ai/), Anthropic, Semantic Kernel connectors | Azure OpenAI, OpenAI, Gemini, Hugging Face, ONNX, custom ones, etc. 
| +| LLMs with dedicated tokenization | Yes | No | +| Cloud deployment | Yes | - | +| Web service with OpenAPI | Yes | - | + ## Quick test using the Docker image If you want to give the service a quick test, use the following command @@ -214,7 +208,7 @@ on macOS/Linux: docker run --volume ./appsettings.Development.json:/app/appsettings.Production.json -it --rm -p 9001:9001 kernelmemory/service -### To import files using Kernel Memory **web service**, use `MemoryWebClient`: +# Import files using KM web service and `MemoryWebClient` > ```csharp > #reference clients/WebClient/WebClient.csproj @@ -232,15 +226,16 @@ on macOS/Linux: > .AddTag("fiscalYear", "2023")); > ``` -### Getting answers via the web service +# Get answers via the web service > ``` > curl http://127.0.0.1:9001/ask -d'{"query":"Any news from NASA about Orion?"}' -H 'Content-Type: application/json' > ``` +> > ```json > { > "Query": "Any news from NASA about Orion?", -> "Text": "Yes, there is news from NASA about the Orion spacecraft. NASA has invited the media to see a new test version of the Orion spacecraft and the hardware that will be used to recover the capsule and astronauts upon their return from space during the Artemis II mission. The event is scheduled to take place at Naval Base San Diego on August 2nd at 11 a.m. PDT. Personnel from NASA, the U.S. Navy, and the U.S. Air Force will be available to speak with the media. Teams are currently conducting tests in the Pacific Ocean to demonstrate and evaluate the processes, procedures, and hardware for recovery operations for crewed Artemis missions. These tests will help prepare the team for Artemis II, which will be NASA's first crewed mission under the Artemis program. The Artemis II crew, consisting of NASA astronauts Reid Wiseman, Victor Glover, and Christina Koch, and Canadian Space Agency astronaut Jeremy Hansen, will participate in recovery testing at sea next year. For more information about the Artemis program, you can visit the NASA website.", +> "Text": "Yes, there is news from NASA about the Orion spacecraft. NASA has invited the media to see a new test version [......] For more information about the Artemis program, you can visit the NASA website.", > "RelevantSources": [ > { > "Link": "...", @@ -248,7 +243,7 @@ on macOS/Linux: > "SourceName": "file5-NASA-news.pdf", > "Partitions": [ > { -> "Text": "Skip to main content\nJul 28, 2023\nMEDIA ADVISORY M23-095\nNASA Invites Media to See Recovery Craft for\nArtemis Moon Mission\n(/sites/default/files/thumbnails/image/ksc-20230725-ph-fmx01_0003orig.jpg)\nAboard the USS John P. Murtha, NASA and Department of Defense personnel practice recovery operations for Artemis II in July. A\ncrew module test article is used to help verify the recovery team will be ready to recovery the Artemis II crew and the Orion spacecraft.\nCredits: NASA/Frank Michaux\nMedia are invited to see the new test version of NASA’s Orion spacecraft and the hardware teams will use\nto recover the capsule and astronauts upon their return from space during the Artemis II\n(http://www.nasa.gov/artemis-ii) mission. The event will take place at 11 a.m. PDT on Wednesday, Aug. 2,\nat Naval Base San Diego.\nPersonnel involved in recovery operations from NASA, the U.S. Navy, and the U.S. Air Force will be\navailable to speak with media.\nU.S. 
media interested in attending must RSVP by 4 p.m., Monday, July 31, to the Naval Base San Diego\nPublic Affairs (mailto:nbsd.pao@us.navy.mil) or 619-556-7359.\nOrion Spacecraft (/exploration/systems/orion/index.html)\nNASA Invites Media to See Recovery Craft for Artemis Moon Miss... https://www.nasa.gov/press-release/nasa-invites-media-to-see-recov...\n1 of 3 7/28/23, 4:51 PMTeams are currently conducting the first in a series of tests in the Pacific Ocean to demonstrate and\nevaluate the processes, procedures, and hardware for recovery operations (https://www.nasa.gov\n/exploration/systems/ground/index.html) for crewed Artemis missions. The tests will help prepare the\nteam for Artemis II, NASA’s first crewed mission under Artemis that will send four astronauts in Orion\naround the Moon to checkout systems ahead of future lunar missions.\nThe Artemis II crew – NASA astronauts Reid Wiseman, Victor Glover, and Christina Koch, and CSA\n(Canadian Space Agency) astronaut Jeremy Hansen – will participate in recovery testing at sea next year.\nFor more information about Artemis, visit:\nhttps://www.nasa.gov/artemis (https://www.nasa.gov/artemis)\n-end-\nRachel Kraft\nHeadquarters, Washington\n202-358-1100\nrachel.h.kraft@nasa.gov (mailto:rachel.h.kraft@nasa.gov)\nMadison Tuttle\nKennedy Space Center, Florida\n321-298-5868\nmadison.e.tuttle@nasa.gov (mailto:madison.e.tuttle@nasa.gov)\nLast Updated: Jul 28, 2023\nEditor: Claire O’Shea\nTags:  Artemis (/artemisprogram),Ground Systems (http://www.nasa.gov/exploration/systems/ground\n/index.html),Kennedy Space Center (/centers/kennedy/home/index.html),Moon to Mars (/topics/moon-to-\nmars/),Orion Spacecraft (/exploration/systems/orion/index.html)\nNASA Invites Media to See Recovery Craft for Artemis Moon Miss... https://www.nasa.gov/press-release/nasa-invites-media-to-see-recov...\n2 of 3 7/28/23, 4:51 PM", +> "Text": "Skip to main content\nJul 28, 2023\nMEDIA ADVISORY M23-095\nNASA Invites Media to See Recovery Craft for\nArtemis Moon Mission\n(/sites/default/files/thumbnails/image/ksc-20230725-ph-fmx01_0003orig.jpg)\nAboard the [......] to Mars (/topics/moon-to-\nmars/),Orion Spacecraft (/exploration/systems/orion/index.html)\nNASA Invites Media to See Recovery Craft for Artemis Moon Miss... https://www.nasa.gov/press-release/nasa-invites-media-to-see-recov...\n2 of 3 7/28/23, 4:51 PM", > "Relevance": 0.8430657, > "SizeInTokens": 863, > "LastUpdate": "2023-08-01T08:15:02-07:00" @@ -261,7 +256,7 @@ on macOS/Linux: You can find a [full example here](examples/001-dotnet-WebClient/README.md). -## Custom memory ingestion pipelines +# Custom memory ingestion pipelines On the other hand, if you need a custom data pipeline, you can also customize the steps, which will be handled by your custom business logic: @@ -287,7 +282,7 @@ await memory.ImportDocumentAsync( steps: new[] { "step1", "step2", "step3" }); ``` -# Web API specs +# Web API specs with OpenAI swagger The API schema is available at http://127.0.0.1:9001/swagger/index.html when running the service locally with OpenAPI enabled. @@ -298,8 +293,8 @@ running the service locally with OpenAPI enabled. 1. [Collection of Jupyter notebooks with various scenarios](examples/000-notebooks) 2. [Using Kernel Memory web service to upload documents and answer questions](examples/001-dotnet-WebClient) -4. [Importing files and asking question without running the service (serverless mode)](examples/002-dotnet-Serverless) -3. [Using KM Plugin for Semantic Kernel](examples/003-dotnet-SemanticKernel-plugin) +3. 
[Importing files and asking question without running the service (serverless mode)](examples/002-dotnet-Serverless) +4. [Using KM Plugin for Semantic Kernel](examples/003-dotnet-SemanticKernel-plugin) 5. [Processing files with custom logic (custom handlers) in serverless mode](examples/004-dotnet-serverless-custom-pipeline) 6. [Processing files with custom logic (custom handlers) in asynchronous mode](examples/005-dotnet-AsyncMemoryCustomPipeline) 7. [Upload files and ask questions from command line using curl](examples/006-curl-calling-webservice) @@ -336,27 +331,27 @@ running the service locally with OpenAPI enabled. ### .NET packages -* **Microsoft.KernelMemory.WebClient:** .NET web client to call a running instance of Kernel Memory web service. +- **Microsoft.KernelMemory.WebClient:** .NET web client to call a running instance of Kernel Memory web service. [![Nuget package](https://img.shields.io/nuget/vpre/Microsoft.KernelMemory.WebClient)](https://www.nuget.org/packages/Microsoft.KernelMemory.WebClient/) [![Example code](https://img.shields.io/badge/example-code-blue)](examples/001-dotnet-WebClient) -* **Microsoft.KernelMemory.Core:** Kernel Memory core library including all extensions, can be used to build custom pipelines and handlers, contains +- **Microsoft.KernelMemory.Core:** Kernel Memory core library including all extensions, can be used to build custom pipelines and handlers, contains also the serverless client to use memory in a synchronous way without the web service. [![Nuget package](https://img.shields.io/nuget/vpre/Microsoft.KernelMemory.Core)](https://www.nuget.org/packages/Microsoft.KernelMemory.Core/) [![Example code](https://img.shields.io/badge/example-code-blue)](examples/002-dotnet-Serverless) -* **Microsoft.KernelMemory.Service.AspNetCore:** an extension to load Kernel Memory into your ASP.NET apps. +- **Microsoft.KernelMemory.Service.AspNetCore:** an extension to load Kernel Memory into your ASP.NET apps. [![Nuget package](https://img.shields.io/nuget/vpre/Microsoft.KernelMemory.Service.AspNetCore)](https://www.nuget.org/packages/Microsoft.KernelMemory.Service.AspNetCore/) [![Example code](https://img.shields.io/badge/example-code-blue)](examples/204-dotnet-ASP.NET-MVC-integration) -* **Microsoft.KernelMemory.SemanticKernelPlugin:** a Memory plugin for Semantic Kernel, +- **Microsoft.KernelMemory.SemanticKernelPlugin:** a Memory plugin for Semantic Kernel, replacing the original Semantic Memory available in SK. [![Nuget package](https://img.shields.io/nuget/vpre/Microsoft.KernelMemory.SemanticKernelPlugin)](https://www.nuget.org/packages/Microsoft.KernelMemory.SemanticKernelPlugin/) - [![Example code](https://img.shields.io/badge/example-code-blue)](examples/011-dotnet-using-MemoryPlugin) + [![Example code](https://img.shields.io/badge/example-code-blue)](examples/003-dotnet-SemanticKernel-plugin) ### Packages for Python, Java and other languages @@ -375,26 +370,26 @@ We also welcome PR contributions to support more languages. 
githubcontrib --repo kernel-memory --owner microsoft --showlogin true --sortBy login --cols 6 --imagesize 110 --> -[aaronpowell](https://github.com/aaronpowell) |[afederici75](https://github.com/afederici75) |[alexibraimov](https://github.com/alexibraimov) |[alkampfergit](https://github.com/alkampfergit) |[amomra](https://github.com/amomra) |[anthonypuppo](https://github.com/anthonypuppo) | -:---: |:---: |:---: |:---: |:---: |:---: | -[aaronpowell](https://github.com/aaronpowell) |[afederici75](https://github.com/afederici75) |[alexibraimov](https://github.com/alexibraimov) |[alkampfergit](https://github.com/alkampfergit) |[amomra](https://github.com/amomra) |[anthonypuppo](https://github.com/anthonypuppo) | +| [aaronpowell](https://github.com/aaronpowell) | [afederici75](https://github.com/afederici75) | [alexibraimov](https://github.com/alexibraimov) | [alkampfergit](https://github.com/alkampfergit) | [amomra](https://github.com/amomra) | [anthonypuppo](https://github.com/anthonypuppo) | +| :----------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------: | +| [aaronpowell](https://github.com/aaronpowell) | [afederici75](https://github.com/afederici75) | [alexibraimov](https://github.com/alexibraimov) | [alkampfergit](https://github.com/alkampfergit) | [amomra](https://github.com/amomra) | [anthonypuppo](https://github.com/anthonypuppo) | -[cherchyk](https://github.com/cherchyk) |[coryisakson](https://github.com/coryisakson) |[crickman](https://github.com/crickman) |[dluc](https://github.com/dluc) |[DM-98](https://github.com/DM-98) |[GraemeJones104](https://github.com/GraemeJones104) | -:---: |:---: |:---: |:---: |:---: |:---: | -[cherchyk](https://github.com/cherchyk) |[coryisakson](https://github.com/coryisakson) |[crickman](https://github.com/crickman) |[dluc](https://github.com/dluc) |[DM-98](https://github.com/DM-98) |[GraemeJones104](https://github.com/GraemeJones104) | +| [cherchyk](https://github.com/cherchyk) | [coryisakson](https://github.com/coryisakson) | [crickman](https://github.com/crickman) | [dluc](https://github.com/dluc) | [DM-98](https://github.com/DM-98) | [GraemeJones104](https://github.com/GraemeJones104) | +| :-----------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------: | 
:------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------: | +| [cherchyk](https://github.com/cherchyk) | [coryisakson](https://github.com/coryisakson) | [crickman](https://github.com/crickman) | [dluc](https://github.com/dluc) | [DM-98](https://github.com/DM-98) | [GraemeJones104](https://github.com/GraemeJones104) | -[jurepurgar](https://github.com/jurepurgar) |[kbeaugrand](https://github.com/kbeaugrand) |[KSemenenko](https://github.com/KSemenenko) |[lecramr](https://github.com/lecramr) |[luismanez](https://github.com/luismanez) |[marcominerva](https://github.com/marcominerva) | -:---: |:---: |:---: |:---: |:---: |:---: | -[jurepurgar](https://github.com/jurepurgar) |[kbeaugrand](https://github.com/kbeaugrand) |[KSemenenko](https://github.com/KSemenenko) |[lecramr](https://github.com/lecramr) |[luismanez](https://github.com/luismanez) |[marcominerva](https://github.com/marcominerva) | +| [jurepurgar](https://github.com/jurepurgar) | [kbeaugrand](https://github.com/kbeaugrand) | [KSemenenko](https://github.com/KSemenenko) | [lecramr](https://github.com/lecramr) | [luismanez](https://github.com/luismanez) | [marcominerva](https://github.com/marcominerva) | +| :---------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------: | +| [jurepurgar](https://github.com/jurepurgar) | [kbeaugrand](https://github.com/kbeaugrand) | [KSemenenko](https://github.com/KSemenenko) | [lecramr](https://github.com/lecramr) | [luismanez](https://github.com/luismanez) | [marcominerva](https://github.com/marcominerva) | -[neel015](https://github.com/neel015) |[pascalberger](https://github.com/pascalberger) |[pawarsum12](https://github.com/pawarsum12) |[pradeepr-roboticist](https://github.com/pradeepr-roboticist) |[qihangnet](https://github.com/qihangnet) |[slapointe](https://github.com/slapointe) | -:---: |:---: |:---: |:---: |:---: |:---: | -[neel015](https://github.com/neel015) |[pascalberger](https://github.com/pascalberger) |[pawarsum12](https://github.com/pawarsum12) |[pradeepr-roboticist](https://github.com/pradeepr-roboticist) |[qihangnet](https://github.com/qihangnet) |[slapointe](https://github.com/slapointe) | +| [neel015](https://github.com/neel015) | [pascalberger](https://github.com/pascalberger) | [pawarsum12](https://github.com/pawarsum12) | [pradeepr-roboticist](https://github.com/pradeepr-roboticist) | [qihangnet](https://github.com/qihangnet) | [slapointe](https://github.com/slapointe) | +| :----------------------------------------------------------------------------------------------------------------------------: | 
:-------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | +| [neel015](https://github.com/neel015) | [pascalberger](https://github.com/pascalberger) | [pawarsum12](https://github.com/pawarsum12) | [pradeepr-roboticist](https://github.com/pradeepr-roboticist) | [qihangnet](https://github.com/qihangnet) | [slapointe](https://github.com/slapointe) | -[slorello89](https://github.com/slorello89) |[spenavajr](https://github.com/spenavajr) |[TaoChenOSU](https://github.com/TaoChenOSU) |[teresaqhoang](https://github.com/teresaqhoang) |[Valkozaur](https://github.com/Valkozaur) |[vicperdana](https://github.com/vicperdana) | -:---: |:---: |:---: |:---: |:---: |:---: | -[slorello89](https://github.com/slorello89) |[spenavajr](https://github.com/spenavajr) |[TaoChenOSU](https://github.com/TaoChenOSU) |[teresaqhoang](https://github.com/teresaqhoang) |[Valkozaur](https://github.com/Valkozaur) |[vicperdana](https://github.com/vicperdana) | +| [slorello89](https://github.com/slorello89) | [spenavajr](https://github.com/spenavajr) | [TaoChenOSU](https://github.com/TaoChenOSU) | [teresaqhoang](https://github.com/teresaqhoang) | [Valkozaur](https://github.com/Valkozaur) | [vicperdana](https://github.com/vicperdana) | +| :----------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------: | +| [slorello89](https://github.com/slorello89) | [spenavajr](https://github.com/spenavajr) | [TaoChenOSU](https://github.com/TaoChenOSU) | [teresaqhoang](https://github.com/teresaqhoang) | [Valkozaur](https://github.com/Valkozaur) | [vicperdana](https://github.com/vicperdana) | -[xbotter](https://github.com/xbotter) | -:---: | -[xbotter](https://github.com/xbotter) | +| [xbotter](https://github.com/xbotter) | +| :---------------------------------------------------------------------------------------------------------------------------: | +| [xbotter](https://github.com/xbotter) | diff --git a/docs/index.md b/docs/index.md index eaad31c40..1ed03772d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -79,7 +79,7 @@ Here's comparison table: * [Quickstart: test KM in few minutes](quickstart) * [**Memory service**, web clients and plugins](service) * [**Memory API**, memory ingestion 
and information retrieval](functions) -* [KM **Extensions**: vector DBs, AI models, Data formats, Orchestration, Content storage](extensions) +* [KM **Extensions**: vector DBs, AI models, Data formats, Orchestration, Document storage](extensions) * [Embedding **serverless** memory in .NET apps](serverless) * [**Security**, service and users](security) * [**How-to guides**, customizing KM and examples](how-to) diff --git a/docs/infra-async.png b/docs/infra-async.png new file mode 100644 index 000000000..7c9b86c29 Binary files /dev/null and b/docs/infra-async.png differ diff --git a/docs/infra-sync.png b/docs/infra-sync.png new file mode 100644 index 000000000..11cf78a46 Binary files /dev/null and b/docs/infra-sync.png differ diff --git a/docs/packages.md b/docs/packages.md index 3c6638082..29ba5084b 100644 --- a/docs/packages.md +++ b/docs/packages.md @@ -7,77 +7,28 @@ layout: default --- # .NET packages -* **Microsoft.KernelMemory.WebClient:** The web client library, can be used to call - a running instance of the Memory web service. .NET Standard 2.0 compatible. +* **Microsoft.KernelMemory.WebClient:** .NET web client to call a running instance of Kernel Memory web service. [![Nuget package](https://img.shields.io/nuget/vpre/Microsoft.KernelMemory.WebClient)](https://www.nuget.org/packages/Microsoft.KernelMemory.WebClient/) - [![Example code](https://img.shields.io/badge/example-code-blue)](examples/001-dotnet-WebClient) + [![Example code](https://img.shields.io/badge/example-code-blue)](https://github.com/microsoft/kernel-memory/tree/main/examples/001-dotnet-WebClient) -* **Microsoft.KernelMemory.SemanticKernelPlugin:** a Memory plugin for Semantic Kernel, - replacing the original Semantic Memory available in SK. .NET Standard 2.0 compatible. - - [![Nuget package](https://img.shields.io/nuget/vpre/Microsoft.KernelMemory.SemanticKernelPlugin)](https://www.nuget.org/packages/Microsoft.KernelMemory.SemanticKernelPlugin/) - [![Example code](https://img.shields.io/badge/example-code-blue)](examples/011-dotnet-using-MemoryPlugin) - -* **Microsoft.KernelMemory.Abstractions:** The internal interfaces and models - shared by all packages, used to extend KM to support third party services. - .NET Standard 2.0 compatible. - - [![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.Abstractions)](https://www.nuget.org/packages/Microsoft.KernelMemory.Abstractions/) - -* **Microsoft.KernelMemory.MemoryDb.AzureAISearch:** Memory storage using - **[Azure AI Search](extensions/AzureAISearch)**. - - [![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.MemoryDb.AzureAISearch)](https://www.nuget.org/packages/Microsoft.KernelMemory.MemoryDb.AzureAISearch/) - -* **Microsoft.KernelMemory.MemoryDb.Postgres:** Memory storage using - **[PostgreSQL](extensions/Postgres)**. - - [![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.MemoryDb.Postgres)](https://www.nuget.org/packages/Microsoft.KernelMemory.MemoryDb.Postgres/) - -* **Microsoft.KernelMemory.MemoryDb.Qdrant:** Memory storage using - **[Qdrant](extensions/Qdrant)**. - - [![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.MemoryDb.Qdrant)](https://www.nuget.org/packages/Microsoft.KernelMemory.MemoryDb.Qdrant/) - -* **Microsoft.KernelMemory.AI.AzureOpenAI:** Integration with **[Azure OpenAI](extensions/OpenAI)** LLMs. 
- - [![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.AI.AzureOpenAI)](https://www.nuget.org/packages/Microsoft.KernelMemory.AI.AzureOpenAI/) +* **Microsoft.KernelMemory.Core:** Kernel Memory core library including all extensions, can be used to build custom + pipelines and handlers, contains + also the serverless client to use memory in a synchronous way without the web service. -* **Microsoft.KernelMemory.AI.LlamaSharp:** Integration with **[LLama](extensions/LlamaSharp)** LLMs. - - [![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.AI.LlamaSharp)](https://www.nuget.org/packages/Microsoft.KernelMemory.AI.LlamaSharp/) - -* **Microsoft.KernelMemory.AI.OpenAI:** Integration with **[OpenAI](extensions/OpenAI)** LLMs. - - [![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.AI.OpenAI)](https://www.nuget.org/packages/Microsoft.KernelMemory.AI.OpenAI/) - -* **Microsoft.KernelMemory.DataFormats.AzureAIDocIntel:** Integration with - [Azure AI Document Intelligence](extensions/AzureAIDocIntel). - - [![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.DataFormats.AzureAIDocIntel)](https://www.nuget.org/packages/Microsoft.KernelMemory.DataFormats.AzureAIDocIntel/) - -* **Microsoft.KernelMemory.Orchestration.AzureQueues:** Ingestion and synthetic memory - pipelines via [Azure Queue Storage](extensions/AzureQueues). - - [![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.Orchestration.AzureQueues)](https://www.nuget.org/packages/Microsoft.KernelMemory.Orchestration.AzureQueues/) - -* **Microsoft.KernelMemory.Orchestration.RabbitMQ:** Ingestion and synthetic memory - pipelines via [RabbitMQ](extensions/RabbitMQ). - - [![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.Orchestration.RabbitMQ)](https://www.nuget.org/packages/Microsoft.KernelMemory.Orchestration.RabbitMQ/) + [![Nuget package](https://img.shields.io/nuget/vpre/Microsoft.KernelMemory.Core)](https://www.nuget.org/packages/Microsoft.KernelMemory.Core/) + [![Example code](https://img.shields.io/badge/example-code-blue)](https://github.com/microsoft/kernel-memory/tree/main/examples/002-dotnet-Serverless) -* **Microsoft.KernelMemory.ContentStorage.AzureBlobs:** Used to store content on - [Azure Storage Blobs](extensions/AzureBlobs). +* **Microsoft.KernelMemory.Service.AspNetCore:** an extension to load Kernel Memory into your ASP.NET apps. - [![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.ContentStorage.AzureBlobs)](https://www.nuget.org/packages/Microsoft.KernelMemory.ContentStorage.AzureBlobs/) + [![Nuget package](https://img.shields.io/nuget/vpre/Microsoft.KernelMemory.Service.AspNetCore)](https://www.nuget.org/packages/Microsoft.KernelMemory.Service.AspNetCore/) + [![Example code](https://img.shields.io/badge/example-code-blue)](https://github.com/microsoft/kernel-memory/tree/main/examples/204-dotnet-ASP.NET-MVC-integration) -* **Microsoft.KernelMemory.Core:** The core library, can be used to build custom - pipelines and handlers, and contains a serverless client to use memory in a - synchronous way, without the web service. .NET 6+. +* **Microsoft.KernelMemory.SemanticKernelPlugin:** a Memory plugin for Semantic Kernel, + replacing the original Semantic Memory available in SK. 
- [![Nuget package](https://img.shields.io/nuget/vpre/Microsoft.KernelMemory.Core)](https://www.nuget.org/packages/Microsoft.KernelMemory.Core/) - [![Example code](https://img.shields.io/badge/example-code-blue)](examples/002-dotnet-Serverless) + [![Nuget package](https://img.shields.io/nuget/vpre/Microsoft.KernelMemory.SemanticKernelPlugin)](https://www.nuget.org/packages/Microsoft.KernelMemory.SemanticKernelPlugin/) + [![Example code](https://img.shields.io/badge/example-code-blue)](https://github.com/microsoft/kernel-memory/tree/main/examples/003-dotnet-SemanticKernel-plugin) ### Packages for Python, Java and other languages diff --git a/examples/001-dotnet-WebClient/file9-settings.json b/examples/001-dotnet-WebClient/file9-settings.json index d517b90c1..44b616648 100644 --- a/examples/001-dotnet-WebClient/file9-settings.json +++ b/examples/001-dotnet-WebClient/file9-settings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } diff --git a/examples/002-dotnet-Serverless/appsettings.json b/examples/002-dotnet-Serverless/appsettings.json index 72a4eeec2..e0d716ca4 100644 --- a/examples/002-dotnet-Serverless/appsettings.json +++ b/examples/002-dotnet-Serverless/appsettings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } diff --git a/examples/002-dotnet-Serverless/file9-settings.json b/examples/002-dotnet-Serverless/file9-settings.json index d517b90c1..44b616648 100644 --- a/examples/002-dotnet-Serverless/file9-settings.json +++ b/examples/002-dotnet-Serverless/file9-settings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } diff --git a/examples/101-dotnet-custom-Prompts/appsettings.json b/examples/101-dotnet-custom-Prompts/appsettings.json index 8236acc4e..13b62ff1f 100644 --- a/examples/101-dotnet-custom-Prompts/appsettings.json +++ b/examples/101-dotnet-custom-Prompts/appsettings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // 
"Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } diff --git a/examples/103-dotnet-custom-EmbeddingGenerator/appsettings.json b/examples/103-dotnet-custom-EmbeddingGenerator/appsettings.json index 84fb060fa..f76869849 100644 --- a/examples/103-dotnet-custom-EmbeddingGenerator/appsettings.json +++ b/examples/103-dotnet-custom-EmbeddingGenerator/appsettings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } diff --git a/examples/104-dotnet-custom-LLM/appsettings.json b/examples/104-dotnet-custom-LLM/appsettings.json index 433b8fb43..972bd42ba 100644 --- a/examples/104-dotnet-custom-LLM/appsettings.json +++ b/examples/104-dotnet-custom-LLM/appsettings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } diff --git a/examples/105-dotnet-serverless-llamasharp/appsettings.json b/examples/105-dotnet-serverless-llamasharp/appsettings.json index 104d58b05..11b2437e4 100644 --- a/examples/105-dotnet-serverless-llamasharp/appsettings.json +++ b/examples/105-dotnet-serverless-llamasharp/appsettings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } @@ -33,7 +33,7 @@ "Deployment": "", // The max number of tokens supported by model deployed // See https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models - "MaxTokenTotal": 8191, + "MaxTokenTotal": 8191 }, "OpenAI": { // Name of the model used to generate text (text completion or chat completion) diff --git a/examples/106-dotnet-retrieve-synthetics/Program.cs b/examples/106-dotnet-retrieve-synthetics/Program.cs index c93176895..34edaa118 100644 --- a/examples/106-dotnet-retrieve-synthetics/Program.cs +++ b/examples/106-dotnet-retrieve-synthetics/Program.cs @@ -53,7 +53,7 @@ await memory.ImportDocumentAsync(new Document("doc1") == file4-KM-Readme.pdf summary == Kernel Memory is an AI service designed for efficient indexing of datasets, supporting features like Retrieval Augmented Generation, synthetic memory, and custom semantic memory processing. 
It integrates with platforms like Semantic Kernel, Microsoft Copilot, and ChatGPT, and is available as a GPT Plugin, web clients, a .NET library, and a Docker container. It allows natural language querying and provides answers with citations from indexed data. Semantic Memory, part of the Semantic Kernel project, is a library for C#, Python, and Java that supports vector search and wraps database calls. Kernel Memory builds upon this, offering additional features like text extraction from various file formats, secure data frameworks, and a .NET codebase for ease of use across languages and platforms. -Kernel Memory supports a wide range of data formats and backends, including Microsoft Office files, PDFs, web pages, images with OCR, and JSON files. It integrates with various AI and vector storage services and offers content storage and orchestration options. +Kernel Memory supports a wide range of data formats and backends, including Microsoft Office files, PDFs, web pages, images with OCR, and JSON files. It integrates with various AI and vector storage services and offers document storage and orchestration options. Kernel Memory can be used in serverless mode, embedded in applications, or as a service for scalable document ingestion and information retrieval. It supports custom ingestion pipelines, data lineage, and citations for verifying answer accuracy. The service provides a web API with OpenAPI documentation for easy access and testing. It also includes a Docker image for quick deployment and a web client for file import and querying. Custom memory ingestion pipelines can be defined with .NET handlers, and the service offers a range of .NET packages for integration with different services and platforms. Python and Java packages are also planned, with contributions for other languages welcomed. 
diff --git a/examples/106-dotnet-retrieve-synthetics/appsettings.json b/examples/106-dotnet-retrieve-synthetics/appsettings.json index af3077878..de6f997f3 100644 --- a/examples/106-dotnet-retrieve-synthetics/appsettings.json +++ b/examples/106-dotnet-retrieve-synthetics/appsettings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } diff --git a/examples/108-dotnet-custom-content-decoders/appsettings.json b/examples/108-dotnet-custom-content-decoders/appsettings.json index 8236acc4e..13b62ff1f 100644 --- a/examples/108-dotnet-custom-content-decoders/appsettings.json +++ b/examples/108-dotnet-custom-content-decoders/appsettings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } diff --git a/examples/111-dotnet-azure-ai-hybrid-search/appsettings.json b/examples/111-dotnet-azure-ai-hybrid-search/appsettings.json index 32766e715..d3a228d16 100644 --- a/examples/111-dotnet-azure-ai-hybrid-search/appsettings.json +++ b/examples/111-dotnet-azure-ai-hybrid-search/appsettings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } diff --git a/examples/206-dotnet-configuration-and-logging/appsettings.json b/examples/206-dotnet-configuration-and-logging/appsettings.json index 1b4807271..6d75c4293 100644 --- a/examples/206-dotnet-configuration-and-logging/appsettings.json +++ b/examples/206-dotnet-configuration-and-logging/appsettings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } diff --git a/examples/207-dotnet-expanding-chunks-on-retrieval/Program.cs b/examples/207-dotnet-expanding-chunks-on-retrieval/Program.cs index fa34454f3..3f3cd48ad 100644 --- a/examples/207-dotnet-expanding-chunks-on-retrieval/Program.cs +++ b/examples/207-dotnet-expanding-chunks-on-retrieval/Program.cs @@ -2,7 +2,7 @@ using Microsoft.KernelMemory; 
using Microsoft.KernelMemory.Configuration; -using Microsoft.KernelMemory.ContentStorage.DevTools; +using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.FileSystem.DevTools; using Microsoft.KernelMemory.MemoryStorage.DevTools; diff --git a/examples/207-dotnet-expanding-chunks-on-retrieval/appsettings.json b/examples/207-dotnet-expanding-chunks-on-retrieval/appsettings.json index 1b4807271..6d75c4293 100644 --- a/examples/207-dotnet-expanding-chunks-on-retrieval/appsettings.json +++ b/examples/207-dotnet-expanding-chunks-on-retrieval/appsettings.json @@ -7,7 +7,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" } diff --git a/examples/301-discord-test-application/DiscordMessageHandler.cs b/examples/301-discord-test-application/DiscordMessageHandler.cs index dba54b67d..5519a98d3 100644 --- a/examples/301-discord-test-application/DiscordMessageHandler.cs +++ b/examples/301-discord-test-application/DiscordMessageHandler.cs @@ -9,7 +9,7 @@ namespace Microsoft.Discord.TestApplication; /// -/// KM pipeline handler fetching discord data files from content storage +/// KM pipeline handler fetching discord data files from document storage /// and storing messages in Postgres. /// public sealed class DiscordMessageHandler : IPipelineStepHandler, IDisposable, IAsyncDisposable diff --git a/examples/301-discord-test-application/Program.cs b/examples/301-discord-test-application/Program.cs index 5701fc6a5..53434d933 100644 --- a/examples/301-discord-test-application/Program.cs +++ b/examples/301-discord-test-application/Program.cs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using Microsoft.KernelMemory; -using Microsoft.KernelMemory.ContentStorage.DevTools; +using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.Sources.DiscordBot; namespace Microsoft.Discord.TestApplication; diff --git a/examples/301-discord-test-application/appsettings.json b/examples/301-discord-test-application/appsettings.json index f4bd01e51..392e91563 100644 --- a/examples/301-discord-test-application/appsettings.json +++ b/examples/301-discord-test-application/appsettings.json @@ -5,7 +5,7 @@ "DiscordToken": "", // Index where to store files, e.g. disk folder, Azure blobs folder, etc. "Index": "discord", - // File name used when uploading a message to content storage. + // File name used when uploading a message to document storage. 
"FileName": "discord-msg.json", // Handlers processing the incoming Discord events "Steps": [ diff --git a/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs b/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs index 84c0536e1..5193ec199 100644 --- a/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs +++ b/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs @@ -16,8 +16,8 @@ using Azure.Search.Documents.Models; using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.AI; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Diagnostics; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.MemoryStorage; namespace Microsoft.KernelMemory.MemoryDb.AzureAISearch; @@ -92,7 +92,7 @@ public AzureAISearchMemory( default: this._log.LogCritical("Azure AI Search authentication type '{0}' undefined or not supported", config.Auth); - throw new ContentStorageException($"Azure AI Search authentication type '{config.Auth}' undefined or not supported"); + throw new DocumentStorageException($"Azure AI Search authentication type '{config.Auth}' undefined or not supported"); } } diff --git a/extensions/AzureBlobs/AzureBlobs.csproj b/extensions/AzureBlobs/AzureBlobs.csproj index 41bb27fa2..34e913bbd 100644 --- a/extensions/AzureBlobs/AzureBlobs.csproj +++ b/extensions/AzureBlobs/AzureBlobs.csproj @@ -3,8 +3,8 @@ net8.0 LatestMajor - Microsoft.KernelMemory.ContentStorage.AzureBlobs - Microsoft.KernelMemory.ContentStorage.AzureBlobs + Microsoft.KernelMemory.DocumentStorage.AzureBlobs + Microsoft.KernelMemory.DocumentStorage.AzureBlobs $(NoWarn);KMEXP03;CA1724;CS1591; @@ -19,8 +19,8 @@ true - Microsoft.KernelMemory.ContentStorage.AzureBlobs - Azure Blob Storage for Kernel Memory content storage + Microsoft.KernelMemory.DocumentStorage.AzureBlobs + Azure Blob Storage for Kernel Memory document storage Azure Blob Storage adapter allowing Kernel Memory to upload documents and maintain their state in Azure Blobs Copilot, Plugin, Memory, RAG, Kernel Memory, Azure Blob, Semantic Memory, Episodic Memory, Declarative Memory, AI, Artificial Intelligence, Embeddings, Vector DB, Vector Search, Memory DB, ETL bin/$(Configuration)/$(TargetFramework)/$(AssemblyName).xml diff --git a/extensions/AzureBlobs/AzureBlobsStorage.cs b/extensions/AzureBlobs/AzureBlobsStorage.cs index 5bed6e372..052b5018b 100644 --- a/extensions/AzureBlobs/AzureBlobsStorage.cs +++ b/extensions/AzureBlobs/AzureBlobsStorage.cs @@ -15,12 +15,12 @@ using Microsoft.KernelMemory.Diagnostics; using Microsoft.KernelMemory.Pipeline; -namespace Microsoft.KernelMemory.ContentStorage.AzureBlobs; +namespace Microsoft.KernelMemory.DocumentStorage.AzureBlobs; // TODO: a container can contain up to 50000 blocks // TODO: optionally use one container per index [Experimental("KMEXP03")] -public sealed class AzureBlobsStorage : IContentStorage +public sealed class AzureBlobsStorage : IDocumentStorage { private const string DefaultContainerName = "smemory"; private const string DefaultEndpointSuffix = "core.windows.net"; @@ -91,7 +91,7 @@ public AzureBlobsStorage( default: this._log.LogCritical("Azure Blob authentication type '{0}' undefined or not supported", config.Auth); - throw new ContentStorageException($"Azure Blob authentication type '{config.Auth}' undefined or not supported"); + throw new DocumentStorageException($"Azure Blob authentication type '{config.Auth}' undefined or not supported"); } this._containerName = config.Container; @@ -105,7 +105,7 @@ public 
AzureBlobsStorage( if (this._containerClient == null) { this._log.LogCritical("Unable to instantiate Azure Blob container client"); - throw new ContentStorageException("Unable to instantiate Azure Blob container client"); + throw new DocumentStorageException("Unable to instantiate Azure Blob container client"); } } @@ -132,7 +132,7 @@ public Task DeleteIndexDirectoryAsync(string index, CancellationToken cancellati { if (string.IsNullOrWhiteSpace(index)) { - throw new ContentStorageException("The index name is empty, stopping the process to prevent data loss"); + throw new DocumentStorageException("The index name is empty, stopping the process to prevent data loss"); } return this.DeleteBlobsByPrefixAsync(index, cancellationToken); @@ -163,7 +163,7 @@ public Task EmptyDocumentDirectoryAsync(string index, string documentId, Cancell var directoryName = JoinPaths(index, documentId); if (string.IsNullOrWhiteSpace(index) || string.IsNullOrWhiteSpace(documentId) || string.IsNullOrWhiteSpace(directoryName)) { - throw new ContentStorageException("The index, or document ID, or directory name is empty, stopping the process to prevent data loss"); + throw new DocumentStorageException("The index, or document ID, or directory name is empty, stopping the process to prevent data loss"); } return this.DeleteBlobsByPrefixAsync(directoryName, cancellationToken); @@ -178,7 +178,7 @@ public Task DeleteDocumentDirectoryAsync( var directoryName = JoinPaths(index, documentId); if (string.IsNullOrWhiteSpace(index) || string.IsNullOrWhiteSpace(documentId) || string.IsNullOrWhiteSpace(directoryName)) { - throw new ContentStorageException("The index, or document ID, or directory name is empty, stopping the process to prevent data loss"); + throw new DocumentStorageException("The index, or document ID, or directory name is empty, stopping the process to prevent data loss"); } return this.DeleteBlobsByPrefixAsync(directoryName, cancellationToken); @@ -234,12 +234,12 @@ public async Task ReadFileAsync( if (logErrIfNotFound) { this._log.LogError("Unable to download file {0}", blobName); } - throw new ContentStorageFileNotFoundException("Unable to fetch blob content"); + throw new DocumentStorageFileNotFoundException("Unable to fetch blob content"); } catch (RequestFailedException e) when (e.Status == 404) { this._log.LogInformation("File not found: {0}", blobName); - throw new ContentStorageFileNotFoundException("File not found", e); + throw new DocumentStorageFileNotFoundException("File not found", e); } } @@ -294,7 +294,7 @@ private async Task InternalWriteAsync( size = stream.Length; break; default: - throw new ContentStorageException($"Unexpected object type {content.GetType().FullName}"); + throw new DocumentStorageException($"Unexpected object type {content.GetType().FullName}"); } if (size == 0) @@ -311,7 +311,7 @@ private async Task DeleteBlobsByPrefixAsync(string prefix, CancellationToken can { if (string.IsNullOrWhiteSpace(prefix)) { - throw new ContentStorageException("The blob prefix is empty, stopping the process to prevent data loss"); + throw new DocumentStorageException("The blob prefix is empty, stopping the process to prevent data loss"); } this._log.LogInformation("Deleting blobs at {0}", prefix); @@ -350,7 +350,7 @@ private BlobClient GetBlobClient(string blobName) BlobClient? 
blobClient = this._containerClient.GetBlobClient(blobName); if (blobClient == null) { - throw new ContentStorageException("Unable to instantiate Azure Blob blob client"); + throw new DocumentStorageException("Unable to instantiate Azure Blob blob client"); } return blobClient; @@ -361,7 +361,7 @@ private BlobLeaseClient GetBlobLeaseClient(BlobClient blobClient) var blobLeaseClient = blobClient.GetBlobLeaseClient(); if (blobLeaseClient == null) { - throw new ContentStorageException("Unable to instantiate Azure blob lease client"); + throw new DocumentStorageException("Unable to instantiate Azure blob lease client"); } return blobLeaseClient; @@ -376,7 +376,7 @@ private async Task LeaseBlobAsync(BlobLeaseClient blobLeaseClient, Ca .ConfigureAwait(false); if (lease == null || !lease.HasValue) { - throw new ContentStorageException("Unable to lease blob"); + throw new DocumentStorageException("Unable to lease blob"); } this._log.LogTrace("Blob {0} leased", blobLeaseClient.Uri); @@ -401,7 +401,7 @@ private void ValidateAccountName(string value) if (string.IsNullOrEmpty(value)) { this._log.LogCritical("The Azure Blob account name is empty"); - throw new ContentStorageException("The account name is empty"); + throw new DocumentStorageException("The account name is empty"); } } @@ -410,7 +410,7 @@ private void ValidateAccountKey(string value) if (string.IsNullOrEmpty(value)) { this._log.LogCritical("The Azure Blob account key is empty"); - throw new ContentStorageException("The Azure Blob account key is empty"); + throw new DocumentStorageException("The Azure Blob account key is empty"); } } @@ -419,7 +419,7 @@ private void ValidateConnectionString(string value) if (string.IsNullOrEmpty(value)) { this._log.LogCritical("The Azure Blob connection string is empty"); - throw new ContentStorageException("The Azure Blob connection string is empty"); + throw new DocumentStorageException("The Azure Blob connection string is empty"); } } diff --git a/extensions/AzureBlobs/DependencyInjection.cs b/extensions/AzureBlobs/DependencyInjection.cs index ef4e239bf..896ae63dd 100644 --- a/extensions/AzureBlobs/DependencyInjection.cs +++ b/extensions/AzureBlobs/DependencyInjection.cs @@ -1,8 +1,8 @@ // Copyright (c) Microsoft. All rights reserved. 
using Microsoft.Extensions.DependencyInjection; -using Microsoft.KernelMemory.ContentStorage; -using Microsoft.KernelMemory.ContentStorage.AzureBlobs; +using Microsoft.KernelMemory.DocumentStorage; +using Microsoft.KernelMemory.DocumentStorage.AzureBlobs; #pragma warning disable IDE0130 // reduce number of "using" statements // ReSharper disable once CheckNamespace - reduce number of "using" statements @@ -10,19 +10,19 @@ namespace Microsoft.KernelMemory; public static partial class KernelMemoryBuilderExtensions { - public static IKernelMemoryBuilder WithAzureBlobsStorage(this IKernelMemoryBuilder builder, AzureBlobsConfig config) + public static IKernelMemoryBuilder WithAzureBlobsDocumentStorage(this IKernelMemoryBuilder builder, AzureBlobsConfig config) { - builder.Services.AddAzureBlobsAsContentStorage(config); + builder.Services.AddAzureBlobsAsDocumentStorage(config); return builder; } } public static partial class DependencyInjection { - public static IServiceCollection AddAzureBlobsAsContentStorage(this IServiceCollection services, AzureBlobsConfig config) + public static IServiceCollection AddAzureBlobsAsDocumentStorage(this IServiceCollection services, AzureBlobsConfig config) { return services .AddSingleton(config) - .AddSingleton<IContentStorage, AzureBlobsStorage>(); + .AddSingleton<IDocumentStorage, AzureBlobsStorage>(); } } diff --git a/extensions/AzureBlobs/README.md b/extensions/AzureBlobs/README.md index ddb430460..03fc3d1e5 100644 --- a/extensions/AzureBlobs/README.md +++ b/extensions/AzureBlobs/README.md @@ -1,6 +1,6 @@ # Kernel Memory with Azure Blob Storage -[![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.ContentStorage.AzureBlobs)](https://www.nuget.org/packages/Microsoft.KernelMemory.ContentStorage.AzureBlobs/) +[![Nuget package](https://img.shields.io/nuget/v/Microsoft.KernelMemory.DocumentStorage.AzureBlobs)](https://www.nuget.org/packages/Microsoft.KernelMemory.DocumentStorage.AzureBlobs/) [![Discord](https://img.shields.io/discord/1063152441819942922?label=Discord&logo=discord&logoColor=white&color=d82679)](https://aka.ms/KMdiscord) This project contains the [Azure Blob Storage](https://learn.microsoft.com/azure/storage/blobs) diff --git a/extensions/AzureQueues/AzureQueuesPipeline.cs b/extensions/AzureQueues/AzureQueuesPipeline.cs index c77f07839..80ce56af2 100644 --- a/extensions/AzureQueues/AzureQueuesPipeline.cs +++ b/extensions/AzureQueues/AzureQueuesPipeline.cs @@ -12,8 +12,8 @@ using Azure.Storage.Queues; using Azure.Storage.Queues.Models; using Microsoft.Extensions.Logging; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Diagnostics; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.Pipeline.Queue; using Timer = System.Timers.Timer; @@ -124,7 +124,7 @@ public AzureQueuesPipeline( default: this._log.LogCritical("Azure Queue authentication type '{0}' undefined or not supported", config.Auth); - throw new ContentStorageException($"Azure Queue authentication type '{config.Auth}' undefined or not supported"); + throw new DocumentStorageException($"Azure Queue authentication type '{config.Auth}' undefined or not supported"); } } @@ -345,7 +345,7 @@ private void ValidateAccountName(string value) if (string.IsNullOrEmpty(value)) { this._log.LogCritical("The Azure Queue account name is empty"); - throw new ContentStorageException("The account name is empty"); + throw new DocumentStorageException("The account name is empty"); } } @@ -354,7 +354,7 @@ private void ValidateAccountKey(string value) if (string.IsNullOrEmpty(value)) { this._log.LogCritical("The Azure
Queue account key is empty"); - throw new ContentStorageException("The Azure Queue account key is empty"); + throw new DocumentStorageException("The Azure Queue account key is empty"); } } @@ -363,7 +363,7 @@ private void ValidateConnectionString(string value) if (string.IsNullOrEmpty(value)) { this._log.LogCritical("The Azure Queue connection string is empty"); - throw new ContentStorageException("The Azure Queue connection string is empty"); + throw new DocumentStorageException("The Azure Queue connection string is empty"); } } diff --git a/extensions/Discord/Discord/DiscordConnector.cs b/extensions/Discord/Discord/DiscordConnector.cs index 760e8d992..a3398a7da 100644 --- a/extensions/Discord/Discord/DiscordConnector.cs +++ b/extensions/Discord/Discord/DiscordConnector.cs @@ -25,8 +25,8 @@ public sealed class DiscordConnector : IHostedService, IDisposable, IAsyncDispos private readonly IKernelMemory _memory; private readonly ILogger _log; private readonly string _authToken; - private readonly string _contentStorageIndex; - private readonly string _contentStorageFilename; + private readonly string _docStorageIndex; + private readonly string _docStorageFilename; private readonly List _pipelineSteps; /// @@ -55,9 +55,9 @@ public DiscordConnector( this._client.Log += this.OnLog; this._client.MessageReceived += this.OnMessage; this._memory = memory; - this._contentStorageIndex = config.Index; + this._docStorageIndex = config.Index; this._pipelineSteps = config.Steps; - this._contentStorageFilename = config.FileName; + this._docStorageFilename = config.FileName; } /// @@ -153,9 +153,9 @@ private Task OnMessage(SocketMessage message) { await this._memory.ImportDocumentAsync( fileContent, - fileName: this._contentStorageFilename, + fileName: this._docStorageFilename, documentId: documentId, - index: this._contentStorageIndex, + index: this._docStorageIndex, steps: this._pipelineSteps).ConfigureAwait(false); } }); diff --git a/extensions/MongoDbAtlas/MongoDbAtlas.FunctionalTests/StorageTests.cs b/extensions/MongoDbAtlas/MongoDbAtlas.FunctionalTests/StorageTests.cs index 7f2c42dbe..d01a4251c 100644 --- a/extensions/MongoDbAtlas/MongoDbAtlas.FunctionalTests/StorageTests.cs +++ b/extensions/MongoDbAtlas/MongoDbAtlas.FunctionalTests/StorageTests.cs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using System.Text; -using Microsoft.KernelMemory.ContentStorage; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.MongoDbAtlas; using Microsoft.KM.TestHelpers; @@ -165,7 +165,7 @@ public async Task CanCleanIndexCorrectly() await this._sut.EmptyDocumentDirectoryAsync(this.IndexName, id); // Assert: check that the files are not there anymore - await Assert.ThrowsAsync<ContentStorageFileNotFoundException>(async () => await this._sut.ReadFileAsync(this.IndexName, id, fileName1, false)); - await Assert.ThrowsAsync<ContentStorageFileNotFoundException>(async () => await this._sut.ReadFileAsync(this.IndexName, id, fileName2, false)); + await Assert.ThrowsAsync<DocumentStorageFileNotFoundException>(async () => await this._sut.ReadFileAsync(this.IndexName, id, fileName1, false)); + await Assert.ThrowsAsync<DocumentStorageFileNotFoundException>(async () => await this._sut.ReadFileAsync(this.IndexName, id, fileName2, false)); } } diff --git a/extensions/MongoDbAtlas/MongoDbAtlas/DependencyInjection.cs b/extensions/MongoDbAtlas/MongoDbAtlas/DependencyInjection.cs index 8cb12f5ea..6092de0e1 100644 --- a/extensions/MongoDbAtlas/MongoDbAtlas/DependencyInjection.cs +++ b/extensions/MongoDbAtlas/MongoDbAtlas/DependencyInjection.cs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft. All rights reserved.
using Microsoft.Extensions.DependencyInjection; -using Microsoft.KernelMemory.ContentStorage; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.MongoDbAtlas; @@ -25,7 +25,7 @@ public static IKernelMemoryBuilder WithMongoDbAtlasMemoryDb( } /// - /// Adds Mongodb as content storage for files. + /// Adds Mongodb as document storage for files. /// /// The kernel builder /// Configuration for Mongodb @@ -33,20 +33,20 @@ public static IKernelMemoryBuilder WithMongoDbAtlasStorage( this IKernelMemoryBuilder builder, MongoDbAtlasConfig config) { - builder.Services.AddMongoDbAtlasAsContentStorage(config); + builder.Services.AddMongoDbAtlasAsDocumentStorage(config); return builder; } /// - /// Adds Mongodb as content storage service and memory service, for both files and memory records. + /// Adds Mongodb as document storage and memory db, for both files and memory records. /// /// The kernel builder /// Configuration for Mongodb - public static IKernelMemoryBuilder WithMongoDbAtlasMemoryDbAndStorage( + public static IKernelMemoryBuilder WithMongoDbAtlasMemoryDbAndDocumentStorage( this IKernelMemoryBuilder builder, MongoDbAtlasConfig config) { - builder.Services.AddMongoDbAtlasAsMemoryDbAndContentStorage(config); + builder.Services.AddMongoDbAtlasAsMemoryDbAndDocumentStorage(config); return builder; } } @@ -75,13 +75,13 @@ public static IServiceCollection AddMongoDbAtlasAsMemoryDb( /// /// The services collection /// Mongodb configuration. - public static IServiceCollection AddMongoDbAtlasAsContentStorage( + public static IServiceCollection AddMongoDbAtlasAsDocumentStorage( this IServiceCollection services, MongoDbAtlasConfig config) { return services .AddSingleton(config) - .AddSingleton(); + .AddSingleton(); } /// @@ -89,13 +89,13 @@ public static IServiceCollection AddMongoDbAtlasAsContentStorage( /// /// The services collection /// Mongodb configuration. 
- public static IServiceCollection AddMongoDbAtlasAsMemoryDbAndContentStorage( + public static IServiceCollection AddMongoDbAtlasAsMemoryDbAndDocumentStorage( this IServiceCollection services, MongoDbAtlasConfig config) { return services .AddSingleton(config) .AddSingleton() - .AddSingleton(); + .AddSingleton(); } } diff --git a/extensions/MongoDbAtlas/MongoDbAtlas/MongoDbAtlasStorage.cs b/extensions/MongoDbAtlas/MongoDbAtlas/MongoDbAtlasStorage.cs index 3bc3307a1..4979f06a7 100644 --- a/extensions/MongoDbAtlas/MongoDbAtlas/MongoDbAtlasStorage.cs +++ b/extensions/MongoDbAtlas/MongoDbAtlas/MongoDbAtlasStorage.cs @@ -5,7 +5,7 @@ using System.IO; using System.Threading; using System.Threading.Tasks; -using Microsoft.KernelMemory.ContentStorage; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.Pipeline; using MongoDB.Bson; using MongoDB.Driver; @@ -14,7 +14,7 @@ namespace Microsoft.KernelMemory.MongoDbAtlas; [Experimental("KMEXP03")] -public sealed class MongoDbAtlasStorage : MongoDbAtlasBaseStorage, IContentStorage +public sealed class MongoDbAtlasStorage : MongoDbAtlasBaseStorage, IDocumentStorage { private readonly IMimeTypeDetection _mimeTypeDetection; @@ -163,7 +163,7 @@ public async Task ReadFileAsync( Console.WriteLine(error); } - throw new ContentStorageFileNotFoundException(error); + throw new DocumentStorageFileNotFoundException(error); } BinaryData docData = new(doc["content"].AsString); @@ -188,7 +188,7 @@ public async Task ReadFileAsync( Console.WriteLine($"File {fileName} not found in index {index} and document {documentId}"); } - throw new ContentStorageFileNotFoundException("File not found"); + throw new DocumentStorageFileNotFoundException("File not found"); } BinaryData docData = new(doc["content"].AsString); @@ -215,7 +215,7 @@ public async Task ReadFileAsync( Console.WriteLine($"File {fileName} not found in index {index} and document {documentId}"); } - throw new ContentStorageFileNotFoundException("File not found"); + throw new DocumentStorageFileNotFoundException("File not found"); } async Task AsyncStreamDelegate() => await bucket.OpenDownloadStreamAsync(file.Id, cancellationToken: cancellationToken).ConfigureAwait(false); diff --git a/extensions/Postgres/Postgres.TestApplication/Program.cs b/extensions/Postgres/Postgres.TestApplication/Program.cs index 252257e8e..d4431238c 100644 --- a/extensions/Postgres/Postgres.TestApplication/Program.cs +++ b/extensions/Postgres/Postgres.TestApplication/Program.cs @@ -1,10 +1,10 @@ // Copyright (c) Microsoft. All rights reserved. 
using Microsoft.KernelMemory; -using Microsoft.KernelMemory.ContentStorage.DevTools; +using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.FileSystem.DevTools; -namespace Postgres.TestApplication; +namespace Microsoft.Postgres.TestApplication; internal static class Program { @@ -31,23 +31,25 @@ private static async Task Test1() var azureOpenAITextConfig = cfg.GetSection("KernelMemory:Services:AzureOpenAIText").Get(); ArgumentNullExceptionEx.ThrowIfNull(azureOpenAITextConfig, nameof(azureOpenAITextConfig), "AzureOpenAIText config not found"); - // Concatenate our 'WithPostgres()' after 'WithOpenAIDefaults()' from the core nuget + // Concatenate our 'WithPostgresMemoryDb()' after 'WithOpenAIDefaults()' from the core nuget var mem1 = new KernelMemoryBuilder() .WithAzureOpenAITextGeneration(azureOpenAITextConfig) .WithAzureOpenAITextEmbeddingGeneration(azureOpenAIEmbeddingConfig) .WithPostgresMemoryDb(postgresConfig) + .WithSimpleFileStorage(SimpleFileStorageConfig.Persistent) .Build(); - // Concatenate our 'WithPostgres()' before 'WithOpenAIDefaults()' from the core nuget + // Concatenate our 'WithPostgresMemoryDb()' before 'WithOpenAIDefaults()' from the core nuget var mem2 = new KernelMemoryBuilder() .WithPostgresMemoryDb(postgresConfig) + .WithSimpleFileStorage(SimpleFileStorageConfig.Persistent) .WithAzureOpenAITextGeneration(azureOpenAITextConfig) .WithAzureOpenAITextEmbeddingGeneration(azureOpenAIEmbeddingConfig) .Build(); - // Concatenate our 'WithPostgres()' before and after KM builder extension methods from the core nuget + // Concatenate our 'WithPostgresMemoryDb()' before and after KM builder extension methods from the core nuget var mem3 = new KernelMemoryBuilder() - .WithSimpleFileStorage() + .WithSimpleFileStorage(SimpleFileStorageConfig.Persistent) .WithAzureOpenAITextGeneration(azureOpenAITextConfig) .WithPostgresMemoryDb(postgresConfig) .WithAzureOpenAITextEmbeddingGeneration(azureOpenAIEmbeddingConfig) diff --git a/extensions/Postgres/Postgres.UnitTests/PostgresConfigTests.cs b/extensions/Postgres/Postgres.UnitTests/PostgresConfigTests.cs index 1b7e34431..1fc35dba5 100644 --- a/extensions/Postgres/Postgres.UnitTests/PostgresConfigTests.cs +++ b/extensions/Postgres/Postgres.UnitTests/PostgresConfigTests.cs @@ -2,7 +2,7 @@ using Microsoft.KernelMemory; -namespace Postgres.UnitTests; +namespace Microsoft.Postgres.UnitTests; public class PostgresConfigTests { diff --git a/extensions/Postgres/README.md b/extensions/Postgres/README.md index 27713781c..0ccaebbc9 100644 --- a/extensions/Postgres/README.md +++ b/extensions/Postgres/README.md @@ -41,7 +41,7 @@ To use Postgres with Kernel Memory: } } ``` -4. Configure KM builder to store memories in Postgres, for example: +4. 
Configure KM builder to store memories in Postgres, and to persist documents, for example: ```csharp // using Microsoft.KernelMemory; // using Microsoft.KernelMemory.Postgres; @@ -56,7 +56,8 @@ To use Postgres with Kernel Memory: .BindSection("KernelMemory:Services:Postgres", postgresConfig); var memory = new KernelMemoryBuilder() - .WithPostgres(postgresConfig) + .WithPostgresMemoryDb(postgresConfig) + .WithSimpleFileStorage(SimpleFileStorageConfig.Persistent) .WithAzureOpenAITextGeneration(azureOpenAIConfig) .WithAzureOpenAITextEmbeddingGeneration(azureOpenAIConfig) .Build(); diff --git a/extensions/Qdrant/Qdrant.TestApplication/Program.cs b/extensions/Qdrant/Qdrant.TestApplication/Program.cs index ab8dc9286..7b8eae32a 100644 --- a/extensions/Qdrant/Qdrant.TestApplication/Program.cs +++ b/extensions/Qdrant/Qdrant.TestApplication/Program.cs @@ -5,7 +5,7 @@ using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KM.TestHelpers; -namespace Qdrant.TestApplication; +namespace Microsoft.Qdrant.TestApplication; public static class Program { diff --git a/extensions/Redis/Redis.FunctionalTests/AdditionalFilteringTests.cs b/extensions/Redis/Redis.FunctionalTests/AdditionalFilteringTests.cs index 9a7fecf88..4c5396d81 100644 --- a/extensions/Redis/Redis.FunctionalTests/AdditionalFilteringTests.cs +++ b/extensions/Redis/Redis.FunctionalTests/AdditionalFilteringTests.cs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using Microsoft.KernelMemory; -using Microsoft.KernelMemory.ContentStorage.DevTools; +using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.FileSystem.DevTools; using Microsoft.KM.TestHelpers; using Xunit.Abstractions; diff --git a/extensions/Redis/Redis.TestApplication/MockEmbeddingGenerator.cs b/extensions/Redis/Redis.TestApplication/MockEmbeddingGenerator.cs index 766e8982d..b67acc4a2 100644 --- a/extensions/Redis/Redis.TestApplication/MockEmbeddingGenerator.cs +++ b/extensions/Redis/Redis.TestApplication/MockEmbeddingGenerator.cs @@ -3,7 +3,7 @@ using Microsoft.KernelMemory; using Microsoft.KernelMemory.AI; -namespace Redis.TestApplication; +namespace Microsoft.Redis.TestApplication; internal sealed class MockEmbeddingGenerator : ITextEmbeddingGenerator { diff --git a/extensions/Redis/Redis.TestApplication/Program.cs b/extensions/Redis/Redis.TestApplication/Program.cs index 469d267b6..4fb3fb617 100644 --- a/extensions/Redis/Redis.TestApplication/Program.cs +++ b/extensions/Redis/Redis.TestApplication/Program.cs @@ -7,7 +7,7 @@ using Microsoft.KernelMemory.MemoryStorage; using StackExchange.Redis; -namespace Redis.TestApplication; +namespace Microsoft.Redis.TestApplication; public static class Program { diff --git a/extensions/TikToken/TikToken.UnitTests/Startup.cs b/extensions/TikToken/TikToken.UnitTests/Startup.cs index d30e2a5d5..99a1a4f77 100644 --- a/extensions/TikToken/TikToken.UnitTests/Startup.cs +++ b/extensions/TikToken/TikToken.UnitTests/Startup.cs @@ -5,7 +5,7 @@ using Microsoft.Extensions.Hosting; -namespace TikToken.UnitTests; +namespace Microsoft.TikToken.UnitTests; public class Startup { diff --git a/extensions/TikToken/TikToken.UnitTests/TikTokenTokenizersTest.cs b/extensions/TikToken/TikToken.UnitTests/TikTokenTokenizersTest.cs index 095fbc9c6..41c4203db 100644 --- a/extensions/TikToken/TikToken.UnitTests/TikTokenTokenizersTest.cs +++ b/extensions/TikToken/TikToken.UnitTests/TikTokenTokenizersTest.cs @@ -6,7 +6,7 @@ using Xunit; using Xunit.Abstractions; -namespace TikToken.UnitTests; +namespace 
Microsoft.TikToken.UnitTests; public class TikTokenTokenizers : BaseUnitTestCase { diff --git a/infra/main.bicep b/infra/main.bicep index 8b0f921a2..96059d90a 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -105,7 +105,7 @@ module managedidentity 'modules/managed-identity.bicep' = { Module to create a Storage Account See https://learn.microsoft.com/azure/storage/common/storage-account-overview - The storage account is used to store files (KM Content Storage) and + The storage account is used to store files (KM Document Storage) and to run asynchronous ingestion (KM Pipelines Orchestration). */ module storage 'modules/storage.bicep' = { diff --git a/infra/main.json b/infra/main.json index 29b39e26a..e14f79f09 100644 --- a/infra/main.json +++ b/infra/main.json @@ -1231,7 +1231,7 @@ "value": "true" }, { - "name": "KernelMemory__ContentStorageType", + "name": "KernelMemory__DocumentStorageType", "value": "AzureBlobs" }, { diff --git a/infra/modules/container-app.bicep b/infra/modules/container-app.bicep index 8ea4612a2..015c21053 100644 --- a/infra/modules/container-app.bicep +++ b/infra/modules/container-app.bicep @@ -81,7 +81,7 @@ resource kmService 'Microsoft.App/containerApps@2023-05-01' = { value: 'true' } { - name: 'KernelMemory__ContentStorageType' + name: 'KernelMemory__DocumentStorageType' value: 'AzureBlobs' } { diff --git a/service/Abstractions/ContentStorage/ContentStorageException.cs b/service/Abstractions/ContentStorage/ContentStorageException.cs deleted file mode 100644 index 87ee5085d..000000000 --- a/service/Abstractions/ContentStorage/ContentStorageException.cs +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) Microsoft. All rights reserved. - -using System; - -namespace Microsoft.KernelMemory.ContentStorage; - -public class ContentStorageException : KernelMemoryException -{ - /// - public ContentStorageException() { } - - /// - public ContentStorageException(string message) : base(message) { } - - /// - public ContentStorageException(string message, Exception? innerException) : base(message, innerException) { } -} - -public class ContentStorageFileNotFoundException : ContentStorageException -{ - /// - public ContentStorageFileNotFoundException() { } - - /// - public ContentStorageFileNotFoundException(string message) : base(message) { } - - /// - public ContentStorageFileNotFoundException(string message, Exception? innerException) : base(message, innerException) { } -} diff --git a/service/Abstractions/DocumentStorage/DocumentStorageException.cs b/service/Abstractions/DocumentStorage/DocumentStorageException.cs new file mode 100644 index 000000000..144e60d37 --- /dev/null +++ b/service/Abstractions/DocumentStorage/DocumentStorageException.cs @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; + +namespace Microsoft.KernelMemory.DocumentStorage; + +public class DocumentStorageException : KernelMemoryException +{ + /// + public DocumentStorageException() { } + + /// + public DocumentStorageException(string message) : base(message) { } + + /// + public DocumentStorageException(string message, Exception? innerException) : base(message, innerException) { } +} + +public class DocumentStorageFileNotFoundException : DocumentStorageException +{ + /// + public DocumentStorageFileNotFoundException() { } + + /// + public DocumentStorageFileNotFoundException(string message) : base(message) { } + + /// + public DocumentStorageFileNotFoundException(string message, Exception? 
innerException) : base(message, innerException) { } +} diff --git a/service/Abstractions/ContentStorage/EmbeddingFileContent.cs b/service/Abstractions/DocumentStorage/EmbeddingFileContent.cs similarity index 95% rename from service/Abstractions/ContentStorage/EmbeddingFileContent.cs rename to service/Abstractions/DocumentStorage/EmbeddingFileContent.cs index 8fba9d1de..90c6af075 100644 --- a/service/Abstractions/ContentStorage/EmbeddingFileContent.cs +++ b/service/Abstractions/DocumentStorage/EmbeddingFileContent.cs @@ -3,7 +3,7 @@ using System; using System.Text.Json.Serialization; -namespace Microsoft.KernelMemory.ContentStorage; +namespace Microsoft.KernelMemory.DocumentStorage; public class EmbeddingFileContent { diff --git a/service/Abstractions/ContentStorage/IContentStorage.cs b/service/Abstractions/DocumentStorage/IDocumentStorage.cs similarity index 97% rename from service/Abstractions/ContentStorage/IContentStorage.cs rename to service/Abstractions/DocumentStorage/IDocumentStorage.cs index ab93b0d0d..7add23129 100644 --- a/service/Abstractions/ContentStorage/IContentStorage.cs +++ b/service/Abstractions/DocumentStorage/IDocumentStorage.cs @@ -4,9 +4,9 @@ using System.Threading; using System.Threading.Tasks; -namespace Microsoft.KernelMemory.ContentStorage; +namespace Microsoft.KernelMemory.DocumentStorage; -public interface IContentStorage +public interface IDocumentStorage { /// /// Create a new container (aka index), if it doesn't exist already diff --git a/service/Abstractions/IKernelMemory.cs b/service/Abstractions/IKernelMemory.cs index bf7bed55d..00ce78632 100644 --- a/service/Abstractions/IKernelMemory.cs +++ b/service/Abstractions/IKernelMemory.cs @@ -161,7 +161,7 @@ public Task IsDocumentReadyAsync( CancellationToken cancellationToken = default); /// - /// Export a file from content storage + /// Export a file from document storage /// /// ID of the document containing the file /// File name diff --git a/service/Abstractions/KernelMemoryBuilderExtensions.cs b/service/Abstractions/KernelMemoryBuilderExtensions.cs index 4e7184aab..be2881582 100644 --- a/service/Abstractions/KernelMemoryBuilderExtensions.cs +++ b/service/Abstractions/KernelMemoryBuilderExtensions.cs @@ -3,8 +3,8 @@ using System; using Microsoft.KernelMemory.AI; using Microsoft.KernelMemory.Configuration; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.DataFormats; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.Pipeline; using Microsoft.KernelMemory.Pipeline.Queue; @@ -86,18 +86,18 @@ public static IKernelMemoryBuilder WithCustomIngestionQueueClientFactory( return builder; } - public static IKernelMemoryBuilder WithCustomStorage( - this IKernelMemoryBuilder builder, IContentStorage service) + public static IKernelMemoryBuilder WithCustomDocumentStorage( + this IKernelMemoryBuilder builder, IDocumentStorage service) { - service = service ?? throw new ConfigurationException("Memory Builder: the content storage instance is NULL"); - builder.AddSingleton(service); + service = service ?? 
throw new ConfigurationException("Memory Builder: the document storage instance is NULL"); + builder.AddSingleton(service); return builder; } - public static IKernelMemoryBuilder WithCustomStorage( - this IKernelMemoryBuilder builder) where T : class, IContentStorage + public static IKernelMemoryBuilder WithCustomDocumentStorage( + this IKernelMemoryBuilder builder) where T : class, IDocumentStorage { - builder.AddSingleton(); + builder.AddSingleton(); return builder; } diff --git a/service/Abstractions/Pipeline/IPipelineOrchestrator.cs b/service/Abstractions/Pipeline/IPipelineOrchestrator.cs index 52ff6d8a3..7d57b97d9 100644 --- a/service/Abstractions/Pipeline/IPipelineOrchestrator.cs +++ b/service/Abstractions/Pipeline/IPipelineOrchestrator.cs @@ -92,7 +92,7 @@ public interface IPipelineOrchestrator Task StopAllPipelinesAsync(); /// - /// Fetch a file from content storage, streaming its content and details + /// Fetch a file from document storage, streaming its content and details /// /// Pipeline containing the file /// Name of the file to fetch @@ -101,7 +101,7 @@ public interface IPipelineOrchestrator Task ReadFileAsStreamAsync(DataPipeline pipeline, string fileName, CancellationToken cancellationToken = default); /// - /// Fetch a file from content storage + /// Fetch a file from document storage /// /// Pipeline containing the file /// Name of the file to fetch @@ -109,7 +109,7 @@ public interface IPipelineOrchestrator Task ReadFileAsync(DataPipeline pipeline, string fileName, CancellationToken cancellationToken = default); /// - /// Fetch a file from content storage + /// Fetch a file from document storage /// /// Pipeline containing the file /// Name of the file to fetch @@ -117,7 +117,7 @@ public interface IPipelineOrchestrator Task ReadTextFileAsync(DataPipeline pipeline, string fileName, CancellationToken cancellationToken = default); /// - /// Write a text file from content storage + /// Write a text file from document storage /// /// Pipeline containing the file /// Name of the file to fetch @@ -126,7 +126,7 @@ public interface IPipelineOrchestrator Task WriteTextFileAsync(DataPipeline pipeline, string fileName, string fileContent, CancellationToken cancellationToken = default); /// - /// Write a file from content storage + /// Write a file from document storage /// /// Pipeline containing the file /// Name of the file to fetch @@ -172,7 +172,7 @@ public interface IPipelineOrchestrator /// /// Start an asynchronous job, via handlers, to delete a specified index - /// from vector and content storage. This might be a long running + /// from vector and document storage. This might be a long running /// operation, hence the use of queue/handlers. /// /// Optional index name diff --git a/service/Core/Configuration/KernelMemoryConfig.cs b/service/Core/Configuration/KernelMemoryConfig.cs index a1373e075..f1b5b62e4 100644 --- a/service/Core/Configuration/KernelMemoryConfig.cs +++ b/service/Core/Configuration/KernelMemoryConfig.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using System; using System.Collections.Generic; using System.Linq; using Microsoft.Extensions.Configuration; @@ -122,10 +123,33 @@ public class RetrievalConfig /// public ServiceConfig Service { get; set; } = new(); + /// + /// Legacy Documents storage settings. 
+ /// + [Obsolete("`ContentStorageType` has been deprecated, please use `DocumentStorageType`")] + + public string ContentStorageType + { + get + { + return this._contentStorageType; + } + set + { + this._contentStorageType = value; + if (!string.IsNullOrEmpty(this._contentStorageType)) + { + throw new ConfigurationException($"`ContentStorageType` (value: {this._contentStorageType}) has been deprecated, please use `DocumentStorageType`"); + } + } + } + + private string _contentStorageType = string.Empty; + /// /// Documents storage settings. /// - public string ContentStorageType { get; set; } = string.Empty; + public string DocumentStorageType { get; set; } = string.Empty; /// /// The text generator used to generate synthetic data during ingestion diff --git a/service/Core/ContentStorage/DevTools/DependencyInjection.cs b/service/Core/DocumentStorage/DevTools/DependencyInjection.cs similarity index 56% rename from service/Core/ContentStorage/DevTools/DependencyInjection.cs rename to service/Core/DocumentStorage/DevTools/DependencyInjection.cs index f80c6d87e..61052e5d2 100644 --- a/service/Core/ContentStorage/DevTools/DependencyInjection.cs +++ b/service/Core/DocumentStorage/DevTools/DependencyInjection.cs @@ -1,8 +1,8 @@ // Copyright (c) Microsoft. All rights reserved. using Microsoft.Extensions.DependencyInjection; -using Microsoft.KernelMemory.ContentStorage; -using Microsoft.KernelMemory.ContentStorage.DevTools; +using Microsoft.KernelMemory.DocumentStorage; +using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.FileSystem.DevTools; // ReSharper disable once CheckNamespace @@ -12,29 +12,29 @@ public static partial class KernelMemoryBuilderExtensions { public static IKernelMemoryBuilder WithSimpleFileStorage(this IKernelMemoryBuilder builder, SimpleFileStorageConfig? config = null) { - builder.Services.AddSimpleFileStorageAsContentStorage(config ?? new SimpleFileStorageConfig()); + builder.Services.AddSimpleFileStorageAsDocumentStorage(config ?? 
new SimpleFileStorageConfig()); return builder; } public static IKernelMemoryBuilder WithSimpleFileStorage(this IKernelMemoryBuilder builder, string directory) { - builder.Services.AddSimpleFileStorageAsContentStorage(directory); + builder.Services.AddSimpleFileStorageAsDocumentStorage(directory); return builder; } } public static partial class DependencyInjection { - public static IServiceCollection AddSimpleFileStorageAsContentStorage(this IServiceCollection services, SimpleFileStorageConfig config) + public static IServiceCollection AddSimpleFileStorageAsDocumentStorage(this IServiceCollection services, SimpleFileStorageConfig config) { return services .AddSingleton(config) - .AddSingleton(); + .AddSingleton(); } - public static IServiceCollection AddSimpleFileStorageAsContentStorage(this IServiceCollection services, string directory) + public static IServiceCollection AddSimpleFileStorageAsDocumentStorage(this IServiceCollection services, string directory) { var config = new SimpleFileStorageConfig { StorageType = FileSystemTypes.Disk, Directory = directory }; - return services.AddSimpleFileStorageAsContentStorage(config); + return services.AddSimpleFileStorageAsDocumentStorage(config); } } diff --git a/service/Core/ContentStorage/DevTools/SimpleFileStorage.cs b/service/Core/DocumentStorage/DevTools/SimpleFileStorage.cs similarity index 95% rename from service/Core/ContentStorage/DevTools/SimpleFileStorage.cs rename to service/Core/DocumentStorage/DevTools/SimpleFileStorage.cs index 96983f419..de91dc1b1 100644 --- a/service/Core/ContentStorage/DevTools/SimpleFileStorage.cs +++ b/service/Core/DocumentStorage/DevTools/SimpleFileStorage.cs @@ -10,10 +10,10 @@ using Microsoft.KernelMemory.FileSystem.DevTools; using Microsoft.KernelMemory.Pipeline; -namespace Microsoft.KernelMemory.ContentStorage.DevTools; +namespace Microsoft.KernelMemory.DocumentStorage.DevTools; [Experimental("KMEXP03")] -public class SimpleFileStorage : IContentStorage +public class SimpleFileStorage : IDocumentStorage { private readonly ILogger _log; private readonly IFileSystem _fileSystem; @@ -120,7 +120,7 @@ public async Task ReadFileAsync( this._log.LogError("File not found {0}/{1}/{2}", index, documentId, fileName); } - throw new ContentStorageFileNotFoundException("File not found"); + throw new DocumentStorageFileNotFoundException("File not found"); } } } diff --git a/service/Core/ContentStorage/DevTools/SimpleFileStorageConfig.cs b/service/Core/DocumentStorage/DevTools/SimpleFileStorageConfig.cs similarity index 91% rename from service/Core/ContentStorage/DevTools/SimpleFileStorageConfig.cs rename to service/Core/DocumentStorage/DevTools/SimpleFileStorageConfig.cs index 5c623c015..f20dc10f0 100644 --- a/service/Core/ContentStorage/DevTools/SimpleFileStorageConfig.cs +++ b/service/Core/DocumentStorage/DevTools/SimpleFileStorageConfig.cs @@ -2,7 +2,7 @@ using Microsoft.KernelMemory.FileSystem.DevTools; -namespace Microsoft.KernelMemory.ContentStorage.DevTools; +namespace Microsoft.KernelMemory.DocumentStorage.DevTools; public class SimpleFileStorageConfig { diff --git a/service/Core/Handlers/DeleteDocumentHandler.cs b/service/Core/Handlers/DeleteDocumentHandler.cs index cb075bba8..f7c848a7f 100644 --- a/service/Core/Handlers/DeleteDocumentHandler.cs +++ b/service/Core/Handlers/DeleteDocumentHandler.cs @@ -4,8 +4,8 @@ using System.Threading; using System.Threading.Tasks; using Microsoft.Extensions.Logging; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Diagnostics; +using 
Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.Pipeline; @@ -14,19 +14,19 @@ namespace Microsoft.KernelMemory.Handlers; public sealed class DeleteDocumentHandler : IPipelineStepHandler { private readonly List _memoryDbs; - private readonly IContentStorage _contentStorage; + private readonly IDocumentStorage _documentStorage; private readonly ILogger _log; public string StepName { get; } public DeleteDocumentHandler( string stepName, - IContentStorage contentStorage, + IDocumentStorage documentStorage, List memoryDbs, ILogger? log = null) { this.StepName = stepName; - this._contentStorage = contentStorage; + this._documentStorage = documentStorage; this._memoryDbs = memoryDbs; this._log = log ?? DefaultLogger.Instance; @@ -55,7 +55,7 @@ public DeleteDocumentHandler( } // Delete files, leaving the status file - await this._contentStorage.EmptyDocumentDirectoryAsync( + await this._documentStorage.EmptyDocumentDirectoryAsync( index: pipeline.Index, documentId: pipeline.DocumentId, cancellationToken).ConfigureAwait(false); diff --git a/service/Core/Handlers/DeleteGeneratedFilesHandler.cs b/service/Core/Handlers/DeleteGeneratedFilesHandler.cs index 37f9e003c..b888376e0 100644 --- a/service/Core/Handlers/DeleteGeneratedFilesHandler.cs +++ b/service/Core/Handlers/DeleteGeneratedFilesHandler.cs @@ -3,26 +3,26 @@ using System.Threading; using System.Threading.Tasks; using Microsoft.Extensions.Logging; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Diagnostics; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.Pipeline; namespace Microsoft.KernelMemory.Handlers; public sealed class DeleteGeneratedFilesHandler : IPipelineStepHandler { - private readonly IContentStorage _contentStorage; + private readonly IDocumentStorage _documentStorage; private readonly ILogger _log; public string StepName { get; } public DeleteGeneratedFilesHandler( string stepName, - IContentStorage contentStorage, + IDocumentStorage documentStorage, ILogger? log = null) { this.StepName = stepName; - this._contentStorage = contentStorage; + this._documentStorage = documentStorage; this._log = log ?? 
DefaultLogger.Instance; this._log.LogInformation("Handler '{0}' ready", stepName); @@ -35,7 +35,7 @@ public DeleteGeneratedFilesHandler( this._log.LogDebug("Deleting generated files, pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId); // Delete files, leaving the status file - await this._contentStorage.EmptyDocumentDirectoryAsync( + await this._documentStorage.EmptyDocumentDirectoryAsync( index: pipeline.Index, documentId: pipeline.DocumentId, cancellationToken).ConfigureAwait(false); diff --git a/service/Core/Handlers/DeleteIndexHandler.cs b/service/Core/Handlers/DeleteIndexHandler.cs index f47279bee..2b539bd0a 100644 --- a/service/Core/Handlers/DeleteIndexHandler.cs +++ b/service/Core/Handlers/DeleteIndexHandler.cs @@ -4,8 +4,8 @@ using System.Threading; using System.Threading.Tasks; using Microsoft.Extensions.Logging; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Diagnostics; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.Pipeline; @@ -14,19 +14,19 @@ namespace Microsoft.KernelMemory.Handlers; public sealed class DeleteIndexHandler : IPipelineStepHandler { private readonly List _memoryDbs; - private readonly IContentStorage _contentStorage; + private readonly IDocumentStorage _documentStorage; private readonly ILogger _log; public string StepName { get; } public DeleteIndexHandler( string stepName, - IContentStorage contentStorage, + IDocumentStorage documentStorage, List memoryDbs, ILogger? log = null) { this.StepName = stepName; - this._contentStorage = contentStorage; + this._documentStorage = documentStorage; this._memoryDbs = memoryDbs; this._log = log ?? DefaultLogger.Instance; @@ -46,7 +46,7 @@ public DeleteIndexHandler( } // Delete index from file storage - await this._contentStorage.DeleteIndexDirectoryAsync( + await this._documentStorage.DeleteIndexDirectoryAsync( index: pipeline.Index, cancellationToken).ConfigureAwait(false); diff --git a/service/Core/Handlers/GenerateEmbeddingsHandler.cs b/service/Core/Handlers/GenerateEmbeddingsHandler.cs index ddf3492c4..e73e870f8 100644 --- a/service/Core/Handlers/GenerateEmbeddingsHandler.cs +++ b/service/Core/Handlers/GenerateEmbeddingsHandler.cs @@ -8,14 +8,14 @@ using System.Threading.Tasks; using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.AI; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Diagnostics; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.Pipeline; namespace Microsoft.KernelMemory.Handlers; /// -/// Memory ingestion pipeline handler responsible for generating text embedding and saving them to the content storage. +/// Memory ingestion pipeline handler responsible for generating text embedding and saving them to the document storage. /// public sealed class GenerateEmbeddingsHandler : IPipelineStepHandler { @@ -28,7 +28,7 @@ public sealed class GenerateEmbeddingsHandler : IPipelineStepHandler public string StepName { get; } /// - /// Handler responsible for generating embeddings and saving them to content storages (not memory db). + /// Handler responsible for generating embeddings and saving them to document storages (not memory db). 
/// Note: stepName and other params are injected with DI /// /// Pipeline step for which the handler will be invoked diff --git a/service/Core/Handlers/GenerateEmbeddingsParallelHandler.cs b/service/Core/Handlers/GenerateEmbeddingsParallelHandler.cs index 10254639b..567de42ce 100644 --- a/service/Core/Handlers/GenerateEmbeddingsParallelHandler.cs +++ b/service/Core/Handlers/GenerateEmbeddingsParallelHandler.cs @@ -8,14 +8,14 @@ using System.Threading.Tasks; using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.AI; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Diagnostics; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.Pipeline; namespace Microsoft.KernelMemory.Handlers; /// -/// Memory ingestion pipeline handler responsible for generating text embedding and saving them to the content storage. +/// Memory ingestion pipeline handler responsible for generating text embedding and saving them to the document storage. /// public sealed class GenerateEmbeddingsParallelHandler : IPipelineStepHandler { @@ -28,7 +28,7 @@ public sealed class GenerateEmbeddingsParallelHandler : IPipelineStepHandler public string StepName { get; } /// - /// Handler responsible for generating embeddings and saving them to content storages (not memory db). + /// Handler responsible for generating embeddings and saving them to document storage (not memory db). /// Note: stepName and other params are injected with DI /// /// Pipeline step for which the handler will be invoked diff --git a/service/Core/Handlers/SaveRecordsHandler.cs b/service/Core/Handlers/SaveRecordsHandler.cs index 42921f81f..0f8cfa904 100644 --- a/service/Core/Handlers/SaveRecordsHandler.cs +++ b/service/Core/Handlers/SaveRecordsHandler.cs @@ -7,8 +7,8 @@ using System.Threading; using System.Threading.Tasks; using Microsoft.Extensions.Logging; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Diagnostics; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.FileSystem.DevTools; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.Pipeline; diff --git a/service/Core/Handlers/TextExtractionHandler.cs b/service/Core/Handlers/TextExtractionHandler.cs index e1639f43c..065ec74fc 100644 --- a/service/Core/Handlers/TextExtractionHandler.cs +++ b/service/Core/Handlers/TextExtractionHandler.cs @@ -16,7 +16,7 @@ namespace Microsoft.KernelMemory.Handlers; /// -/// Memory ingestion pipeline handler responsible for extracting text from files and saving it to content storage. +/// Memory ingestion pipeline handler responsible for extracting text from files and saving it to document storage. 
/// public sealed class TextExtractionHandler : IPipelineStepHandler, IDisposable { diff --git a/service/Core/KernelMemoryBuilder.cs b/service/Core/KernelMemoryBuilder.cs index 2b6dc151d..71eef0898 100644 --- a/service/Core/KernelMemoryBuilder.cs +++ b/service/Core/KernelMemoryBuilder.cs @@ -5,8 +5,8 @@ using Microsoft.Extensions.DependencyInjection; using Microsoft.KernelMemory.AI; using Microsoft.KernelMemory.AppBuilders; -using Microsoft.KernelMemory.ContentStorage; -using Microsoft.KernelMemory.ContentStorage.DevTools; +using Microsoft.KernelMemory.DocumentStorage; +using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.FileSystem.DevTools; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.MemoryStorage.DevTools; @@ -349,19 +349,19 @@ private bool IsEmbeddingGeneratorEnabled() private ClientTypes GetBuildType() { var hasQueueFactory = (this._memoryServiceCollection.HasService()); - var hasContentStorage = (this._memoryServiceCollection.HasService()); + var hasDocumentStorage = (this._memoryServiceCollection.HasService()); var hasMimeDetector = (this._memoryServiceCollection.HasService()); var hasEmbeddingGenerator = (this._memoryServiceCollection.HasService()); var hasMemoryDb = (this._memoryServiceCollection.HasService()); var hasTextGenerator = (this._memoryServiceCollection.HasService()); - if (hasContentStorage && hasMimeDetector && hasEmbeddingGenerator && hasMemoryDb && hasTextGenerator) + if (hasDocumentStorage && hasMimeDetector && hasEmbeddingGenerator && hasMemoryDb && hasTextGenerator) { return hasQueueFactory ? ClientTypes.AsyncService : ClientTypes.SyncServerless; } var missing = new List(); - if (!hasContentStorage) { missing.Add("Content storage"); } + if (!hasDocumentStorage) { missing.Add("Document storage"); } if (!hasMimeDetector) { missing.Add("MIME type detection"); } diff --git a/service/Core/Pipeline/BaseOrchestrator.cs b/service/Core/Pipeline/BaseOrchestrator.cs index 1de025024..6b67f96e1 100644 --- a/service/Core/Pipeline/BaseOrchestrator.cs +++ b/service/Core/Pipeline/BaseOrchestrator.cs @@ -9,8 +9,8 @@ using System.Threading.Tasks; using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.AI; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Diagnostics; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.FileSystem.DevTools; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.Models; @@ -27,7 +27,7 @@ public abstract class BaseOrchestrator : IPipelineOrchestrator, IDisposable private readonly List _embeddingGenerators; private readonly ITextGenerator _textGenerator; private readonly List _defaultIngestionSteps; - private readonly IContentStorage _contentStorage; + private readonly IDocumentStorage _documentStorage; private readonly IMimeTypeDetection _mimeTypeDetection; private readonly string? _defaultIndexName; @@ -35,7 +35,7 @@ public abstract class BaseOrchestrator : IPipelineOrchestrator, IDisposable protected CancellationTokenSource CancellationTokenSource { get; private set; } protected BaseOrchestrator( - IContentStorage contentStorage, + IDocumentStorage documentStorage, List embeddingGenerators, List memoryDbs, ITextGenerator textGenerator, @@ -48,7 +48,7 @@ protected BaseOrchestrator( this.Log = log ?? 
DefaultLogger.Instance; this._defaultIngestionSteps = config.DataIngestion.GetDefaultStepsOrDefaults(); this.EmbeddingGenerationEnabled = config.DataIngestion.EmbeddingGenerationEnabled; - this._contentStorage = contentStorage; + this._documentStorage = documentStorage; this._embeddingGenerators = embeddingGenerators; this._memoryDbs = memoryDbs; this._textGenerator = textGenerator; @@ -153,7 +153,7 @@ public DataPipeline PrepareNewDocumentUpload( try { - using StreamableFileContent? streamableContent = await this._contentStorage.ReadFileAsync(index, documentId, Constants.PipelineStatusFilename, false, cancellationToken) + using StreamableFileContent? streamableContent = await this._documentStorage.ReadFileAsync(index, documentId, Constants.PipelineStatusFilename, false, cancellationToken) .ConfigureAwait(false); if (streamableContent == null) @@ -178,7 +178,7 @@ public DataPipeline PrepareNewDocumentUpload( return result; } - catch (ContentStorageFileNotFoundException) + catch (DocumentStorageFileNotFoundException) { throw new PipelineNotFoundException("Pipeline/Document not found"); } @@ -226,7 +226,7 @@ public Task StopAllPipelinesAsync() public async Task ReadFileAsStreamAsync(DataPipeline pipeline, string fileName, CancellationToken cancellationToken = default) { pipeline.Index = IndexName.CleanName(pipeline.Index, this._defaultIndexName); - return await this._contentStorage.ReadFileAsync(pipeline.Index, pipeline.DocumentId, fileName, true, cancellationToken) + return await this._documentStorage.ReadFileAsync(pipeline.Index, pipeline.DocumentId, fileName, true, cancellationToken) .ConfigureAwait(false); } @@ -256,7 +256,7 @@ public Task WriteTextFileAsync(DataPipeline pipeline, string fileName, string fi public Task WriteFileAsync(DataPipeline pipeline, string fileName, BinaryData fileContent, CancellationToken cancellationToken = default) { pipeline.Index = IndexName.CleanName(pipeline.Index, this._defaultIndexName); - return this._contentStorage.WriteFileAsync(pipeline.Index, pipeline.DocumentId, fileName, fileContent.ToStream(), cancellationToken); + return this._documentStorage.WriteFileAsync(pipeline.Index, pipeline.DocumentId, fileName, fileContent.ToStream(), cancellationToken); } /// @@ -315,7 +315,7 @@ protected async Task CleanUpAfterCompletionAsync(DataPipeline pipeline, Cancella { try { - await this._contentStorage.DeleteDocumentDirectoryAsync(index: pipeline.Index, documentId: pipeline.DocumentId, cancellationToken).ConfigureAwait(false); + await this._documentStorage.DeleteDocumentDirectoryAsync(index: pipeline.Index, documentId: pipeline.DocumentId, cancellationToken).ConfigureAwait(false); } catch (Exception e) { @@ -327,7 +327,7 @@ protected async Task CleanUpAfterCompletionAsync(DataPipeline pipeline, Cancella { try { - await this._contentStorage.DeleteIndexDirectoryAsync(pipeline.Index, cancellationToken).ConfigureAwait(false); + await this._documentStorage.DeleteIndexDirectoryAsync(pipeline.Index, cancellationToken).ConfigureAwait(false); } catch (Exception e) { @@ -423,7 +423,7 @@ protected async Task UpdatePipelineStatusAsync(DataPipeline pipeline, Cancellati this.Log.LogDebug("Saving pipeline status to '{0}/{1}/{2}'", pipeline.Index, pipeline.DocumentId, Constants.PipelineStatusFilename); try { - await this._contentStorage.WriteFileAsync( + await this._documentStorage.WriteFileAsync( pipeline.Index, pipeline.DocumentId, Constants.PipelineStatusFilename, @@ -447,8 +447,8 @@ private async Task UploadFormFilesAsync(DataPipeline pipeline, CancellationToken { 
this.Log.LogDebug("Uploading {0} files, pipeline '{1}/{2}'", pipeline.FilesToUpload.Count, pipeline.Index, pipeline.DocumentId); - await this._contentStorage.CreateIndexDirectoryAsync(pipeline.Index, cancellationToken).ConfigureAwait(false); - await this._contentStorage.CreateDocumentDirectoryAsync(pipeline.Index, pipeline.DocumentId, cancellationToken).ConfigureAwait(false); + await this._documentStorage.CreateIndexDirectoryAsync(pipeline.Index, cancellationToken).ConfigureAwait(false); + await this._documentStorage.CreateDocumentDirectoryAsync(pipeline.Index, pipeline.DocumentId, cancellationToken).ConfigureAwait(false); foreach (DocumentUploadRequest.UploadedFile file in pipeline.FilesToUpload) { @@ -462,7 +462,7 @@ private async Task UploadFormFilesAsync(DataPipeline pipeline, CancellationToken var fileSize = file.FileContent.Length; this.Log.LogDebug("Uploading file '{0}', size {1} bytes", file.FileName, fileSize); - await this._contentStorage.WriteFileAsync(pipeline.Index, pipeline.DocumentId, file.FileName, file.FileContent, cancellationToken).ConfigureAwait(false); + await this._documentStorage.WriteFileAsync(pipeline.Index, pipeline.DocumentId, file.FileName, file.FileContent, cancellationToken).ConfigureAwait(false); string mimeType = string.Empty; try diff --git a/service/Core/Pipeline/DistributedPipelineOrchestrator.cs b/service/Core/Pipeline/DistributedPipelineOrchestrator.cs index a36607530..b569c11b4 100644 --- a/service/Core/Pipeline/DistributedPipelineOrchestrator.cs +++ b/service/Core/Pipeline/DistributedPipelineOrchestrator.cs @@ -9,7 +9,7 @@ using System.Threading.Tasks; using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.AI; -using Microsoft.KernelMemory.ContentStorage; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.Pipeline.Queue; @@ -36,7 +36,7 @@ public sealed class DistributedPipelineOrchestrator : BaseOrchestrator /// Create a new instance of the asynchronous orchestrator /// /// Queue client factory - /// Service used to store files + /// Service used to store files /// Services used to generate embeddings during the ingestion /// Services where to store memory records /// Service used to generate text, e.g. synthetic memory records @@ -45,14 +45,14 @@ public sealed class DistributedPipelineOrchestrator : BaseOrchestrator /// public DistributedPipelineOrchestrator( QueueClientFactory queueClientFactory, - IContentStorage contentStorage, + IDocumentStorage documentStorage, List embeddingGenerators, List memoryDbs, ITextGenerator textGenerator, IMimeTypeDetection? mimeTypeDetection = null, KernelMemoryConfig? config = null, ILogger? 
log = null) - : base(contentStorage, embeddingGenerators, memoryDbs, textGenerator, mimeTypeDetection, config, log) + : base(documentStorage, embeddingGenerators, memoryDbs, textGenerator, mimeTypeDetection, config, log) { this._queueClientFactory = queueClientFactory; } diff --git a/service/Core/Pipeline/InProcessPipelineOrchestrator.cs b/service/Core/Pipeline/InProcessPipelineOrchestrator.cs index 5d88d42b3..ff2e8941f 100644 --- a/service/Core/Pipeline/InProcessPipelineOrchestrator.cs +++ b/service/Core/Pipeline/InProcessPipelineOrchestrator.cs @@ -10,7 +10,7 @@ using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.AI; using Microsoft.KernelMemory.Configuration; -using Microsoft.KernelMemory.ContentStorage; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.Handlers; using Microsoft.KernelMemory.MemoryStorage; @@ -26,7 +26,7 @@ public sealed class InProcessPipelineOrchestrator : BaseOrchestrator /// /// Create a new instance of the synchronous orchestrator. /// - /// Service used to store files + /// Service used to store files /// Services used to generate embeddings during the ingestion /// Services where to store memory records /// Service used to generate text, e.g. synthetic memory records @@ -35,7 +35,7 @@ public sealed class InProcessPipelineOrchestrator : BaseOrchestrator /// Global KM configuration /// Application logger public InProcessPipelineOrchestrator( - IContentStorage contentStorage, + IDocumentStorage documentStorage, List embeddingGenerators, List memoryDbs, ITextGenerator textGenerator, @@ -43,7 +43,7 @@ public InProcessPipelineOrchestrator( IServiceProvider? serviceProvider = null, KernelMemoryConfig? config = null, ILogger? log = null) - : base(contentStorage, embeddingGenerators, memoryDbs, textGenerator, mimeTypeDetection, config, log) + : base(documentStorage, embeddingGenerators, memoryDbs, textGenerator, mimeTypeDetection, config, log) { this._serviceProvider = serviceProvider; } diff --git a/service/Service.AspNetCore/WebAPIEndpoints.cs b/service/Service.AspNetCore/WebAPIEndpoints.cs index bbe86a395..0e7b44221 100644 --- a/service/Service.AspNetCore/WebAPIEndpoints.cs +++ b/service/Service.AspNetCore/WebAPIEndpoints.cs @@ -13,10 +13,10 @@ using Microsoft.AspNetCore.Mvc; using Microsoft.AspNetCore.Routing; using Microsoft.Extensions.Logging; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Service.AspNetCore.Models; using System.IO; using Microsoft.AspNetCore.Http.HttpResults; +using Microsoft.KernelMemory.DocumentStorage; namespace Microsoft.KernelMemory.Service.AspNetCore; @@ -367,7 +367,7 @@ public static void AddGetDownloadEndpoint(this IEndpointRouteBuilder builder, st return response; } - catch (ContentStorageFileNotFoundException e) + catch (DocumentStorageFileNotFoundException e) { return Results.Problem(title: "File not found", detail: e.Message, statusCode: 404); } diff --git a/service/Service/Program.cs b/service/Service/Program.cs index ea1b06e9a..1944e64a0 100644 --- a/service/Service/Program.cs +++ b/service/Service/Program.cs @@ -5,17 +5,17 @@ using System.Linq; using Microsoft.AspNetCore.Builder; using Microsoft.AspNetCore.Http; +using Microsoft.AspNetCore.Mvc; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Microsoft.KernelMemory.AI; using Microsoft.KernelMemory.Configuration; -using Microsoft.KernelMemory.ContentStorage; using Microsoft.KernelMemory.Diagnostics; +using 
Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.MemoryStorage; -using Microsoft.KernelMemory.Service.AspNetCore; using Microsoft.KernelMemory.Pipeline; -using Microsoft.AspNetCore.Mvc; +using Microsoft.KernelMemory.Service.AspNetCore; // KM Configuration: // @@ -160,7 +160,7 @@ public static void Main(string[] args) Console.WriteLine("* Web service auth : " + (config.ServiceAuthorization.Enabled ? "Enabled" : "Disabled")); Console.WriteLine("* OpenAPI swagger : " + (config.Service.OpenApiEnabled ? "Enabled" : "Disabled")); Console.WriteLine("* Memory Db : " + app.Services.GetService()?.GetType().FullName); - Console.WriteLine("* Content storage : " + app.Services.GetService()?.GetType().FullName); + Console.WriteLine("* Document storage : " + app.Services.GetService()?.GetType().FullName); Console.WriteLine("* Embedding generation: " + app.Services.GetService()?.GetType().FullName); Console.WriteLine("* Text generation : " + app.Services.GetService()?.GetType().FullName); Console.WriteLine("* Log level : " + app.Logger.GetLogLevelName()); diff --git a/service/Service/README.md b/service/Service/README.md index 8d4898020..adcec7834 100644 --- a/service/Service/README.md +++ b/service/Service/README.md @@ -89,9 +89,17 @@ env var, so the code will use the settings stored in `appsettings.Development.js The service depends on three main components: -- **Content storage**: this is where content like files, chats, emails are - saved and transformed when uploaded. Currently, the solution supports Azure Blobs, - local filesystem and in-memory volatile filesystem. +- **Document storage**: this is where the application stores files, chats, emails, + cache, async job status, and temporary files used during memory ingestion. + The solution supports Azure Blobs, local filesystem, MongoDB, and in-memory + volatile filesystem. The volatile filesystem is used by default, but it should be + avoided in production and used only for demos and tests. + +- **Memory storage**: service used to persist embeddings and memory records. + The service supports **Azure AI Search**, **Postgres**, **Qdrant**, **Redis**, + **SQL Server** and other engines, plus a very basic in-memory vector storage + with support for persistence on disk called **SimpleVectorDb**. + Unless configured differently, KM uses SimpleVectorDb, storing data in memory only. - **Embedding generator**: all the documents uploaded are automatically partitioned (aka "chunked") and indexed for vector search, generating @@ -108,12 +116,6 @@ The service depends on three main components: factor affecting summarization and answer generations, so you might get better results with 16k, 32k and bigger models. -- **Vector storage**: service used to persist embeddings. The - service supports **Azure AI Search**, **Qdrant**, **Redis** and other engines, - plus a very basic in memory vector storage with support for persistence on disk - called **SimpleVectorDb**. Unless configured differently, KM uses SimpleVectorDb - storing data in memory only. - - **Data ingestion orchestration**: this can run in memory and in the same process, e.g. when working with small files, or run as a service, in which case it requires persistent queues like **Azure Queues** or **RabbitMQ**.
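The README hunk above describes exactly the component set that `KernelMemoryBuilder.GetBuildType()` checks for earlier in this diff: with document storage, MIME detection, an embedding generator, a memory DB and a text generator registered, and no queue factory, the builder yields the synchronous serverless client. The sketch below illustrates that wiring under stated assumptions: the `WithSimpleFileStorage`, `WithSimpleVectorDb`, `WithOpenAIDefaults` and `Build<MemoryServerless>()` helpers come from the wider Kernel Memory API and are not part of this diff, which only renames the storage layer.

```csharp
// Minimal sketch, not part of this change set: serverless wiring of the three
// components described above. Helper method and config property names are
// assumptions taken from the broader Kernel Memory builder API.
using System;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.DocumentStorage.DevTools;
using Microsoft.KernelMemory.FileSystem.DevTools;
using Microsoft.KernelMemory.MemoryStorage.DevTools;

var memory = new KernelMemoryBuilder()
    // Document storage: uploaded files, pipeline status, temporary ingestion artifacts
    .WithSimpleFileStorage(new SimpleFileStorageConfig { StorageType = FileSystemTypes.Volatile })
    // Memory storage: embeddings and memory records (demo-grade SimpleVectorDb)
    .WithSimpleVectorDb(new SimpleVectorDbConfig { StorageType = FileSystemTypes.Volatile })
    // Embedding + text generation; the API key source here is a placeholder
    .WithOpenAIDefaults(Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? "sk-...")
    // No ingestion queue registered, so GetBuildType() resolves to ClientTypes.SyncServerless
    .Build<MemoryServerless>();
```

Registering a queue client factory via `WithCustomIngestionQueueClientFactory`, as the unit tests later in this diff do, flips the same check to the asynchronous service client instead.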
diff --git a/service/Service/ServiceConfiguration.cs b/service/Service/ServiceConfiguration.cs index c5caf614a..3b1164ed7 100644 --- a/service/Service/ServiceConfiguration.cs +++ b/service/Service/ServiceConfiguration.cs @@ -5,7 +5,7 @@ using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.KernelMemory.AI; -using Microsoft.KernelMemory.ContentStorage.DevTools; +using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.MemoryDb.SQLServer; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.MemoryStorage.DevTools; @@ -152,21 +152,21 @@ private void ConfigureQueueDependency(IKernelMemoryBuilder builder) private void ConfigureStorageDependency(IKernelMemoryBuilder builder) { - switch (this._memoryConfiguration.ContentStorageType) + switch (this._memoryConfiguration.DocumentStorageType) { case string x1 when x1.Equals("AzureBlob", StringComparison.OrdinalIgnoreCase): case string x2 when x2.Equals("AzureBlobs", StringComparison.OrdinalIgnoreCase): // Check 2 keys for backward compatibility - builder.Services.AddAzureBlobsAsContentStorage(this.GetServiceConfig("AzureBlobs") - ?? this.GetServiceConfig("AzureBlob")); + builder.Services.AddAzureBlobsAsDocumentStorage(this.GetServiceConfig("AzureBlobs") + ?? this.GetServiceConfig("AzureBlob")); break; case string x when x.Equals("MongoDbAtlas", StringComparison.OrdinalIgnoreCase): - builder.Services.AddMongoDbAtlasAsContentStorage(this.GetServiceConfig("MongoDbAtlas")); + builder.Services.AddMongoDbAtlasAsDocumentStorage(this.GetServiceConfig("MongoDbAtlas")); break; case string x when x.Equals("SimpleFileStorage", StringComparison.OrdinalIgnoreCase): - builder.Services.AddSimpleFileStorageAsContentStorage(this.GetServiceConfig("SimpleFileStorage")); + builder.Services.AddSimpleFileStorageAsDocumentStorage(this.GetServiceConfig("SimpleFileStorage")); break; default: diff --git a/service/Service/appsettings.json b/service/Service/appsettings.json index 86221adfb..e5dc196fe 100644 --- a/service/Service/appsettings.json +++ b/service/Service/appsettings.json @@ -19,7 +19,7 @@ // "Microsoft.KernelMemory.Handlers.TextPartitioningHandler": "Information", // "Microsoft.KernelMemory.Handlers.GenerateEmbeddingsHandler": "Information", // "Microsoft.KernelMemory.Handlers.SaveEmbeddingsHandler": "Information", - // "Microsoft.KernelMemory.ContentStorage.AzureBlobs": "Information", + // "Microsoft.KernelMemory.DocumentStorage.AzureBlobs": "Information", // "Microsoft.KernelMemory.Pipeline.Queue.AzureQueues": "Information", "Microsoft.AspNetCore": "Warning" }, @@ -106,7 +106,7 @@ "AccessKey2": "" }, // "AzureBlobs" or "SimpleFileStorage" - "ContentStorageType": "SimpleFileStorage", + "DocumentStorageType": "SimpleFileStorage", // "AzureOpenAIText", "OpenAI" or "LlamaSharp" "TextGeneratorType": "", // Name of the index to use when none is specified diff --git a/service/tests/Core.FunctionalTests/ParallelHandlersTest.cs b/service/tests/Core.FunctionalTests/ParallelHandlersTest.cs index dbf4072b7..92cb68ae7 100644 --- a/service/tests/Core.FunctionalTests/ParallelHandlersTest.cs +++ b/service/tests/Core.FunctionalTests/ParallelHandlersTest.cs @@ -2,7 +2,7 @@ using System.Diagnostics; using Microsoft.KernelMemory; -using Microsoft.KernelMemory.ContentStorage.DevTools; +using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.MemoryStorage.DevTools; using Microsoft.KM.TestHelpers; using Xunit.Abstractions; diff --git 
a/service/tests/Core.FunctionalTests/ServerLess/SubDirFilesAndStreamsTest.cs b/service/tests/Core.FunctionalTests/ServerLess/SubDirFilesAndStreamsTest.cs index 46d418c48..ce0827c4f 100644 --- a/service/tests/Core.FunctionalTests/ServerLess/SubDirFilesAndStreamsTest.cs +++ b/service/tests/Core.FunctionalTests/ServerLess/SubDirFilesAndStreamsTest.cs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft. All rights reserved. using Microsoft.KernelMemory; -using Microsoft.KernelMemory.ContentStorage.DevTools; +using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.FileSystem.DevTools; using Microsoft.KernelMemory.MemoryStorage.DevTools; using Microsoft.KM.TestHelpers; diff --git a/service/tests/Core.FunctionalTests/VectorDbComparison/TestCosineSimilarity.cs b/service/tests/Core.FunctionalTests/VectorDbComparison/TestCosineSimilarity.cs index 002cf6c5d..113486f57 100644 --- a/service/tests/Core.FunctionalTests/VectorDbComparison/TestCosineSimilarity.cs +++ b/service/tests/Core.FunctionalTests/VectorDbComparison/TestCosineSimilarity.cs @@ -15,67 +15,60 @@ namespace Microsoft.KM.Core.FunctionalTests.VectorDbComparison; -public class TestCosineSimilarity(IConfiguration cfg, ITestOutputHelper log) : BaseFunctionalTestCase(cfg, log) +public class TestCosineSimilarity : BaseFunctionalTestCase { private const string IndexName = "test-cosinesimil"; - private readonly ITestOutputHelper _log = log; + // On/Off toggles + private readonly bool _azSearchEnabled = true; + private readonly bool _postgresEnabled = true; + private readonly bool _elasticsearchEnabled = false; + private readonly bool _mongoDbAtlasEnabled = false; + private readonly bool _qdrantEnabled = false; + private readonly bool _redisEnabled = false; - [Fact] - [Trait("Category", "Serverless")] - public async Task CompareCosineSimilarity() - { - bool azSearchEnabled = true; - bool mongoDbAtlasEnabled = false; - bool postgresEnabled = true; - bool qdrantEnabled = false; - bool redisEnabled = false; - bool simpleDbEnabled = true; - bool elasticsearchEnabled = true; + private readonly Dictionary _memoryDbs = new(); + private readonly FakeEmbeddingGenerator _embeddingGenerator; - // == Ctors - var embeddingGenerator = new FakeEmbeddingGenerator(); + public TestCosineSimilarity(IConfiguration cfg, ITestOutputHelper log) : base(cfg, log) + { + this._embeddingGenerator = new FakeEmbeddingGenerator(); - SimpleVectorDb? simpleVecDb = null; - if (simpleDbEnabled) { simpleVecDb = new SimpleVectorDb(this.SimpleVectorDbConfig, embeddingGenerator); } + this._memoryDbs.Add("simple", new SimpleVectorDb(this.SimpleVectorDbConfig, this._embeddingGenerator)); - AzureAISearchMemory? acs = null; - if (azSearchEnabled) { acs = new AzureAISearchMemory(this.AzureAiSearchConfig, embeddingGenerator); } + if (this._azSearchEnabled) { this._memoryDbs.Add("acs", new AzureAISearchMemory(this.AzureAiSearchConfig, this._embeddingGenerator)); } - QdrantMemory? qdrant = null; - if (qdrantEnabled) { qdrant = new QdrantMemory(this.QdrantConfig, embeddingGenerator); } + if (this._mongoDbAtlasEnabled) { this._memoryDbs.Add("mongoDb", new MongoDbAtlasMemory(this.MongoDbAtlasConfig, this._embeddingGenerator)); } - PostgresMemory? postgres = null; - if (postgresEnabled) { postgres = new PostgresMemory(this.PostgresConfig, embeddingGenerator); } + if (this._postgresEnabled) { this._memoryDbs.Add("postgres", new PostgresMemory(this.PostgresConfig, this._embeddingGenerator)); } - MongoDbAtlasMemory? 
atlasVectorDb = null; - if (mongoDbAtlasEnabled) { atlasVectorDb = new MongoDbAtlasMemory(this.MongoDbAtlasConfig, embeddingGenerator); } + if (this._qdrantEnabled) { this._memoryDbs.Add("qdrant", new QdrantMemory(this.QdrantConfig, this._embeddingGenerator)); } - ElasticsearchMemory? elasticsearch = null; - if (elasticsearchEnabled) { elasticsearch = new ElasticsearchMemory(this.ElasticsearchConfig, embeddingGenerator); } + if (this._elasticsearchEnabled) { this._memoryDbs.Add("es", new ElasticsearchMemory(this.ElasticsearchConfig, this._embeddingGenerator)); } - RedisMemory? redis = null; - if (redisEnabled) + if (this._redisEnabled) { // TODO: revisit RedisMemory not to need this, e.g. not to connect in ctor - var redisMux = await ConnectionMultiplexer.ConnectAsync(this.RedisConfig.ConnectionString); - redis = new RedisMemory(this.RedisConfig, redisMux, embeddingGenerator); + var redisMux = ConnectionMultiplexer.ConnectAsync(this.RedisConfig.ConnectionString); + redisMux.Wait(TimeSpan.FromSeconds(5)); + this._memoryDbs.Add("redis", new RedisMemory(this.RedisConfig, redisMux.Result, this._embeddingGenerator)); } + } - var dbs = new IMemoryDb[] { simpleVecDb!, acs!, postgres!, qdrant!, redis!, atlasVectorDb! }; + [Fact] + [Trait("Category", "Serverless")] + public async Task CompareCosineSimilarity() + { + var target = new[] { 0.01f, 0.5f, 0.41f }; + this._embeddingGenerator.Mock("text01", target); // == Delete indexes left over - - await this.DeleteIndexAsync(IndexName, dbs); - await Task.Delay(TimeSpan.FromSeconds(2)); + await this.DeleteIndexAsync(IndexName); // == Create indexes - - await this.CreateIndexAsync(IndexName, 3, dbs); - await Task.Delay(TimeSpan.FromSeconds(1)); + await this.CreateIndexAsync(IndexName, 3); // == Insert data. Note: records are inserted out of order on purpose. 
- var records = new Dictionary { ["3"] = new() { Id = "3", Vector = new[] { 0.1f, 0.1f, 0.1f } }, @@ -86,69 +79,68 @@ public async Task CompareCosineSimilarity() ["7"] = new() { Id = "7", Vector = new[] { 0.88f, 0.01f, 0.13f } }, ["6"] = new() { Id = "6", Vector = new[] { 0.81f, 0.12f, 0.13f } }, }; - - foreach (KeyValuePair r in records) - { - await this.UpsertAsync(IndexName, r.Value, dbs); - } - - await Task.Delay(TimeSpan.FromSeconds(2)); + await this.UpsertAsync(IndexName, records); // == Test results: test precision and ordering - - var target = new[] { 0.01f, 0.5f, 0.41f }; - embeddingGenerator.Mock("text01", target); - - await this.TestSimilarityAsync(records, dbs); + await this.TestSimilarityAsync(records); } - private async Task DeleteIndexAsync(string indexName, IMemoryDb[] memoryDbs) + private async Task DeleteIndexAsync(string indexName) { - foreach (var memoryDb in memoryDbs.Where(x => x != null)) + foreach (var memoryDb in this._memoryDbs) { - this._log.WriteLine($"Deleting index {indexName} in {memoryDb.GetType().FullName}"); - await memoryDb.DeleteIndexAsync(indexName); + Console.WriteLine($"Deleting index {indexName} in {memoryDb.Value.GetType().FullName}"); + await memoryDb.Value.DeleteIndexAsync(indexName); } + + await Task.Delay(TimeSpan.FromSeconds(2)); } - private async Task CreateIndexAsync(string indexName, int vectorSize, IMemoryDb[] memoryDbs) + private async Task CreateIndexAsync(string indexName, int vectorSize) { - foreach (var memoryDb in memoryDbs.Where(x => x != null)) + foreach (var memoryDb in this._memoryDbs) { - this._log.WriteLine($"Creating index {indexName} in {memoryDb.GetType().FullName}"); - await memoryDb.CreateIndexAsync(indexName, vectorSize); + Console.WriteLine($"Creating index {indexName} in {memoryDb.Value.GetType().FullName}"); + await memoryDb.Value.CreateIndexAsync(indexName, vectorSize); } + + await Task.Delay(TimeSpan.FromSeconds(1)); } - private async Task UpsertAsync(string indexName, MemoryRecord record, IMemoryDb[] memoryDbs) + private async Task UpsertAsync(string indexName, Dictionary records) { - foreach (var memoryDb in memoryDbs.Where(x => x != null)) + foreach (KeyValuePair record in records) { - this._log.WriteLine($"Adding record in {memoryDb.GetType().FullName}"); - await memoryDb.UpsertAsync(indexName, record); + foreach (var memoryDb in this._memoryDbs) + { + Console.WriteLine($"Adding record in {memoryDb.Value.GetType().FullName}"); + await memoryDb.Value.UpsertAsync(indexName, record.Value); + } } + + await Task.Delay(TimeSpan.FromSeconds(2)); } - private async Task TestSimilarityAsync(Dictionary records, IMemoryDb[] memoryDbs) + private async Task TestSimilarityAsync(Dictionary records) { var target = new[] { 0.01f, 0.5f, 0.41f }; - foreach (var memoryDb in memoryDbs.Where(x => x != null)) + foreach (var memoryDb in this._memoryDbs) { const double Precision = 0.000001d; var previous = "0"; - IAsyncEnumerable<(MemoryRecord, double)> list = memoryDb.GetSimilarListAsync( + IAsyncEnumerable<(MemoryRecord, double)> list = memoryDb.Value.GetSimilarListAsync( index: IndexName, text: "text01", limit: 10, withEmbeddings: true); List<(MemoryRecord, double)> results = await list.ToListAsync(); - this._log.WriteLine($"\n\n{memoryDb.GetType().FullName}: {results.Count} results"); + Console.WriteLine($"\n\n{memoryDb.Value.GetType().FullName}: {results.Count} results"); previous = "0"; foreach ((MemoryRecord? 
memoryRecord, double actual) in results) { var expected = CosineSim(target, records[memoryRecord.Id].Vector); var diff = expected - actual; - this._log.WriteLine($" - ID: {memoryRecord.Id}, Distance: {actual}, Expected distance: {expected}, Difference: {diff:0.0000000000}"); + Console.WriteLine($" - ID: {memoryRecord.Id}, Distance: {actual}, Expected distance: {expected}, Difference: {diff:0.0000000000}"); Assert.True(Math.Abs(diff) < Precision); Assert.True(string.Compare(memoryRecord.Id, previous, StringComparison.OrdinalIgnoreCase) > 0, "Records are not ordered by similarity"); previous = memoryRecord.Id; diff --git a/service/tests/Core.FunctionalTests/VectorDbComparison/TestMemoryFilters.cs b/service/tests/Core.FunctionalTests/VectorDbComparison/TestMemoryFilters.cs index 2e33fe61c..7ec304cae 100644 --- a/service/tests/Core.FunctionalTests/VectorDbComparison/TestMemoryFilters.cs +++ b/service/tests/Core.FunctionalTests/VectorDbComparison/TestMemoryFilters.cs @@ -4,162 +4,157 @@ using Microsoft.KernelMemory.MemoryDb.AzureAISearch; using Microsoft.KernelMemory.MemoryDb.Elasticsearch; using Microsoft.KernelMemory.MemoryDb.Qdrant; +using Microsoft.KernelMemory.MemoryDb.Redis; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.MemoryStorage.DevTools; using Microsoft.KernelMemory.MongoDbAtlas; using Microsoft.KernelMemory.Postgres; using Microsoft.KM.TestHelpers; +using StackExchange.Redis; using Xunit.Abstractions; // ReSharper disable MissingBlankLines namespace Microsoft.KM.Core.FunctionalTests.VectorDbComparison; -// #pragma warning disable CS8600 // by design -// #pragma warning disable CS8604 // by design -public class TestMemoryFilters(IConfiguration cfg, ITestOutputHelper log) : BaseFunctionalTestCase(cfg, log) +public class TestMemoryFilters : BaseFunctionalTestCase { private const string IndexName = "test-filters"; - private readonly ITestOutputHelper _log = log; + // On/Off toggles + private readonly bool _azSearchEnabled = true; + private readonly bool _postgresEnabled = true; + private readonly bool _elasticsearchEnabled = false; + private readonly bool _mongoDbAtlasEnabled = false; + private readonly bool _qdrantEnabled = false; + private readonly bool _redisEnabled = false; - [Fact] - [Trait("Category", "Serverless")] - public async Task TestFilters() - { - bool azSearchEnabled = true; - bool mongoDbAtlasEnabled = false; - bool postgresEnabled = true; - bool qdrantEnabled = false; - bool elasticsearchEnabled = false; - - // Booleans used for investigating test failures - const bool DeleteIndex = true; - const bool CreateIndex = true; - const bool CreateRecords = true; + private readonly Dictionary _memoryDbs = new(); - var embeddingGenerator = new FakeEmbeddingGenerator(); + public TestMemoryFilters(IConfiguration cfg, ITestOutputHelper log) : base(cfg, log) + { + FakeEmbeddingGenerator _ = new(); - AzureAISearchMemory acs = null!; - if (azSearchEnabled) { acs = new AzureAISearchMemory(this.AzureAiSearchConfig, embeddingGenerator); } + this._memoryDbs.Add("simple", new SimpleVectorDb(this.SimpleVectorDbConfig, _)); - MongoDbAtlasMemory mongoDbAtlas = null!; - if (mongoDbAtlasEnabled) { mongoDbAtlas = new MongoDbAtlasMemory(this.MongoDbAtlasConfig, embeddingGenerator); } + if (this._azSearchEnabled) { this._memoryDbs.Add("acs", new AzureAISearchMemory(this.AzureAiSearchConfig, _)); } - PostgresMemory postgres = null!; - if (postgresEnabled) { postgres = new PostgresMemory(this.PostgresConfig, embeddingGenerator); } + if (this._mongoDbAtlasEnabled) { 
this._memoryDbs.Add("mongoDb", new MongoDbAtlasMemory(this.MongoDbAtlasConfig, _)); } - QdrantMemory qdrant = null!; - if (qdrantEnabled) { qdrant = new QdrantMemory(this.QdrantConfig, embeddingGenerator); } + if (this._postgresEnabled) { this._memoryDbs.Add("postgres", new PostgresMemory(this.PostgresConfig, _)); } - ElasticsearchMemory elasticsearch = null!; - if (elasticsearchEnabled) - { - elasticsearch = new ElasticsearchMemory(this.ElasticsearchConfig, embeddingGenerator); - } + if (this._qdrantEnabled) { this._memoryDbs.Add("qdrant", new QdrantMemory(this.QdrantConfig, _)); } - var simpleVecDb = new SimpleVectorDb(this.SimpleVectorDbConfig, embeddingGenerator); + if (this._elasticsearchEnabled) { this._memoryDbs.Add("es", new ElasticsearchMemory(this.ElasticsearchConfig, _)); } - if (DeleteIndex) + if (this._redisEnabled) { - if (azSearchEnabled) { await acs.DeleteIndexAsync(IndexName); } - - if (qdrantEnabled) { await qdrant.DeleteIndexAsync(IndexName); } - - if (postgresEnabled) { await postgres.DeleteIndexAsync(IndexName); } - - if (mongoDbAtlasEnabled) { await mongoDbAtlas.DeleteIndexAsync(IndexName); } - - if (elasticsearchEnabled) { await elasticsearch.DeleteIndexAsync(IndexName); } - - await simpleVecDb.DeleteIndexAsync(IndexName); - - await Task.Delay(TimeSpan.FromSeconds(2)); + // TODO: revisit RedisMemory not to need this, e.g. not to connect in ctor + var redisMux = ConnectionMultiplexer.ConnectAsync(this.RedisConfig.ConnectionString); + redisMux.Wait(TimeSpan.FromSeconds(5)); + this._memoryDbs.Add("redis", new RedisMemory(this.RedisConfig, redisMux.Result, _)); } + } - if (CreateIndex) - { - if (azSearchEnabled) { await acs.CreateIndexAsync(IndexName, 3); } - - if (qdrantEnabled) { await qdrant.CreateIndexAsync(IndexName, 3); } - - if (postgresEnabled) { await postgres.CreateIndexAsync(IndexName, 3); } - - if (mongoDbAtlasEnabled) { await mongoDbAtlas!.CreateIndexAsync(IndexName, 3); } - - if (elasticsearchEnabled) { await elasticsearch.CreateIndexAsync(IndexName, 3); } - - await simpleVecDb.CreateIndexAsync(IndexName, 3); - } + [Fact] + [Trait("Category", "Serverless")] + public async Task TestFilters() + { + // Booleans used for investigating test failures + const bool DeleteIndex = true; + const bool CreateIndex = true; + const bool CreateRecords = true; - if (CreateRecords) + var records = new Dictionary { - var records = new Dictionary - { - ["1"] = new() { Id = "1", Vector = new[] { 0.25f, 0.33f, 0.29f }, Tags = new() { { "user", "Kaylee" }, { "collection", "Work" } } }, - ["2"] = new() { Id = "2", Vector = new[] { 0.25f, 0.25f, 0.35f }, Tags = new() { { "user", "Kaylee" }, { "collection", "Personal" } } }, - ["3"] = new() { Id = "3", Vector = new[] { 0.1f, 0.1f, 0.1f }, Tags = new() { { "user", "Kaylee" }, { "collection", "Family" } } }, - ["4"] = new() { Id = "4", Vector = new[] { 0.05f, 0.91f, 0.03f }, Tags = new() { { "user", "Kaylee" }, { "collection", "Family" } } }, - ["5"] = new() { Id = "5", Vector = new[] { 0.65f, 0.12f, 0.99f }, Tags = new() { { "user", "Kaylee" }, { "collection", "Family" } } }, - ["6"] = new() { Id = "6", Vector = new[] { 0.81f, 0.12f, 0.13f }, Tags = new() { { "user", "Madelynn" }, { "collection", "Personal" } } }, - ["7"] = new() { Id = "7", Vector = new[] { 0.88f, 0.01f, 0.13f }, Tags = new() { { "user", "Madelynn" }, { "collection", "Work" } } }, - }; - - foreach (KeyValuePair r in records) - { - if (azSearchEnabled) { await acs.UpsertAsync(IndexName, r.Value); } - - if (qdrantEnabled) { await qdrant.UpsertAsync(IndexName, 
r.Value); } - - if (postgresEnabled) { await postgres.UpsertAsync(IndexName, r.Value); } + ["1"] = new() { Id = "1", Vector = new[] { 0.25f, 0.33f, 0.29f }, Tags = new() { { "user", "Kaylee" }, { "collection", "Work" } } }, + ["2"] = new() { Id = "2", Vector = new[] { 0.25f, 0.25f, 0.35f }, Tags = new() { { "user", "Kaylee" }, { "collection", "Personal" } } }, + ["3"] = new() { Id = "3", Vector = new[] { 0.1f, 0.1f, 0.1f }, Tags = new() { { "user", "Kaylee" }, { "collection", "Family" } } }, + ["4"] = new() { Id = "4", Vector = new[] { 0.05f, 0.91f, 0.03f }, Tags = new() { { "user", "Kaylee" }, { "collection", "Family" } } }, + ["5"] = new() { Id = "5", Vector = new[] { 0.65f, 0.12f, 0.99f }, Tags = new() { { "user", "Kaylee" }, { "collection", "Family" } } }, + ["6"] = new() { Id = "6", Vector = new[] { 0.81f, 0.12f, 0.13f }, Tags = new() { { "user", "Madelynn" }, { "collection", "Personal" } } }, + ["7"] = new() { Id = "7", Vector = new[] { 0.88f, 0.01f, 0.13f }, Tags = new() { { "user", "Madelynn" }, { "collection", "Work" } } }, + }; - if (mongoDbAtlasEnabled) { await mongoDbAtlas.UpsertAsync(IndexName, r.Value); } + if (DeleteIndex) { await this.DeleteIndexAsync(IndexName); } - if (elasticsearchEnabled) { await elasticsearch.UpsertAsync(IndexName, r.Value); } + if (CreateIndex) { await this.CreateIndexAsync(IndexName, 3); } - await simpleVecDb.UpsertAsync(IndexName, r.Value); - } - - await Task.Delay(TimeSpan.FromSeconds(2)); - } + if (CreateRecords) { await this.UpsertAsync(IndexName, records); } for (int i = 1; i <= 3; i++) { - if (azSearchEnabled) + Console.WriteLine("\n----- Simple vector DB -----"); + await this.TestVectorDbFiltering(this._memoryDbs["simple"], i); + + if (this._memoryDbs.TryGetValue("acs", out IMemoryDb? acs)) { - this._log.WriteLine("----- Azure AI Search -----"); + Console.WriteLine("----- Azure AI Search -----"); await this.TestVectorDbFiltering(acs, i); } - if (qdrantEnabled) + if (this._memoryDbs.TryGetValue("qdrant", out IMemoryDb? qdrant)) { - this._log.WriteLine("\n----- Qdrant vector DB -----"); + Console.WriteLine("\n----- Qdrant vector DB -----"); await this.TestVectorDbFiltering(qdrant, i); } - if (postgresEnabled) + if (this._memoryDbs.TryGetValue("postgres", out IMemoryDb? postgres)) { - this._log.WriteLine("\n----- Postgres vector DB -----"); + Console.WriteLine("\n----- Postgres vector DB -----"); await this.TestVectorDbFiltering(postgres, i); } - if (mongoDbAtlasEnabled) + if (this._memoryDbs.TryGetValue("mongoDb", out IMemoryDb? mongoDb)) { - this._log.WriteLine("\n----- MongoDB Atlas vector DB -----"); - await this.TestVectorDbFiltering(mongoDbAtlas, i); + Console.WriteLine("\n----- MongoDB Atlas vector DB -----"); + await this.TestVectorDbFiltering(mongoDb, i); } - if (elasticsearchEnabled) + if (this._memoryDbs.TryGetValue("es", out IMemoryDb? 
es)) { - this._log.WriteLine("\n----- Elasticsearch vector DB -----"); - await this.TestVectorDbFiltering(elasticsearch, i); + Console.WriteLine("\n----- Elasticsearch vector DB -----"); + await this.TestVectorDbFiltering(es, i); } - this._log.WriteLine("\n----- Simple vector DB -----"); - await this.TestVectorDbFiltering(simpleVecDb, i); + Console.WriteLine("\n\n"); + } + } - this._log.WriteLine("\n\n"); + private async Task DeleteIndexAsync(string indexName) + { + foreach (var memoryDb in this._memoryDbs) + { + Console.WriteLine($"Deleting index {indexName} in {memoryDb.Value.GetType().FullName}"); + await memoryDb.Value.DeleteIndexAsync(indexName); } + + await Task.Delay(TimeSpan.FromSeconds(2)); + } + + private async Task CreateIndexAsync(string indexName, int vectorSize) + { + foreach (var memoryDb in this._memoryDbs) + { + Console.WriteLine($"Creating index {indexName} in {memoryDb.Value.GetType().FullName}"); + await memoryDb.Value.CreateIndexAsync(indexName, vectorSize); + } + + await Task.Delay(TimeSpan.FromSeconds(1)); + } + + private async Task UpsertAsync(string indexName, Dictionary records) + { + foreach (KeyValuePair record in records) + { + foreach (var memoryDb in this._memoryDbs) + { + Console.WriteLine($"Adding record in {memoryDb.Value.GetType().FullName}"); + await memoryDb.Value.UpsertAsync(indexName, record.Value); + } + } + + await Task.Delay(TimeSpan.FromSeconds(2)); } // NOTE: result order does not matter, checking result count only @@ -170,10 +165,10 @@ private async Task TestVectorDbFiltering(IMemoryDb vectorDb, int test) { var singleFilter = new List { MemoryFilters.ByTag("user", "Kaylee") }; var singleFilterResults = await vectorDb.GetListAsync(IndexName, filters: singleFilter, limit: int.MaxValue).ToListAsync(); - this._log.WriteLine($"\nSingle memory filter: {singleFilterResults.Count} results"); + Console.WriteLine($"\nSingle memory filter: {singleFilterResults.Count} results"); foreach (MemoryRecord r in singleFilterResults.OrderBy(x => x.Id)) { - this._log.WriteLine($" - ID: {r.Id}, Tags: {string.Join(", ", r.Tags.Select(t => $"{t.Key}: {string.Join(", ", t.Value)}"))}"); + Console.WriteLine($" - ID: {r.Id}, Tags: {string.Join(", ", r.Tags.Select(t => $"{t.Key}: {string.Join(", ", t.Value)}"))}"); } Assert.Equal(5, singleFilterResults.Count); @@ -184,10 +179,10 @@ private async Task TestVectorDbFiltering(IMemoryDb vectorDb, int test) { var singleFilterMultipleTags = new List { MemoryFilters.ByTag("user", "Kaylee").ByTag("collection", "Work") }; var singleFilterMultipleTagsResults = await vectorDb.GetListAsync(IndexName, filters: singleFilterMultipleTags, limit: int.MaxValue).ToListAsync(); - this._log.WriteLine($"\nSingle memory filter with multiple tags: {singleFilterMultipleTagsResults.Count} results"); + Console.WriteLine($"\nSingle memory filter with multiple tags: {singleFilterMultipleTagsResults.Count} results"); foreach (MemoryRecord r in singleFilterMultipleTagsResults.OrderBy(x => x.Id)) { - this._log.WriteLine($" - ID: {r.Id}, Tags: {string.Join(", ", r.Tags.Select(t => $"{t.Key}: {string.Join(", ", t.Value)}"))}"); + Console.WriteLine($" - ID: {r.Id}, Tags: {string.Join(", ", r.Tags.Select(t => $"{t.Key}: {string.Join(", ", t.Value)}"))}"); } Assert.Equal(1, singleFilterMultipleTagsResults.Count); @@ -202,10 +197,10 @@ private async Task TestVectorDbFiltering(IMemoryDb vectorDb, int test) MemoryFilters.ByTag("user", "Madelynn").ByTag("collection", "Personal") }; var multipleFiltersResults = await vectorDb.GetListAsync(IndexName, filters: 
multipleFilters, limit: int.MaxValue).ToListAsync(); - this._log.WriteLine($"\nMultiple memory filters with multiple tags: {multipleFiltersResults.Count} results"); + Console.WriteLine($"\nMultiple memory filters with multiple tags: {multipleFiltersResults.Count} results"); foreach (MemoryRecord r in multipleFiltersResults.OrderBy(x => x.Id)) { - this._log.WriteLine($" - ID: {r.Id}, Tags: {string.Join(", ", r.Tags.Select(t => $"{t.Key}: {string.Join(", ", t.Value)}"))}"); + Console.WriteLine($" - ID: {r.Id}, Tags: {string.Join(", ", r.Tags.Select(t => $"{t.Key}: {string.Join(", ", t.Value)}"))}"); } Assert.Equal(4, multipleFiltersResults.Count); diff --git a/service/tests/Core.UnitTests/KernelMemoryBuilderTest.cs b/service/tests/Core.UnitTests/KernelMemoryBuilderTest.cs index 10df06c75..033119edd 100644 --- a/service/tests/Core.UnitTests/KernelMemoryBuilderTest.cs +++ b/service/tests/Core.UnitTests/KernelMemoryBuilderTest.cs @@ -3,7 +3,7 @@ using Microsoft.Extensions.DependencyInjection; using Microsoft.KernelMemory; using Microsoft.KernelMemory.AI; -using Microsoft.KernelMemory.ContentStorage; +using Microsoft.KernelMemory.DocumentStorage; using Microsoft.KernelMemory.MemoryStorage; using Microsoft.KernelMemory.Pipeline; using Microsoft.KernelMemory.Pipeline.Queue; @@ -24,7 +24,7 @@ public KernelMemoryBuilderTest(ITestOutputHelper output) : base(output) public void ItBuildsServerlessClients() { // Arrange - var myContentStorage = new Mock(); + var myDocumentStorage = new Mock(); var myMimeTypeDetection = new Mock(); var myTextEmbeddingGenerator = new Mock(); var myTextGenerator = new Mock(); @@ -33,7 +33,7 @@ public void ItBuildsServerlessClients() myTextEmbeddingGenerator.SetupGet(x => x.MaxTokens).Returns(int.MaxValue); var target = new KernelMemoryBuilder() - .WithCustomStorage(myContentStorage.Object) + .WithCustomDocumentStorage(myDocumentStorage.Object) .WithCustomMimeTypeDetection(myMimeTypeDetection.Object) .WithCustomEmbeddingGenerator(myTextEmbeddingGenerator.Object) .WithCustomTextGenerator(myTextGenerator.Object) @@ -54,7 +54,7 @@ public void ItBuildsAsyncClients() var hostServiceCollection = new ServiceCollection(); var myQueue = new Mock(); var myQueueFactory = new QueueClientFactory(() => myQueue.Object); - var myContentStorage = new Mock(); + var myDocumentStorage = new Mock(); var myMimeTypeDetection = new Mock(); var myTextEmbeddingGenerator = new Mock(); var myTextGenerator = new Mock(); @@ -64,7 +64,7 @@ public void ItBuildsAsyncClients() var target = new KernelMemoryBuilder(hostServiceCollection) .WithCustomIngestionQueueClientFactory(myQueueFactory) - .WithCustomStorage(myContentStorage.Object) + .WithCustomDocumentStorage(myDocumentStorage.Object) .WithCustomMimeTypeDetection(myMimeTypeDetection.Object) .WithCustomEmbeddingGenerator(myTextEmbeddingGenerator.Object) .WithCustomTextGenerator(myTextGenerator.Object) @@ -82,13 +82,13 @@ public void ItBuildsAsyncClients() public void ItDetectsMissingEmbeddingGenerator() { // Arrange - var myContentStorage = new Mock(); + var myDocumentStorage = new Mock(); var myMimeTypeDetection = new Mock(); var myTextGenerator = new Mock(); var myMemoryDb = new Mock(); var target = new KernelMemoryBuilder() - .WithCustomStorage(myContentStorage.Object) + .WithCustomDocumentStorage(myDocumentStorage.Object) .WithCustomMimeTypeDetection(myMimeTypeDetection.Object) .WithCustomTextGenerator(myTextGenerator.Object) .WithCustomMemoryDb(myMemoryDb.Object); diff --git a/service/tests/TestHelpers/BaseFunctionalTestCase.cs 
b/service/tests/TestHelpers/BaseFunctionalTestCase.cs index 525aa5e5e..c85852a1b 100644 --- a/service/tests/TestHelpers/BaseFunctionalTestCase.cs +++ b/service/tests/TestHelpers/BaseFunctionalTestCase.cs @@ -3,7 +3,7 @@ using System.Reflection; using Microsoft.Extensions.Configuration; using Microsoft.KernelMemory; -using Microsoft.KernelMemory.ContentStorage.DevTools; +using Microsoft.KernelMemory.DocumentStorage.DevTools; using Microsoft.KernelMemory.FileSystem.DevTools; using Microsoft.KernelMemory.MemoryStorage.DevTools; using Microsoft.KernelMemory.MongoDbAtlas; diff --git a/tools/InteractiveSetup/Context.cs b/tools/InteractiveSetup/Context.cs index 51b5bea7c..f2f1c634f 100644 --- a/tools/InteractiveSetup/Context.cs +++ b/tools/InteractiveSetup/Context.cs @@ -9,9 +9,9 @@ internal sealed class Context public BoundedBoolean CfgWebService = new(); // Storage - public BoundedBoolean CfgContentStorage = new(initialState: true); + public BoundedBoolean CfgDocumentStorage = new(initialState: true); public BoundedBoolean CfgAzureBlobs = new(); - public BoundedBoolean CfgMongoDbAtlasContentStorage = new(); + public BoundedBoolean CfgMongoDbAtlasDocumentStorage = new(); public BoundedBoolean CfgSimpleFileStorage = new(); // Queues diff --git a/tools/InteractiveSetup/Main.cs b/tools/InteractiveSetup/Main.cs index 9f030ffce..0bba57019 100644 --- a/tools/InteractiveSetup/Main.cs +++ b/tools/InteractiveSetup/Main.cs @@ -32,10 +32,10 @@ public static void InteractiveSetup(string[] args) Services.RabbitMQ.Setup(ctx); SimpleQueues.Setup(ctx); - // Storage - ContentStorageTypeSetup(ctx); + // Document Storage + DocumentStorageTypeSetup(ctx); AzureBlobs.Setup(ctx); - MongoDbAtlasContentStorage.Setup(ctx); + MongoDbAtlasDocumentStorage.Setup(ctx); SimpleFileStorage.Setup(ctx); // Image support @@ -94,8 +94,8 @@ private static void ConfigureItem(Context ctx, string[] items) QueuesTypeSetup(ctx); break; - case string x when x.Equals("ContentStorageType", StringComparison.OrdinalIgnoreCase): - ContentStorageTypeSetup(ctx); + case string x when x.Equals("DocumentStorageType", StringComparison.OrdinalIgnoreCase): + DocumentStorageTypeSetup(ctx); break; case string x when x.Equals("AzureAISearch", StringComparison.OrdinalIgnoreCase): @@ -301,36 +301,36 @@ private static void QueuesTypeSetup(Context ctx) }); } - private static void ContentStorageTypeSetup(Context ctx) + private static void DocumentStorageTypeSetup(Context ctx) { - if (!ctx.CfgContentStorage.Value) { return; } + if (!ctx.CfgDocumentStorage.Value) { return; } var config = AppSettings.GetCurrentConfig(); SetupUI.AskQuestionWithOptions(new QuestionWithOptions { - Title = "Where should the service store files?", + Title = "Where should the service store files? 
A persistent storage is required to handle updates, downloads, etc.", Options = new List { new("Azure Blobs", - config.ContentStorageType == "AzureBlobs", + config.DocumentStorageType == "AzureBlobs", () => { - AppSettings.Change(x => { x.ContentStorageType = "AzureBlobs"; }); + AppSettings.Change(x => { x.DocumentStorageType = "AzureBlobs"; }); ctx.CfgAzureBlobs.Value = true; }), new("MongoDB Atlas", - config.ContentStorageType == "MongoDbAtlas", + config.DocumentStorageType == "MongoDbAtlas", () => { - AppSettings.Change(x => { x.ContentStorageType = "MongoDbAtlas"; }); - ctx.CfgMongoDbAtlasContentStorage.Value = true; + AppSettings.Change(x => { x.DocumentStorageType = "MongoDbAtlas"; }); + ctx.CfgMongoDbAtlasDocumentStorage.Value = true; }), new("SimpleFileStorage (only for tests, data stored in memory or disk, see config file)", - config.ContentStorageType == "SimpleFileStorage", + config.DocumentStorageType == "SimpleFileStorage", () => { - AppSettings.Change(x => { x.ContentStorageType = "SimpleFileStorage"; }); + AppSettings.Change(x => { x.DocumentStorageType = "SimpleFileStorage"; }); ctx.CfgSimpleFileStorage.Value = true; }), new("-exit-", false, SetupUI.Exit), diff --git a/tools/InteractiveSetup/Services/MongoDbAtlasContentStorage.cs b/tools/InteractiveSetup/Services/MongoDbAtlasDocumentStorage.cs similarity index 83% rename from tools/InteractiveSetup/Services/MongoDbAtlasContentStorage.cs rename to tools/InteractiveSetup/Services/MongoDbAtlasDocumentStorage.cs index 5d9a9291e..ac93b6cb1 100644 --- a/tools/InteractiveSetup/Services/MongoDbAtlasContentStorage.cs +++ b/tools/InteractiveSetup/Services/MongoDbAtlasDocumentStorage.cs @@ -5,13 +5,13 @@ namespace Microsoft.KernelMemory.InteractiveSetup.Services; -internal static class MongoDbAtlasContentStorage +internal static class MongoDbAtlasDocumentStorage { public static void Setup(Context ctx, bool force = false) { - if (!ctx.CfgMongoDbAtlasContentStorage.Value && !force) { return; } + if (!ctx.CfgMongoDbAtlasDocumentStorage.Value && !force) { return; } - ctx.CfgMongoDbAtlasContentStorage.Value = false; + ctx.CfgMongoDbAtlasDocumentStorage.Value = false; const string ServiceName = "MongoDbAtlas"; if (!AppSettings.GetCurrentConfig().Services.TryGetValue(ServiceName, out var config))