Skip to content

Commit

Permalink
New abstractions to support file download (#448)
Browse files Browse the repository at this point in the history
## Motivation and Context (Why the change? What's the scenario?)

Supporting abstractions for new File Download feature. See also PR #415

## High level description (Approach, Design)

* New version 0.40
* Breaking changes on storage interface
* New methods on orchestration interface
* New methods on memory interface
  • Loading branch information
dluc authored May 1, 2024
1 parent a487793 commit e43daf7
Show file tree
Hide file tree
Showing 20 changed files with 175 additions and 9 deletions.
1 change: 1 addition & 0 deletions KernelMemory.sln.DotSettings
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ public void It$SOMENAME$()
<s:Boolean x:Key="/Default/UserDictionary/Words/=skprompt/@EntryIndexedValue">True</s:Boolean>
<s:String x:Key="/Default/CodeInspection/Highlighting/InspectionSeverities/=Xunit_002EXunitTestWithConsoleOutput/@EntryIndexedValue">DO_NOT_SHOW</s:String>
<s:Boolean x:Key="/Default/UserDictionary/Words/=smemory/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=subdir/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=SVCS/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=syntaxes/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=testsettings/@EntryIndexedValue">True</s:Boolean>
Expand Down
7 changes: 7 additions & 0 deletions clients/dotnet/WebClient/MemoryWebClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,13 @@ public async Task<bool> IsDocumentReadyAsync(
return status;
}

#if KernelMemoryDev
public Task<StreamableFileContent> ExportFileAsync(string documentId, string fileName, string? index = null, CancellationToken cancellationToken = default)
{
throw new NotImplementedException();
}
#endif

/// <inheritdoc />
public async Task<SearchResult> SearchAsync(
string query,
Expand Down
1 change: 1 addition & 0 deletions clients/dotnet/WebClient/WebClient.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<AssemblyName>Microsoft.KernelMemory.WebClient</AssemblyName>
<RootNamespace>Microsoft.KernelMemory</RootNamespace>
<NoWarn>$(NoWarn);CS1591;NU5104;</NoWarn>
<DefineConstants Condition="'$(SolutionName)' == 'KernelMemoryDev'">$(DefineConstants);KernelMemoryDev</DefineConstants>
</PropertyGroup>

<ItemGroup>
Expand Down
4 changes: 2 additions & 2 deletions examples/006-curl-calling-webservice/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Content of [upload-example.sh](upload-example.sh):
```bash
../../tools/upload-file.sh -f test.pdf \
-s http://127.0.0.1:9001 \
-u curlUser \
-p curlUser \
-t "type:test" \
-i curlExample01
```
Expand All @@ -33,7 +33,7 @@ Content of [ask-example.sh](ask-example.sh):

```bash
../../tools/ask.sh -s http://127.0.0.1:9001 \
-u curlUser \
-p curlUser \
-q "tell me about Semantic Kernel" \
-f '"type":["test"]'
```
1 change: 1 addition & 0 deletions extensions/AzureBlobs/AzureBlobs.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
<AssemblyName>Microsoft.KernelMemory.ContentStorage.AzureBlobs</AssemblyName>
<RootNamespace>Microsoft.KernelMemory.ContentStorage.AzureBlobs</RootNamespace>
<NoWarn>$(NoWarn);CA1724;CS1591;</NoWarn>
<DefineConstants Condition="'$(SolutionName)' == 'KernelMemoryDev'">$(DefineConstants);KernelMemoryDev</DefineConstants>
</PropertyGroup>

<ItemGroup>
Expand Down
13 changes: 13 additions & 0 deletions extensions/AzureBlobs/AzureBlobsStorage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,18 @@ public Task WriteFileAsync(
return this.InternalWriteAsync(directoryName, fileName, streamContent, cancellationToken);
}

#if KernelMemoryDev
/// <inheritdoc />
public Task<StreamableFileContent> ReadFileAsync(
string index,
string documentId,
string fileName,
bool logErrIfNotFound = true,
CancellationToken cancellationToken = default)
{
throw new NotImplementedException();
}
#else
/// <inheritdoc />
public async Task<BinaryData> ReadFileAsync(
string index,
Expand Down Expand Up @@ -220,6 +232,7 @@ public async Task<BinaryData> ReadFileAsync(
throw new ContentStorageFileNotFoundException("File not found", e);
}
}
#endif

#region private

Expand Down
1 change: 1 addition & 0 deletions extensions/MongoDbAtlas/MongoDbAtlas/MongoDbAtlas.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
<AssemblyName>Microsoft.KernelMemory.MongoDbAtlas</AssemblyName>
<RootNamespace>Microsoft.KernelMemory.MongoDbAtlas</RootNamespace>
<NoWarn>$(NoWarn);CA1724;CS1591;CA1308;</NoWarn>
<DefineConstants Condition="'$(SolutionName)' == 'KernelMemoryDev'">$(DefineConstants);KernelMemoryDev</DefineConstants>
</PropertyGroup>

<ItemGroup>
Expand Down
14 changes: 14 additions & 0 deletions extensions/MongoDbAtlas/MongoDbAtlas/MongoDbAtlasStorage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,19 @@ public Task CreateDocumentDirectoryAsync(string index, string documentId,
return Task.CompletedTask;
}

#if KernelMemoryDev
/// <inheritdoc />
public Task<StreamableFileContent> ReadFileAsync(
string index,
string documentId,
string fileName,
bool logErrIfNotFound = true,
CancellationToken cancellationToken = default)
{
throw new NotImplementedException();
}
#else
/// <inheritdoc />
public async Task<BinaryData> ReadFileAsync(string index, string documentId, string fileName, bool logErrIfNotFound = true,
CancellationToken cancellationToken = new CancellationToken())
{
Expand Down Expand Up @@ -189,6 +202,7 @@ public async Task<BinaryData> ReadFileAsync(string index, string documentId, str
return new BinaryData(memoryStream.ToArray());
}
}
#endif

private async Task SaveDocumentAsync(string index, string id, BsonDocument doc, CancellationToken cancellationToken)
{
Expand Down
2 changes: 1 addition & 1 deletion extensions/TikToken/TikToken/TikToken.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<RollForward>LatestMajor</RollForward>
<AssemblyName>Microsoft.KernelMemory.AI.TikToken</AssemblyName>
<RootNamespace>Microsoft.KernelMemory.AI.TikToken</RootNamespace>
<NoWarn>$(NoWarn);</NoWarn>
<NoWarn>$(NoWarn);NU5104;</NoWarn>
</PropertyGroup>

<ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion nuget-package.props
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project>
<PropertyGroup>
<!-- Central version prefix - applies to all nuget packages. -->
<Version>0.39.0</Version>
<Version>0.40.0</Version>

<!-- These are set at the project level-->
<IsPackable>false</IsPackable>
Expand Down
4 changes: 4 additions & 0 deletions service/Abstractions/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ public static class Constants
// Form field containing the Document ID
public const string WebServiceDocumentIdField = "documentId";

// Form field containing the Filename
public const string WebServiceFilenameField = "filename";

// Form field containing the list of tags
public const string WebServiceTagsField = "tags";

Expand Down Expand Up @@ -50,6 +53,7 @@ public static class Constants
// Endpoints
public const string HttpAskEndpoint = "/ask";
public const string HttpSearchEndpoint = "/search";
public const string HttpDownloadEndpoint = "/download";
public const string HttpUploadEndpoint = "/upload";
public const string HttpUploadStatusEndpoint = "/upload-status";
public const string HttpDocumentsEndpoint = "/documents";
Expand Down
3 changes: 1 addition & 2 deletions service/Abstractions/ContentStorage/IContentStorage.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
Expand Down Expand Up @@ -84,7 +83,7 @@ Task WriteFileAsync(
/// <param name="logErrIfNotFound">Whether to log an error if the file does not exist. An exception will be raised anyway.</param>
/// <param name="cancellationToken">Async task cancellation token</param>
/// <returns>File content</returns>
Task<BinaryData> ReadFileAsync(
Task<StreamableFileContent> ReadFileAsync(
string index,
string documentId,
string fileName,
Expand Down
14 changes: 14 additions & 0 deletions service/Abstractions/IKernelMemory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,20 @@ public Task<bool> IsDocumentReadyAsync(
string? index = null,
CancellationToken cancellationToken = default);

/// <summary>
/// Export a file from content storage
/// </summary>
/// <param name="documentId">ID of the document containing the file</param>
/// <param name="fileName">File name</param>
/// <param name="index">Index containing the document</param>
/// <param name="cancellationToken">Async task cancellation token</param>
/// <returns>File content</returns>
public Task<StreamableFileContent> ExportFileAsync(
string documentId,
string fileName,
string? index = null,
CancellationToken cancellationToken = default);

/// <summary>
/// Search the given index for a list of relevant documents for the given query.
/// </summary>
Expand Down
53 changes: 53 additions & 0 deletions service/Abstractions/Models/StreamableFileContent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.IO;
using System.Threading.Tasks;

namespace Microsoft.KernelMemory;

public sealed class StreamableFileContent : IDisposable
{
private Stream? _stream;

public string FileName { get; } = string.Empty;
public long FileSize { get; } = 0;
public string FileType { get; } = string.Empty;
public DateTimeOffset LastWrite { get; } = default;
public Func<Task<Stream>> StreamAsync { get; }

public StreamableFileContent()
{
this.StreamAsync = () => Task.FromResult<Stream>(new MemoryStream());
}

public StreamableFileContent(
string fileName,
long fileSize,
string fileType = "application/octet-stream",
DateTimeOffset lastWriteTimeUtc = default,
Func<Task<Stream>>? asyncStreamDelegate = null)
{
ArgumentNullExceptionEx.ThrowIfNullOrWhiteSpace(fileType, nameof(fileType), "File content type is empty");
ArgumentNullExceptionEx.ThrowIfNull(lastWriteTimeUtc, nameof(lastWriteTimeUtc), "File last write time is NULL");
ArgumentNullExceptionEx.ThrowIfNull(asyncStreamDelegate, nameof(asyncStreamDelegate), "asyncStreamDelegate is NULL");

this.FileName = fileName;
this.FileSize = fileSize;
this.FileType = fileType;
this.LastWrite = lastWriteTimeUtc;
this.StreamAsync = async () =>
{
this._stream = await asyncStreamDelegate().ConfigureAwait(false);
return this._stream;
};
}

public void Dispose()
{
if (this._stream == null) { return; }

this._stream.Close();
this._stream.Dispose();
}
}
9 changes: 9 additions & 0 deletions service/Abstractions/Pipeline/IPipelineOrchestrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,15 @@ public interface IPipelineOrchestrator
/// </summary>
Task StopAllPipelinesAsync();

/// <summary>
/// Fetch a file from content storage, streaming its content and details
/// </summary>
/// <param name="pipeline">Pipeline containing the file</param>
/// <param name="fileName">Name of the file to fetch</param>
/// <param name="cancellationToken">Async task cancellation token</param>
/// <returns>File data</returns>
Task<StreamableFileContent> ReadFileAsStreamAsync(DataPipeline pipeline, string fileName, CancellationToken cancellationToken = default);

/// <summary>
/// Fetch a file from content storage
/// </summary>
Expand Down
13 changes: 13 additions & 0 deletions service/Core/ContentStorage/DevTools/SimpleFileStorage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,18 @@ public async Task WriteFileAsync(
await this._fileSystem.WriteFileAsync(volume: index, relPath: documentId, fileName: fileName, streamContent: streamContent, cancellationToken).ConfigureAwait(false);
}

#if KernelMemoryDev
/// <inheritdoc />
public Task<StreamableFileContent> ReadFileAsync(
string index,
string documentId,
string fileName,
bool logErrIfNotFound = true,
CancellationToken cancellationToken = default)
{
throw new NotImplementedException();
}
#else
/// <inheritdoc />
public async Task<BinaryData> ReadFileAsync(
string index,
Expand All @@ -113,4 +125,5 @@ public async Task<BinaryData> ReadFileAsync(
throw new ContentStorageFileNotFoundException("File not found");
}
}
#endif
}
8 changes: 8 additions & 0 deletions service/Core/MemoryServerless.cs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,14 @@ public async Task<bool> IsDocumentReadyAsync(
}
}

#if KernelMemoryDev
/// <inheritdoc />
public Task<StreamableFileContent> ExportFileAsync(string documentId, string fileName, string? index = null, CancellationToken cancellationToken = default)
{
throw new NotImplementedException();
}
#endif

/// <inheritdoc />
public Task<SearchResult> SearchAsync(
string query,
Expand Down
8 changes: 8 additions & 0 deletions service/Core/MemoryService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,14 @@ public Task<bool> IsDocumentReadyAsync(
return this._orchestrator.ReadPipelineSummaryAsync(index: index, documentId, cancellationToken);
}

#if KernelMemoryDev
/// <inheritdoc />
public Task<StreamableFileContent> ExportFileAsync(string documentId, string fileName, string? index = null, CancellationToken cancellationToken = default)
{
throw new NotImplementedException();
}
#endif

/// <inheritdoc />
public Task<SearchResult> SearchAsync(
string query,
Expand Down
20 changes: 20 additions & 0 deletions service/Core/Pipeline/BaseOrchestrator.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
// Copyright (c) Microsoft. All rights reserved.

// ReSharper disable RedundantUsingDirective
#pragma warning disable CS0162 // temp
#pragma warning disable CS1998 // temp
#pragma warning disable IDE0005 // temp

using System;
using System.Collections.Generic;
using System.Linq;
Expand Down Expand Up @@ -147,6 +152,9 @@ public DataPipeline PrepareNewDocumentUpload(
///<inheritdoc />
public async Task<DataPipeline?> ReadPipelineStatusAsync(string index, string documentId, CancellationToken cancellationToken = default)
{
#if KernelMemoryDev
throw new NotImplementedException();
#else
index = IndexName.CleanName(index, this._defaultIndexName);

try
Expand All @@ -172,6 +180,7 @@ public DataPipeline PrepareNewDocumentUpload(
{
throw new PipelineNotFoundException("Pipeline/Document not found");
}
#endif
}

///<inheritdoc />
Expand Down Expand Up @@ -212,6 +221,13 @@ public Task StopAllPipelinesAsync()
return this.CancellationTokenSource.CancelAsync();
}

#if KernelMemoryDev
public Task<StreamableFileContent> ReadFileAsStreamAsync(DataPipeline pipeline, string fileName, CancellationToken cancellationToken = default)
{
throw new NotImplementedException();
}
#endif

///<inheritdoc />
public async Task<string> ReadTextFileAsync(DataPipeline pipeline, string fileName, CancellationToken cancellationToken = default)
{
Expand All @@ -223,7 +239,11 @@ public async Task<string> ReadTextFileAsync(DataPipeline pipeline, string fileNa
public Task<BinaryData> ReadFileAsync(DataPipeline pipeline, string fileName, CancellationToken cancellationToken = default)
{
pipeline.Index = IndexName.CleanName(pipeline.Index, this._defaultIndexName);
#if KernelMemoryDev
throw new NotImplementedException();
#else
return this._contentStorage.ReadFileAsync(pipeline.Index, pipeline.DocumentId, fileName, true, cancellationToken);
#endif
}

///<inheritdoc />
Expand Down
6 changes: 3 additions & 3 deletions tools/upload-file.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Usage:
Example:
./upload-file.sh -s http://127.0.0.1:9001 -f myFile.pdf -u me -t "type:notes" -t "type:test" -i "bash test"
./upload-file.sh -s http://127.0.0.1:9001 -f myFile.pdf -p me -t "type:notes" -t "type:test" -i "bash test"
For more information visit https://github.com/microsoft/kernel-memory
Expand Down Expand Up @@ -69,11 +69,11 @@ readParameters() {

validateParameters() {
if [ -z "$SERVICE_URL" ]; then
echo "Please specify the web service URL"
echo "Please specify the web service URL. Use -h option for instructions."
exit 1
fi
if [ -z "$FILENAME" ]; then
echo "Please specify a file to upload"
echo "Please specify a file to upload. Use -h option for instructions."
exit 3
fi
if [ -d "$FILENAME" ]; then
Expand Down

0 comments on commit e43daf7

Please sign in to comment.