From 6c3689e2488e6a28277f8e71b785299af7a65db6 Mon Sep 17 00:00:00 2001 From: Devis Lucato Date: Thu, 19 Sep 2024 11:11:59 -0700 Subject: [PATCH] Upgrade packages, fix issue with OpenXml (#792) Azure: * Azure.Storage.Blobs * Azure.Storage.Queues Document parsing: * ClosedXML 0.102.3 -> 0.104.0-rc1 * DocumentFormat.OpenXml 2.20.0 -> 3.1.0 SK: * Microsoft.SemanticKernel 1.18.2 -> 1.20.0 Misc: * AWSSDK * Microsoft.ML.Tokenizers * OllamaSharp * NRedisStack --- Directory.Build.props | 2 +- Directory.Packages.props | 48 +++++++------------ .../OpenAI/Tokenizers/DefaultGPTTokenizer.cs | 2 +- .../OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs | 4 +- .../OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs | 4 +- .../OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs | 4 +- .../OpenAI/Tokenizers/GPT4oTokenizer.cs | 4 +- service/Core/Core.csproj | 2 +- 8 files changed, 29 insertions(+), 41 deletions(-) diff --git a/Directory.Build.props b/Directory.Build.props index 83ec4b64b..fba690613 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -2,7 +2,7 @@ - 0.73.0 + 0.74.0 12 diff --git a/Directory.Packages.props b/Directory.Packages.props index 1a5ae16dd..baa27d297 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -4,15 +4,17 @@ - + - - + + + - + + @@ -33,18 +35,18 @@ - - - + + + - + - + @@ -52,26 +54,12 @@ - - - - - - - - - - - + + + + @@ -89,11 +77,11 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive @@ -112,7 +100,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + \ No newline at end of file diff --git a/extensions/OpenAI/OpenAI/Tokenizers/DefaultGPTTokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/DefaultGPTTokenizer.cs index a4f52da57..18d55447f 100644 --- a/extensions/OpenAI/OpenAI/Tokenizers/DefaultGPTTokenizer.cs +++ b/extensions/OpenAI/OpenAI/Tokenizers/DefaultGPTTokenizer.cs @@ -9,7 +9,7 @@ namespace Microsoft.KernelMemory.AI.OpenAI; public static class DefaultGPTTokenizer { - private static readonly Tokenizer s_tokenizer = Tokenizer.CreateTiktokenForModel( + private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel( "gpt-4", new Dictionary { { "<|im_start|>", 100264 }, { "<|im_end|>", 100265 } }); public static int StaticCountTokens(string text) diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs index 6e6e5ba78..8b3df3559 100644 --- a/extensions/OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs +++ b/extensions/OpenAI/OpenAI/Tokenizers/GPT2Tokenizer.cs @@ -13,7 +13,7 @@ namespace Microsoft.KernelMemory.AI.OpenAI; /// public sealed class GPT2Tokenizer : ITextTokenizer { - private static readonly Tokenizer s_tokenizer = Tokenizer.CreateTiktokenForModel("gpt2"); + private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("gpt2"); /// public int CountTokens(string text) @@ -24,6 +24,6 @@ public int CountTokens(string text) /// public IReadOnlyList GetTokens(string text) { - return s_tokenizer.Encode(text, out string? _).Select(t => t.Value).ToList(); + return s_tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList(); } } diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs index b7ca2dee1..e7d03d721 100644 --- a/extensions/OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs +++ b/extensions/OpenAI/OpenAI/Tokenizers/GPT3Tokenizer.cs @@ -13,7 +13,7 @@ namespace Microsoft.KernelMemory.AI.OpenAI; /// public sealed class GPT3Tokenizer : ITextTokenizer { - private static readonly Tokenizer s_tokenizer = Tokenizer.CreateTiktokenForModel("text-davinci-003"); + private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("text-davinci-003"); /// public int CountTokens(string text) @@ -24,6 +24,6 @@ public int CountTokens(string text) /// public IReadOnlyList GetTokens(string text) { - return s_tokenizer.Encode(text, out string? _).Select(t => t.Value).ToList(); + return s_tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList(); } } diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs index 0f97a13b7..5cef0f5cc 100644 --- a/extensions/OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs +++ b/extensions/OpenAI/OpenAI/Tokenizers/GPT4Tokenizer.cs @@ -13,7 +13,7 @@ namespace Microsoft.KernelMemory.AI.OpenAI; /// public sealed class GPT4Tokenizer : ITextTokenizer { - private static readonly Tokenizer s_tokenizer = Tokenizer.CreateTiktokenForModel("gpt-4", + private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("gpt-4", new Dictionary { { "<|im_start|>", 100264 }, { "<|im_end|>", 100265 } }); /// @@ -25,6 +25,6 @@ public int CountTokens(string text) /// public IReadOnlyList GetTokens(string text) { - return s_tokenizer.Encode(text, out string? _).Select(t => t.Value).ToList(); + return s_tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList(); } } diff --git a/extensions/OpenAI/OpenAI/Tokenizers/GPT4oTokenizer.cs b/extensions/OpenAI/OpenAI/Tokenizers/GPT4oTokenizer.cs index 7609d3aec..a0052c803 100644 --- a/extensions/OpenAI/OpenAI/Tokenizers/GPT4oTokenizer.cs +++ b/extensions/OpenAI/OpenAI/Tokenizers/GPT4oTokenizer.cs @@ -14,7 +14,7 @@ namespace Microsoft.KernelMemory.AI.OpenAI; // ReSharper disable once InconsistentNaming public sealed class GPT4oTokenizer : ITextTokenizer { - private static readonly Tokenizer s_tokenizer = Tokenizer.CreateTiktokenForModel("gpt-4o", + private static readonly Tokenizer s_tokenizer = TiktokenTokenizer.CreateForModel("gpt-4o", new Dictionary { { "<|im_start|>", 100264 }, { "<|im_end|>", 100265 } }); /// @@ -26,6 +26,6 @@ public int CountTokens(string text) /// public IReadOnlyList GetTokens(string text) { - return s_tokenizer.Encode(text, out string? _).Select(t => t.Value).ToList(); + return s_tokenizer.EncodeToTokens(text, out string? _).Select(t => t.Value).ToList(); } } diff --git a/service/Core/Core.csproj b/service/Core/Core.csproj index e60ba40a7..ad64da9a3 100644 --- a/service/Core/Core.csproj +++ b/service/Core/Core.csproj @@ -5,7 +5,7 @@ LatestMajor Microsoft.KernelMemory.Core Microsoft.KernelMemory - $(NoWarn);KMEXP00;KMEXP01;KMEXP02;KMEXP03;KMEXP04;SKEXP0001;SKEXP0011;CA2208;CA1308;CA1724; + $(NoWarn);KMEXP00;KMEXP01;KMEXP02;KMEXP03;KMEXP04;SKEXP0001;SKEXP0011;CA2208;CA1308;CA1724;NU5104;