diff --git a/src/Http/Headers/src/ContentDispositionHeaderValue.cs b/src/Http/Headers/src/ContentDispositionHeaderValue.cs index 31b88417b603..3fa8ab2a2aa1 100644 --- a/src/Http/Headers/src/ContentDispositionHeaderValue.cs +++ b/src/Http/Headers/src/ContentDispositionHeaderValue.cs @@ -631,7 +631,7 @@ private static string Encode5987(StringSegment input) int totalBytesConsumed = 0; while (totalBytesConsumed < inputBytes.Length) { - if (inputBytes[totalBytesConsumed] <= 0x7F) + if (Ascii.IsValid(inputBytes[totalBytesConsumed])) { // This is an ASCII char. Let's handle it ourselves. diff --git a/src/Http/Routing/src/Matching/Ascii.cs b/src/Http/Routing/src/Matching/Ascii.cs deleted file mode 100644 index 081af0bb070f..000000000000 --- a/src/Http/Routing/src/Matching/Ascii.cs +++ /dev/null @@ -1,73 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -namespace Microsoft.AspNetCore.Routing.Matching; - -internal static class Ascii -{ - // case-sensitive equality comparison when we KNOW that 'a' is in the ASCII range - // and we know that the spans are the same length. - // - // Similar to https://github.com/dotnet/coreclr/blob/master/src/System.Private.CoreLib/shared/System/Globalization/CompareInfo.cs#L549 - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool AsciiIgnoreCaseEquals(ReadOnlySpan a, ReadOnlySpan b, int length) - { - // The caller should have checked the length. We enforce that here by THROWING if the - // lengths are unequal. - if (a.Length < length || b.Length < length) - { - // This should never happen, but we don't want to have undefined - // behavior if it does. 
- ThrowArgumentExceptionForLength(); - } - - ref var charA = ref MemoryMarshal.GetReference(a); - ref var charB = ref MemoryMarshal.GetReference(b); - - // Iterates each span for the provided length and compares each character - // case-insensitively. This looks funky because we're using unsafe operations - // to elide bounds-checks. - while (length > 0 && AsciiIgnoreCaseEquals(charA, charB)) - { - charA = ref Unsafe.Add(ref charA, 1); - charB = ref Unsafe.Add(ref charB, 1); - length--; - } - - return length == 0; - } - - // case-insensitive equality comparison for characters in the ASCII range - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool AsciiIgnoreCaseEquals(char charA, char charB) - { - const uint AsciiToLower = 0x20; - return - // Equal when chars are exactly equal - charA == charB || - - // Equal when converted to-lower AND they are letters - ((charA | AsciiToLower) == (charB | AsciiToLower) && (uint)((charA | AsciiToLower) - 'a') <= (uint)('z' - 'a')); - } - - public static bool IsAscii(string text) - { - for (var i = 0; i < text.Length; i++) - { - if (text[i] > (char)0x7F) - { - return false; - } - } - - return true; - } - - private static void ThrowArgumentExceptionForLength() - { - throw new ArgumentException("length"); - } -} diff --git a/src/Http/Routing/src/Matching/JumpTableBuilder.cs b/src/Http/Routing/src/Matching/JumpTableBuilder.cs index b08d4ec0eb05..5509ad659a7f 100644 --- a/src/Http/Routing/src/Matching/JumpTableBuilder.cs +++ b/src/Http/Routing/src/Matching/JumpTableBuilder.cs @@ -3,6 +3,7 @@ using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; +using System.Text; namespace Microsoft.AspNetCore.Routing.Matching; @@ -42,7 +43,7 @@ public static JumpTable Build(int defaultDestination, int exitDestination, (stri // The IL Emit jump table is not faster for a single entry - but we have an optimized version when all text // is ASCII - if (pathEntries.Length == 1 && 
Ascii.IsAscii(pathEntries[0].text)) + if (pathEntries.Length == 1 && Ascii.IsValid(pathEntries[0].text)) { var entry = pathEntries[0]; return new SingleEntryAsciiJumpTable(defaultDestination, exitDestination, entry.text, entry.destination); diff --git a/src/Http/Routing/src/Matching/SingleEntryAsciiJumpTable.cs b/src/Http/Routing/src/Matching/SingleEntryAsciiJumpTable.cs index 5fc96754543d..5c8bc5d6a8d2 100644 --- a/src/Http/Routing/src/Matching/SingleEntryAsciiJumpTable.cs +++ b/src/Http/Routing/src/Matching/SingleEntryAsciiJumpTable.cs @@ -1,6 +1,9 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Diagnostics; +using System.Text; + namespace Microsoft.AspNetCore.Routing.Matching; // Optimized implementation for cases where we know that we're @@ -41,7 +44,9 @@ public override int GetDestination(string path, PathSegment segment) var a = path.AsSpan(segment.Start, length); var b = text.AsSpan(); - return Ascii.AsciiIgnoreCaseEquals(a, b, length) ? _destination : _defaultDestination; + Debug.Assert(a.Length == b.Length && b.Length == length); + + return Ascii.EqualsIgnoreCase(a, b) ? _destination : _defaultDestination; } public override string DebuggerToString() diff --git a/src/Http/Routing/test/UnitTests/Matching/AsciiTest.cs b/src/Http/Routing/test/UnitTests/Matching/AsciiTest.cs deleted file mode 100644 index bc40b2244c1c..000000000000 --- a/src/Http/Routing/test/UnitTests/Matching/AsciiTest.cs +++ /dev/null @@ -1,110 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -namespace Microsoft.AspNetCore.Routing.Matching; - -// Note that while we don't intend for this code to be used with non-ASCII test, -// we still call into these methods with some non-ASCII characters so that -// we are sure of how it behaves. 
-public class AsciiTest -{ - [Fact] - public void IsAscii_ReturnsTrueForAscii() - { - // Arrange - var text = "abcd\u007F"; - - // Act - var result = Ascii.IsAscii(text); - - // Assert - Assert.True(result); - } - - [Fact] - public void IsAscii_ReturnsFalseForNonAscii() - { - // Arrange - var text = "abcd\u0080"; - - // Act - var result = Ascii.IsAscii(text); - - // Assert - Assert.False(result); - } - - [Theory] - - // Identity - [InlineData('c', 'c')] - [InlineData('C', 'C')] - [InlineData('#', '#')] - [InlineData('\u0080', '\u0080')] - - // Case-insensitive - [InlineData('c', 'C')] - public void AsciiIgnoreCaseEquals_ReturnsTrue(char x, char y) - { - // Arrange - - // Act - var result = Ascii.AsciiIgnoreCaseEquals(x, y); - - // Assert - Assert.True(result); - } - - [Theory] - - // Different letter - [InlineData('c', 'd')] - [InlineData('C', 'D')] - - // Non-letter + casing difference - 'a' and 'A' are 32 bits apart and so are ' ' and '@' - [InlineData(' ', '@')] - [InlineData('\u0080', '\u0080' + 32)] // Outside of ASCII range - public void AsciiIgnoreCaseEquals_ReturnsFalse(char x, char y) - { - // Arrange - - // Act - var result = Ascii.AsciiIgnoreCaseEquals(x, y); - - // Assert - Assert.False(result); - } - - [Theory] - [InlineData("", "", 0)] - [InlineData("abCD", "abcF", 3)] - [InlineData("ab#\u0080-$%", "Ab#\u0080-$%", 7)] - public void UnsafeAsciiIgnoreCaseEquals_ReturnsTrue(string x, string y, int length) - { - // Arrange - var spanX = x.AsSpan(); - var spanY = y.AsSpan(); - - // Act - var result = Ascii.AsciiIgnoreCaseEquals(spanX, spanY, length); - - // Assert - Assert.True(result); - } - - [Theory] - [InlineData("abcD", "abCE", 4)] - [InlineData("ab#\u0080-$%", "Ab#\u0081-$%", 7)] - public void UnsafeAsciiIgnoreCaseEquals_ReturnsFalse(string x, string y, int length) - { - // Arrange - var spanX = x.AsSpan(); - var spanY = y.AsSpan(); - - // Act - var result = Ascii.AsciiIgnoreCaseEquals(spanX, spanY, length); - - // Assert - Assert.False(result); - } 
-} diff --git a/src/Servers/Kestrel/Core/src/Internal/Infrastructure/HttpUtilities.cs b/src/Servers/Kestrel/Core/src/Internal/Infrastructure/HttpUtilities.cs index d6b6977866fe..3fa594841221 100644 --- a/src/Servers/Kestrel/Core/src/Internal/Infrastructure/HttpUtilities.cs +++ b/src/Servers/Kestrel/Core/src/Internal/Infrastructure/HttpUtilities.cs @@ -54,7 +54,10 @@ private static ulong GetAsciiStringAsLong(string str) { Debug.Assert(str.Length == 8, "String must be exactly 8 (ASCII) characters long."); - var bytes = Encoding.ASCII.GetBytes(str); + Span<byte> bytes = stackalloc byte[8]; + OperationStatus operationStatus = Ascii.FromUtf16(str, bytes, out _); + + Debug.Assert(operationStatus == OperationStatus.Done); return BinaryPrimitives.ReadUInt64LittleEndian(bytes); } @@ -63,7 +66,11 @@ private static uint GetAsciiStringAsInt(string str) { Debug.Assert(str.Length == 4, "String must be exactly 4 (ASCII) characters long."); - var bytes = Encoding.ASCII.GetBytes(str); + Span<byte> bytes = stackalloc byte[4]; + OperationStatus operationStatus = Ascii.FromUtf16(str, bytes, out _); + + Debug.Assert(operationStatus == OperationStatus.Done); + return BinaryPrimitives.ReadUInt32LittleEndian(bytes); } diff --git a/src/Shared/ServerInfrastructure/StringUtilities.cs b/src/Shared/ServerInfrastructure/StringUtilities.cs index 0af7f27e12ba..4a14c251f768 100644 --- a/src/Shared/ServerInfrastructure/StringUtilities.cs +++ b/src/Shared/ServerInfrastructure/StringUtilities.cs @@ -411,152 +411,13 @@ out Unsafe.AsRef>(output), return isValid; } - [MethodImpl(MethodImplOptions.AggressiveOptimization)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool BytesOrdinalEqualsStringAndAscii(string previousValue, ReadOnlySpan<byte> newValue) { // previousValue is a previously materialized string which *must* have already passed validation. Debug.Assert(IsValidHeaderString(previousValue)); - // Ascii bytes => Utf-16 chars will be the same length. 
- // The caller should have already compared lengths before calling this method. - // However; let's double check, and early exit if they are not the same length. - if (previousValue.Length != newValue.Length) - { - // Lengths don't match, so there cannot be an exact ascii conversion between the two. - goto NotEqual; - } - - // Note: Pointer comparison is unsigned, so we use the compare pattern (offset + length <= count) - // rather than (offset <= count - length) which we'd do with signed comparison to avoid overflow. - // This isn't problematic as we know the maximum length is max string length (from test above) - // which is a signed value so half the size of the unsigned pointer value so we can safely add - // a Vector.Count to it without overflowing. - var count = (nint)newValue.Length; - var offset = (nint)0; - - // Get references to the first byte in the span, and the first char in the string. - ref var bytes = ref MemoryMarshal.GetReference(newValue); - ref var str = ref MemoryMarshal.GetReference(previousValue.AsSpan()); - - do - { - // If Vector not-accelerated or remaining less than vector size - if (!Vector.IsHardwareAccelerated || (offset + Vector.Count) > count) - { - if (IntPtr.Size == 8) // Use Intrinsic switch for branch elimination - { - // 64-bit: Loop longs by default - while ((offset + sizeof(long)) <= count) - { - if (!WidenFourAsciiBytesToUtf16AndCompareToChars( - ref Unsafe.Add(ref str, offset), - Unsafe.ReadUnaligned(ref Unsafe.Add(ref bytes, offset))) || - !WidenFourAsciiBytesToUtf16AndCompareToChars( - ref Unsafe.Add(ref str, offset + 4), - Unsafe.ReadUnaligned(ref Unsafe.Add(ref bytes, offset + 4)))) - { - goto NotEqual; - } - - offset += sizeof(long); - } - if ((offset + sizeof(int)) <= count) - { - if (!WidenFourAsciiBytesToUtf16AndCompareToChars( - ref Unsafe.Add(ref str, offset), - Unsafe.ReadUnaligned(ref Unsafe.Add(ref bytes, offset)))) - { - goto NotEqual; - } - - offset += sizeof(int); - } - } - else - { - // 32-bit: Loop ints by 
default - while ((offset + sizeof(int)) <= count) - { - if (!WidenFourAsciiBytesToUtf16AndCompareToChars( - ref Unsafe.Add(ref str, offset), - Unsafe.ReadUnaligned(ref Unsafe.Add(ref bytes, offset)))) - { - goto NotEqual; - } - - offset += sizeof(int); - } - } - if ((offset + sizeof(short)) <= count) - { - if (!WidenTwoAsciiBytesToUtf16AndCompareToChars( - ref Unsafe.Add(ref str, offset), - Unsafe.ReadUnaligned(ref Unsafe.Add(ref bytes, offset)))) - { - goto NotEqual; - } - - offset += sizeof(short); - } - if (offset < count) - { - var ch = (char)Unsafe.Add(ref bytes, offset); - if (((ch & 0x80) != 0) || Unsafe.Add(ref str, offset) != ch) - { - goto NotEqual; - } - } - - // End of input reached, there are no inequalities via widening; so the input bytes are both ascii - // and a match to the string if it was converted via Encoding.ASCII.GetString(...) - return true; - } - - // Create a comparision vector for all bits being equal - var AllTrue = new Vector(-1); - // do/while as entry condition already checked, remaining length must be Vector.Count or larger. - do - { - // Read a Vector length from the input as bytes - var vector = Unsafe.ReadUnaligned>(ref Unsafe.Add(ref bytes, offset)); - if (!CheckBytesInAsciiRange(vector)) - { - goto NotEqual; - } - // Widen the bytes directly to chars (ushort) as if they were ascii. - // As widening doubles the size we get two vectors back. - Vector.Widen(vector, out var vector0, out var vector1); - // Read two char vectors from the string to perform the match. - var compare0 = Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref str, offset))); - var compare1 = Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref str, offset + Vector.Count))); - - // If the string is not ascii, then the widened bytes cannot match - // as each widened byte element as chars will be in the range 0-255 - // so cannot match any higher unicode values. 
- - // Compare to our all bits true comparision vector - if (!AllTrue.Equals( - // BitwiseAnd the two equals together - Vector.BitwiseAnd( - // Check equality for the two widened vectors - Vector.Equals(compare0, vector0), - Vector.Equals(compare1, vector1)))) - { - goto NotEqual; - } - - offset += Vector.Count; - } while ((offset + Vector.Count) <= count); - - // Vector path done, loop back to do non-Vector - // If is a exact multiple of vector size, bail now - } while (offset < count); - - // If we get here (input is exactly a multiple of Vector length) then there are no inequalities via widening; - // so the input bytes are both ascii and a match to the string if it was converted via Encoding.ASCII.GetString(...) - return true; - NotEqual: - return false; + return Ascii.Equals(previousValue, newValue); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -578,98 +439,6 @@ private static unsafe void WidenFourAsciiBytesToUtf16AndWriteToBuffer(char* outp } } - /// - /// Given a DWORD which represents a buffer of 4 bytes, widens the buffer into 4 WORDs and - /// compares them to the WORD buffer with machine endianness. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] - private static bool WidenFourAsciiBytesToUtf16AndCompareToChars(ref char charStart, uint value) - { - if (!AllBytesInUInt32AreAscii(value)) - { - return false; - } - - // BMI2 could be used, but this variant is faster on both Intel and AMD. 
- if (Sse2.X64.IsSupported) - { - var vecNarrow = Sse2.ConvertScalarToVector128UInt32(value).AsByte(); - var vecWide = Sse2.UnpackLow(vecNarrow, Vector128.Zero).AsUInt64(); - return Unsafe.ReadUnaligned(ref Unsafe.As(ref charStart)) == - Sse2.X64.ConvertToUInt64(vecWide); - } - else - { - if (BitConverter.IsLittleEndian) - { - return charStart == (char)(byte)value && - Unsafe.Add(ref charStart, 1) == (char)(byte)(value >> 8) && - Unsafe.Add(ref charStart, 2) == (char)(byte)(value >> 16) && - Unsafe.Add(ref charStart, 3) == (char)(value >> 24); - } - else - { - return Unsafe.Add(ref charStart, 3) == (char)(byte)value && - Unsafe.Add(ref charStart, 2) == (char)(byte)(value >> 8) && - Unsafe.Add(ref charStart, 1) == (char)(byte)(value >> 16) && - charStart == (char)(value >> 24); - } - } - } - - /// - /// Given a WORD which represents a buffer of 2 bytes, widens the buffer into 2 WORDs and - /// compares them to the WORD buffer with machine endianness. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] - private static bool WidenTwoAsciiBytesToUtf16AndCompareToChars(ref char charStart, ushort value) - { - if (!AllBytesInUInt16AreAscii(value)) - { - return false; - } - - // BMI2 could be used, but this variant is faster on both Intel and AMD. - if (Sse2.IsSupported) - { - var vecNarrow = Sse2.ConvertScalarToVector128UInt32(value).AsByte(); - var vecWide = Sse2.UnpackLow(vecNarrow, Vector128.Zero).AsUInt32(); - return Unsafe.ReadUnaligned(ref Unsafe.As(ref charStart)) == - Sse2.ConvertToUInt32(vecWide); - } - else - { - if (BitConverter.IsLittleEndian) - { - return charStart == (char)(byte)value && - Unsafe.Add(ref charStart, 1) == (char)(byte)(value >> 8); - } - else - { - return Unsafe.Add(ref charStart, 1) == (char)(byte)value && - charStart == (char)(byte)(value >> 8); - } - } - } - - /// - /// Returns iff all bytes in are ASCII. 
- /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool AllBytesInUInt32AreAscii(uint value) - { - return ((value & 0x80808080u) == 0); - } - - /// - /// Returns iff all bytes in are ASCII. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool AllBytesInUInt16AreAscii(ushort value) - { - return ((value & 0x8080u) == 0); - } - private static bool IsValidHeaderString(string value) { // Method for Debug.Assert to ensure BytesOrdinalEqualsStringAndAscii