Skip to content

Commit

Permalink
utf simd conversion test
Browse files Browse the repository at this point in the history
  • Loading branch information
CptMoore committed Jan 18, 2025
1 parent de80b4a commit bd3c405
Show file tree
Hide file tree
Showing 7 changed files with 62,637 additions and 2 deletions.
2 changes: 1 addition & 1 deletion ModTek.Preloader/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ case ${os_type} in
#Work around used as it is a bug that is patched out in newer versions of mono.
export TERM=xterm

export LD_PRELOAD="${BASEDIR}/libdoorstop.so:${LD_PRELOAD:-}"
export LD_PRELOAD="${BASEDIR}/libdoorstop.so:${BASEDIR}/libsimdutfexport.so:${LD_PRELOAD:-}"
LD_PRELOAD="${LD_PRELOAD%:}"
;;
Darwin*)
Expand Down
135 changes: 134 additions & 1 deletion ModTek/Features/Logging/FastBuffer.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Security;
using System.Text;
using System.Threading;
using ModTek.Util.Stopwatch;

namespace ModTek.Features.Logging;
Expand Down Expand Up @@ -141,7 +143,136 @@ internal void Append(int value)
FormattingHelpers.WriteDigits(position, (uint)value, digits);
}


// Timers for the competing string-append strategies that Append(string) runs back to back.
// They are internal so that code outside this type can read and report their stats.
internal static readonly MTStopwatch AppendNativeStopwatch = new();
internal static readonly MTStopwatch AppendManagedStopwatch = new();
internal static readonly MTStopwatch AppendGetBytesNoCountingStopwatch = new();
internal static readonly MTStopwatch AppendGetBytesStopwatch = new();
// Number of Append(string) calls that reached the measuring path. It is incremented atomically
// and used to reset the four stopwatches above once, when the count hits 1,000.
private static long counter;
/// <summary>
/// Appends <paramref name="value"/> to the buffer as UTF-8 while benchmarking the
/// available conversion strategies against each other.
/// </summary>
/// <remarks>
/// Strings of 8 to 31 chars skip the benchmark and go straight to <c>AppendGetBytes</c>.
/// For every other length each strategy writes the same string to the same buffer
/// position (the write position is rewound between runs), so only the output of the
/// last strategy, <c>AppendGetBytes</c>, remains in the buffer.
/// </remarks>
/// <param name="value">The text to append; it is dereferenced without a null check.</param>
internal void Append(string value)
{
    var charCount = value.Length;
    if (charCount >= 8 && charCount < 32)
    {
        AppendGetBytes(value);
        return;
    }

    // On the 1,000th measured call all four stopwatches are reset
    // (presumably to drop warm-up samples from the stats).
    var measuredCalls = Interlocked.Increment(ref counter);
    if (measuredCalls == 1_000)
    {
        AppendNativeStopwatch.Reset();
        AppendManagedStopwatch.Reset();
        AppendGetBytesNoCountingStopwatch.Reset();
        AppendGetBytesStopwatch.Reset();
    }

    // Every strategy starts writing at this position.
    var rewindPosition = _length;

    // The native strategy is currently disabled; its stopwatch still records the empty measurement.
    var nativeStart = MTStopwatch.GetTimestamp();
    //AppendNative(value);
    AppendNativeStopwatch.EndMeasurement(nativeStart);
    _length = rewindPosition;

    var managedStart = MTStopwatch.GetTimestamp();
    AppendManaged(value);
    AppendManagedStopwatch.EndMeasurement(managedStart);
    _length = rewindPosition;

    var noCountingStart = MTStopwatch.GetTimestamp();
    AppendGetBytesNoCounting(value);
    AppendGetBytesNoCountingStopwatch.EndMeasurement(noCountingStart);
    _length = rewindPosition;

    // Last run: its output is what stays in the buffer.
    var getBytesStart = MTStopwatch.GetTimestamp();
    AppendGetBytes(value);
    AppendGetBytesStopwatch.EndMeasurement(getBytesStart);
}

/// <summary>
/// Appends <paramref name="value"/> as UTF-8 using <see cref="Encoding.UTF8"/>, reserving
/// four bytes for every char up front instead of inspecting the string's content.
/// </summary>
/// <param name="value">The text to encode into the buffer at the current write position.</param>
[MethodImpl(MethodImplOptions.NoInlining)]
private void AppendGetBytesNoCounting(string value)
{
    const int Utf8MaxBytesPerChar = 4;

    var charCount = value.Length;
    EnsureCapacity(_length + charCount * Utf8MaxBytesPerChar);

    // GetBytes returns the number of bytes it wrote; advance the write position by that much.
    var bytesWritten = Encoding.UTF8.GetBytes(value, 0, charCount, _buffer, _length);
    _length += bytesWritten;
}

/// <summary>
/// Appends <paramref name="value"/> as UTF-8 using <see cref="Encoding.UTF8"/>. The space
/// reserved is one byte per char plus four extra bytes for each unit reported by
/// <c>CountNonAscii</c>.
/// </summary>
/// <param name="value">The text to encode into the buffer at the current write position.</param>
[MethodImpl(MethodImplOptions.NoInlining)]
private void AppendGetBytes(string value)
{
    const int Utf8MaxBytesPerChar = 4;

    var charCount = value.Length;
    var nonAsciiCount = CountNonAscii(value);
    EnsureCapacity(_length + charCount + nonAsciiCount * Utf8MaxBytesPerChar);

    // GetBytes returns the number of bytes it wrote; advance the write position by that much.
    var bytesWritten = Encoding.UTF8.GetBytes(value, 0, charCount, _buffer, _length);
    _length += bytesWritten;
}

/// <summary>
/// Counts the UTF-16 code units in <paramref name="value"/> that lie outside the ASCII
/// range, i.e. that are greater than U+007F.
/// </summary>
/// <remarks>
/// Every char is inspected exactly once and no memory outside the string is read.
/// The previous pointer-based version advanced a <c>ulong*</c> by eight elements
/// (64 bytes) for every eight chars consumed, which read past the end of the string;
/// it also tested only the top bit of each char and counted at most once per group.
/// </remarks>
/// <param name="value">The string to scan; must not be null.</param>
/// <returns>The number of chars in <paramref name="value"/> above U+007F.</returns>
private static int CountNonAscii(string value)
{
    const char LastAsciiChar = '\u007F';

    var nonAsciiCount = 0;
    for (var index = 0; index < value.Length; index++)
    {
        if (value[index] > LastAsciiChar)
        {
            nonAsciiCount++;
        }
    }
    return nonAsciiCount;
}

/// <summary>
/// Appends <paramref name="value"/> as UTF-8 by handing the pinned string to the native
/// <c>convert_utf16le_to_utf8</c> export.
/// </summary>
/// <remarks>
/// A negative result from the native call is treated as "did not fit": the buffer is
/// grown and the conversion retried once. If the retry is negative as well, the string
/// is encoded through <c>AppendGetBytesNoCounting</c> instead, so the write position
/// never moves backwards.
/// NOTE(review): the exact meaning of a negative return value is defined by the native
/// library, which is not visible here — confirm.
/// </remarks>
/// <param name="value">The text to append; an empty string is a no-op.</param>
[MethodImpl(MethodImplOptions.NoInlining)]
private void AppendNative(string value)
{
    var valueLength = value.Length;
    if (valueLength == 0)
    {
        return;
    }

    fixed (char* chars = value)
    {
        var srcPtr = (IntPtr)chars;

        var dstPtr = (IntPtr)(_bufferPtr + _length);
        var processed = (int)convert_utf16le_to_utf8(srcPtr, (ulong)valueLength, dstPtr, (ulong)CapacityLeft);
        if (processed < 0)
        {
            EnsureCapacity(_length + valueLength - processed);

            // The destination must be re-read after EnsureCapacity: the pointer computed
            // before the call would be stale if the buffer was replaced while growing.
            dstPtr = (IntPtr)(_bufferPtr + _length);
            processed = (int)convert_utf16le_to_utf8(srcPtr, (ulong)valueLength, dstPtr, (ulong)CapacityLeft);
        }

        if (processed < 0)
        {
            // Still failing: adding a negative count would rewind _length and corrupt
            // the buffer, so encode through the managed encoder instead.
            AppendGetBytesNoCounting(value);
            return;
        }

        _length += processed;
    }
}

// P/Invoke binding for the convert_utf16le_to_utf8 function exported by the native
// "libsimdutfexport" library, called with the cdecl convention.
// Parameters: pointer to the UTF-16 input, its length in 16-bit words, pointer to the
// UTF-8 output, and the space available at that output.
// NOTE(review): the native implementation is not visible here. Judging by the caller, a
// non-negative result is presumably the number of bytes written and a negative result
// presumably signals that the output space was too small — confirm against the native source.
[DllImport("libsimdutfexport", CallingConvention = CallingConvention.Cdecl)]
[SuppressUnmanagedCodeSecurity]
private static extern long convert_utf16le_to_utf8(IntPtr utf16, ulong utf16words, IntPtr utf8, ulong utf8space);

[MethodImpl(MethodImplOptions.NoInlining)]
private void AppendManaged(string value)
{
var processingCount = value.Length;
if (processingCount == 0)
Expand Down Expand Up @@ -186,7 +317,7 @@ private static int GetLowerBytePosition()
// batching also has an effect due to fewer ops overall
// 8 is a sweet spot for unrolling and the ulong bit mask check
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool FastConvert(byte* dstPtr, byte* srcPtr, ref int processingCount)
private static bool FastConvert(byte* dstPtr, byte* srcPtr, ref int processingCount)
{
{
const int IterSize = 8;
Expand Down Expand Up @@ -284,6 +415,8 @@ private void AppendTime(int hours, int minutes, int seconds, long ticks)
return _bufferPtr + length;
}

/// <summary>
/// Number of bytes between the current write position and the end of the backing array.
/// </summary>
private int CapacityLeft
{
    get
    {
        var totalCapacity = _buffer.Length;
        return totalCapacity - _length;
    }
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void EnsureCapacity(int targetLength)
{
Expand Down
4 changes: 4 additions & 0 deletions ModTek/Features/Logging/MTLoggerAsyncQueue.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ Async internal processing had an average latency of {latencyStats.AverageNanosec
Flushing (to disk) {AppenderFile.FlushStopWatch.GetStats()}.
Filters {AppenderFile.FiltersStopWatch.GetStats()}.
Formatter {AppenderFile.FormatterStopWatch.GetStats()}.
AppendNativeStopwatch {FastBuffer.AppendNativeStopwatch.GetStats()}.
AppendManagedStopwatch {FastBuffer.AppendManagedStopwatch.GetStats()}.
AppendGetBytesNoCountingStopwatch {FastBuffer.AppendGetBytesNoCountingStopwatch.GetStats()}.
AppendGetBytesStopwatch {FastBuffer.AppendGetBytesStopwatch.GetStats()}.
UTF8-Fallback {FastBuffer.UTF8FallbackStopwatch.GetStats()}.
Write (to OS buffers) {AppenderFile.WriteStopwatch.GetStats()}.
"""
Expand Down
15 changes: 15 additions & 0 deletions ModTek/ModTek.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,19 @@
<ItemGroup>
<ProjectReference Include="..\ModTek.Common\ModTek.Common.csproj" />
</ItemGroup>

<ItemGroup>
<ClCompile Include="..\simdutf\simdutf.cpp">
<Link>ModTek\simdutf\simdutf.cpp</Link>
</ClCompile>
<ClCompile Include="..\simdutf\simdutfexport.cpp">
<Link>ModTek\simdutf\simdutfexport.cpp</Link>
</ClCompile>
</ItemGroup>

<ItemGroup>
<ClInclude Include="..\simdutf\simdutf.h">
<Link>ModTek\simdutf\simdutf.h</Link>
</ClInclude>
</ItemGroup>
</Project>
Loading

0 comments on commit bd3c405

Please sign in to comment.