diff --git a/src/libraries/System.Numerics.Tensors/src/Resources/Strings.resx b/src/libraries/System.Numerics.Tensors/src/Resources/Strings.resx
index f9f7f45078a04..d70054a241126 100644
--- a/src/libraries/System.Numerics.Tensors/src/Resources/Strings.resx
+++ b/src/libraries/System.Numerics.Tensors/src/Resources/Strings.resx
@@ -121,6 +121,6 @@
Destination is too short.
- Length of '{0}' must be same as length of '{1}'.
+ Input span arguments must all have the same length.
-
\ No newline at end of file
+
diff --git a/src/libraries/System.Numerics.Tensors/src/System.Numerics.Tensors.csproj b/src/libraries/System.Numerics.Tensors/src/System.Numerics.Tensors.csproj
index 5c188c2e0b841..097fa244ad491 100644
--- a/src/libraries/System.Numerics.Tensors/src/System.Numerics.Tensors.csproj
+++ b/src/libraries/System.Numerics.Tensors/src/System.Numerics.Tensors.csproj
@@ -16,6 +16,14 @@
+
+
+
+
+
+
+
+
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs
index 10f29183ea286..08bd9d362217e 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs
@@ -4,7 +4,7 @@
namespace System.Numerics.Tensors
{
/// Performs primitive tensor operations over spans of memory.
- public static class TensorPrimitives
+ public static partial class TensorPrimitives
{
/// Computes the element-wise result of: + .
/// The first tensor, represented as a span.
@@ -13,23 +13,8 @@ public static class TensorPrimitives
/// Length of '' must be same as length of ''.
/// Destination is too short.
/// This method effectively does [i] = [i] + [i].
- public static void Add(ReadOnlySpan x, ReadOnlySpan y, Span destination)
- {
- if (x.Length != y.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(y));
- }
-
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = x[i] + y[i];
- }
- }
+ public static void Add(ReadOnlySpan x, ReadOnlySpan y, Span destination)
+ {
+     // No pointer code is used here, so the method does not need the 'unsafe' modifier.
+     // The shared span/span helper checks that both inputs have the same length, checks that
+     // the destination is long enough, and runs the element-wise loop.
+     InvokeSpanSpanIntoSpan(x, y, destination);
+ }
/// Computes the element-wise result of: + .
/// The first tensor, represented as a span.
@@ -37,18 +22,8 @@ public static void Add(ReadOnlySpan x, ReadOnlySpan y, Span
/// The destination tensor, represented as a span.
/// Destination is too short.
/// This method effectively does [i] = [i] + .
- public static void Add(ReadOnlySpan x, float y, Span destination)
- {
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = x[i] + y;
- }
- }
+ public static void Add(ReadOnlySpan x, float y, Span destination)
+ {
+     // The shared span/scalar helper checks the destination length and runs the element-wise loop.
+     InvokeSpanScalarIntoSpan(x, y, destination);
+ }
/// Computes the element-wise result of: - .
/// The first tensor, represented as a span.
@@ -57,23 +32,8 @@ public static void Add(ReadOnlySpan x, float y, Span destination)
/// Length of '' must be same as length of ''.
/// Destination is too short.
/// This method effectively does [i] = [i] - [i].
- public static void Subtract(ReadOnlySpan x, ReadOnlySpan y, Span destination)
- {
- if (x.Length != y.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(y));
- }
-
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = x[i] - y[i];
- }
- }
+ public static void Subtract(ReadOnlySpan x, ReadOnlySpan y, Span destination)
+ {
+     // The shared span/span helper checks that both inputs have the same length, checks that
+     // the destination is long enough, and runs the element-wise loop.
+     InvokeSpanSpanIntoSpan(x, y, destination);
+ }
/// Computes the element-wise result of: - .
/// The first tensor, represented as a span.
@@ -81,18 +41,8 @@ public static void Subtract(ReadOnlySpan x, ReadOnlySpan y, SpanThe destination tensor, represented as a span.
/// Destination is too short.
/// This method effectively does [i] = [i] - .
- public static void Subtract(ReadOnlySpan x, float y, Span destination)
- {
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = x[i] - y;
- }
- }
+ public static void Subtract(ReadOnlySpan x, float y, Span destination)
+ {
+     // The shared span/scalar helper checks the destination length and runs the element-wise loop.
+     InvokeSpanScalarIntoSpan(x, y, destination);
+ }
/// Computes the element-wise result of: * .
/// The first tensor, represented as a span.
@@ -101,23 +51,8 @@ public static void Subtract(ReadOnlySpan x, float y, Span destinat
/// Length of '' must be same as length of ''.
/// Destination is too short.
/// This method effectively does [i] = [i] * .
- public static void Multiply(ReadOnlySpan x, ReadOnlySpan y, Span destination)
- {
- if (x.Length != y.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(y));
- }
-
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = x[i] * y[i];
- }
- }
+ public static void Multiply(ReadOnlySpan x, ReadOnlySpan y, Span destination)
+ {
+     // The shared span/span helper checks that both inputs have the same length, checks that
+     // the destination is long enough, and runs the element-wise loop.
+     InvokeSpanSpanIntoSpan(x, y, destination);
+ }
/// Computes the element-wise result of: * .
/// The first tensor, represented as a span.
@@ -128,18 +63,8 @@ public static void Multiply(ReadOnlySpan x, ReadOnlySpan y, SpanThis method effectively does [i] = [i] * .
/// This method corresponds to the scal method defined by BLAS1.
///
- public static void Multiply(ReadOnlySpan x, float y, Span destination)
- {
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = x[i] * y;
- }
- }
+ public static void Multiply(ReadOnlySpan x, float y, Span destination)
+ {
+     // The shared span/scalar helper checks the destination length and runs the element-wise loop.
+     InvokeSpanScalarIntoSpan(x, y, destination);
+ }
/// Computes the element-wise result of: / .
/// The first tensor, represented as a span.
@@ -148,23 +73,8 @@ public static void Multiply(ReadOnlySpan x, float y, Span destinat
/// Length of '' must be same as length of ''.
/// Destination is too short.
/// This method effectively does [i] = [i] / .
- public static void Divide(ReadOnlySpan x, ReadOnlySpan y, Span destination)
- {
- if (x.Length != y.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(y));
- }
-
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = x[i] / y[i];
- }
- }
+ public static void Divide(ReadOnlySpan x, ReadOnlySpan y, Span destination)
+ {
+     // The shared span/span helper checks that both inputs have the same length, checks that
+     // the destination is long enough, and runs the element-wise loop.
+     InvokeSpanSpanIntoSpan(x, y, destination);
+ }
/// Computes the element-wise result of: / .
/// The first tensor, represented as a span.
@@ -172,36 +82,16 @@ public static void Divide(ReadOnlySpan x, ReadOnlySpan y, SpanThe destination tensor, represented as a span.
/// Destination is too short.
/// This method effectively does [i] = [i] / .
- public static void Divide(ReadOnlySpan x, float y, Span destination)
- {
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = x[i] / y;
- }
- }
+ public static void Divide(ReadOnlySpan x, float y, Span destination)
+ {
+     // The shared span/scalar helper checks the destination length and runs the element-wise loop.
+     InvokeSpanScalarIntoSpan(x, y, destination);
+ }
/// Computes the element-wise result of: -.
/// The tensor, represented as a span.
/// The destination tensor, represented as a span.
/// Destination is too short.
/// This method effectively does [i] = -[i].
- public static void Negate(ReadOnlySpan x, Span destination)
- {
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = -x[i];
- }
- }
+ public static void Negate(ReadOnlySpan x, Span destination)
+ {
+     // The shared single-input helper checks the destination length and runs the element-wise loop.
+     InvokeSpanIntoSpan(x, destination);
+ }
/// Computes the element-wise result of: ( + ) * .
/// The first tensor, represented as a span.
@@ -212,28 +102,8 @@ public static void Negate(ReadOnlySpan x, Span destination)
/// Length of '' must be same as length of ''.
/// Destination is too short.
/// This method effectively does [i] = ([i] + [i]) * [i].
- public static void AddMultiply(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan multiplier, Span destination)
- {
- if (x.Length != y.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(y));
- }
-
- if (x.Length != multiplier.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(multiplier));
- }
-
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = (x[i] + y[i]) * multiplier[i];
- }
- }
+ public static void AddMultiply(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan multiplier, Span destination)
+ {
+     // The shared three-span helper checks that all inputs have the same length, checks that
+     // the destination is long enough, and runs the element-wise loop.
+     InvokeSpanSpanSpanIntoSpan(x, y, multiplier, destination);
+ }
/// Computes the element-wise result of: ( + ) * .
/// The first tensor, represented as a span.
@@ -243,23 +113,8 @@ public static void AddMultiply(ReadOnlySpan x, ReadOnlySpan y, Rea
/// Length of '' must be same as length of ''.
/// Destination is too short.
/// This method effectively does [i] = ([i] + [i]) * .
- public static void AddMultiply(ReadOnlySpan x, ReadOnlySpan y, float multiplier, Span destination)
- {
- if (x.Length != y.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(y));
- }
-
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = (x[i] + y[i]) * multiplier;
- }
- }
+ public static void AddMultiply(ReadOnlySpan x, ReadOnlySpan y, float multiplier, Span destination)
+ {
+     // The shared span/span/scalar helper checks that both spans have the same length, checks
+     // that the destination is long enough, and runs the element-wise loop.
+     InvokeSpanSpanScalarIntoSpan(x, y, multiplier, destination);
+ }
/// Computes the element-wise result of: ( + ) * .
/// The first tensor, represented as a span.
@@ -269,23 +124,8 @@ public static void AddMultiply(ReadOnlySpan x, ReadOnlySpan y, flo
/// Length of '' must be same as length of ''.
/// Destination is too short.
/// This method effectively does [i] = ([i] + ) * [i].
- public static void AddMultiply(ReadOnlySpan x, float y, ReadOnlySpan multiplier, Span destination)
- {
- if (x.Length != multiplier.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(multiplier));
- }
-
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = (x[i] + y) * multiplier[i];
- }
- }
+ public static void AddMultiply(ReadOnlySpan x, float y, ReadOnlySpan multiplier, Span destination)
+ {
+     // The shared span/scalar/span helper checks that both spans have the same length, checks
+     // that the destination is long enough, and runs the element-wise loop.
+     InvokeSpanScalarSpanIntoSpan(x, y, multiplier, destination);
+ }
/// Computes the element-wise result of: ( * ) + .
/// The first tensor, represented as a span.
@@ -296,28 +136,8 @@ public static void AddMultiply(ReadOnlySpan x, float y, ReadOnlySpanLength of '' must be same as length of ''.
/// Destination is too short.
/// This method effectively does [i] = ([i] * [i]) + [i].
- public static void MultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan addend, Span destination)
- {
- if (x.Length != y.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(y));
- }
-
- if (x.Length != addend.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(addend));
- }
-
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = (x[i] * y[i]) + addend[i];
- }
- }
+ public static void MultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan addend, Span destination)
+ {
+     // The shared three-span helper checks that all inputs have the same length, checks that
+     // the destination is long enough, and runs the element-wise loop.
+     InvokeSpanSpanSpanIntoSpan(x, y, addend, destination);
+ }
/// Computes the element-wise result of: ( * ) + .
/// The first tensor, represented as a span.
@@ -330,23 +150,8 @@ public static void MultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, Rea
/// This method effectively does [i] = ([i] * [i]) + .
/// This method corresponds to the axpy method defined by BLAS1.
///
- public static void MultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, float addend, Span destination)
- {
- if (x.Length != y.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(y));
- }
-
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = (x[i] * y[i]) + addend;
- }
- }
+ public static void MultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, float addend, Span destination)
+ {
+     // The shared span/span/scalar helper checks that both spans have the same length, checks
+     // that the destination is long enough, and runs the element-wise loop.
+     InvokeSpanSpanScalarIntoSpan(x, y, addend, destination);
+ }
/// Computes the element-wise result of: ( * ) + .
/// The first tensor, represented as a span.
@@ -356,23 +161,8 @@ public static void MultiplyAdd(ReadOnlySpan x, ReadOnlySpan y, flo
/// Length of '' must be same as length of ''.
/// Destination is too short.
/// This method effectively does [i] = ([i] * ) + [i].
- public static void MultiplyAdd(ReadOnlySpan x, float y, ReadOnlySpan addend, Span destination)
- {
- if (x.Length != addend.Length)
- {
- ThrowHelper.ThrowArgument_SpansMustHaveSameLength(nameof(x), nameof(addend));
- }
-
- if (x.Length > destination.Length)
- {
- ThrowHelper.ThrowArgument_DestinationTooShort();
- }
-
- for (int i = 0; i < x.Length; i++)
- {
- destination[i] = (x[i] * y) + addend[i];
- }
- }
+ public static void MultiplyAdd(ReadOnlySpan x, float y, ReadOnlySpan addend, Span destination)
+ {
+     // The shared span/scalar/span helper checks that both spans have the same length, checks
+     // that the destination is long enough, and runs the element-wise loop.
+     InvokeSpanScalarSpanIntoSpan(x, y, addend, destination);
+ }
/// Computes the element-wise result of: pow(e, ).
/// The tensor, represented as a span.
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs
new file mode 100644
index 0000000000000..1233f54901c80
--- /dev/null
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs
@@ -0,0 +1,793 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+namespace System.Numerics.Tensors
+{
+ public static partial class TensorPrimitives
+ {
+ // Applies the unary operator to every element of 'x' and writes the results to the first
+ // x.Length elements of 'destination'.
+ //
+ // Calls ThrowHelper.ThrowArgument_DestinationTooShort when 'destination' is shorter than 'x'.
+ //
+ // The widest hardware-accelerated vector width that fits in the input is used; inputs too short
+ // for any accelerated width fall through to the scalar loop at the end. When the length is not a
+ // multiple of the vector width, the last elements are covered by one extra vector that overlaps
+ // elements the main loop also handles. That vector is computed BEFORE anything is stored, so
+ // passing the same span as input and destination (which the previous scalar loop supported)
+ // never feeds already-written results back into the operator. Spans that overlap at different
+ // starting offsets are not handled.
+ private static void InvokeSpanIntoSpan(
+     ReadOnlySpan x, Span destination)
+     where TUnaryOperator : IUnaryOperator
+ {
+     if (x.Length > destination.Length)
+     {
+         ThrowHelper.ThrowArgument_DestinationTooShort();
+     }
+
+     ref float xRef = ref MemoryMarshal.GetReference(x);
+     ref float dRef = ref MemoryMarshal.GetReference(destination);
+     int i = 0, oneVectorFromEnd;
+
+#if NET8_0_OR_GREATER
+     if (Vector512.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector512.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Compute the final vector from the untouched input before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector512.Count;
+             }
+
+             // Overlapped elements are rewritten with the same values the loop produced.
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+#endif
+
+     if (Vector256.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector256.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Compute the final vector from the untouched input before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector256.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     if (Vector128.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector128.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Compute the final vector from the untouched input before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector128.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     // Scalar fallback: reached only when no vector path ran, so 'i' is still 0.
+     while (i < x.Length)
+     {
+         Unsafe.Add(ref dRef, i) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, i));
+
+         i++;
+     }
+ }
+
+ // Applies the binary operator to each pair of elements from 'x' and 'y' and writes the results
+ // to the first x.Length elements of 'destination'.
+ //
+ // Calls ThrowHelper.ThrowArgument_SpansMustHaveSameLength when 'x' and 'y' differ in length, and
+ // ThrowHelper.ThrowArgument_DestinationTooShort when 'destination' is shorter than 'x'.
+ //
+ // The widest hardware-accelerated vector width that fits in the input is used; inputs too short
+ // for any accelerated width fall through to the scalar loop at the end. When the length is not a
+ // multiple of the vector width, the last elements are covered by one extra vector that overlaps
+ // elements the main loop also handles. That vector is computed BEFORE anything is stored, so
+ // passing an input span as the destination (which the previous scalar loop supported) never
+ // feeds already-written results back into the operator. Spans that overlap at different
+ // starting offsets are not handled.
+ private static void InvokeSpanSpanIntoSpan(
+     ReadOnlySpan x, ReadOnlySpan y, Span destination)
+     where TBinaryOperator : IBinaryOperator
+ {
+     if (x.Length != y.Length)
+     {
+         ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
+     }
+
+     if (x.Length > destination.Length)
+     {
+         ThrowHelper.ThrowArgument_DestinationTooShort();
+     }
+
+     ref float xRef = ref MemoryMarshal.GetReference(x);
+     ref float yRef = ref MemoryMarshal.GetReference(y);
+     ref float dRef = ref MemoryMarshal.GetReference(destination);
+     int i = 0, oneVectorFromEnd;
+
+#if NET8_0_OR_GREATER
+     if (Vector512.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector512.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
+                                               Vector512.LoadUnsafe(ref yRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i),
+                                        Vector512.LoadUnsafe(ref yRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector512.Count;
+             }
+
+             // Overlapped elements are rewritten with the same values the loop produced.
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+#endif
+
+     if (Vector256.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector256.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
+                                               Vector256.LoadUnsafe(ref yRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i),
+                                        Vector256.LoadUnsafe(ref yRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector256.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     if (Vector128.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector128.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
+                                               Vector128.LoadUnsafe(ref yRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i),
+                                        Vector128.LoadUnsafe(ref yRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector128.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     // Scalar fallback: reached only when no vector path ran, so 'i' is still 0.
+     while (i < x.Length)
+     {
+         Unsafe.Add(ref dRef, i) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, i),
+                                                          Unsafe.Add(ref yRef, i));
+
+         i++;
+     }
+ }
+
+ // Applies the binary operator to each element of 'x' paired with the scalar 'y' and writes the
+ // results to the first x.Length elements of 'destination'.
+ //
+ // Calls ThrowHelper.ThrowArgument_DestinationTooShort when 'destination' is shorter than 'x'.
+ //
+ // The widest hardware-accelerated vector width that fits in the input is used; inputs too short
+ // for any accelerated width fall through to the scalar loop at the end. When the length is not a
+ // multiple of the vector width, the last elements are covered by one extra vector that overlaps
+ // elements the main loop also handles. That vector is computed BEFORE anything is stored, so
+ // passing the same span as input and destination (which the previous scalar loop supported)
+ // never feeds already-written results back into the operator. Spans that overlap at different
+ // starting offsets are not handled.
+ private static void InvokeSpanScalarIntoSpan(
+     ReadOnlySpan x, float y, Span destination)
+     where TBinaryOperator : IBinaryOperator
+ {
+     if (x.Length > destination.Length)
+     {
+         ThrowHelper.ThrowArgument_DestinationTooShort();
+     }
+
+     ref float xRef = ref MemoryMarshal.GetReference(x);
+     ref float dRef = ref MemoryMarshal.GetReference(destination);
+     int i = 0, oneVectorFromEnd;
+
+#if NET8_0_OR_GREATER
+     if (Vector512.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector512.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Broadcast the scalar once so every iteration reuses the same vector.
+             var yVec = Vector512.Create(y);
+
+             // Compute the final vector from the untouched input before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
+                                               yVec);
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i),
+                                        yVec).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector512.Count;
+             }
+
+             // Overlapped elements are rewritten with the same values the loop produced.
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+#endif
+
+     if (Vector256.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector256.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             var yVec = Vector256.Create(y);
+
+             // Compute the final vector from the untouched input before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
+                                               yVec);
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i),
+                                        yVec).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector256.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     if (Vector128.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector128.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             var yVec = Vector128.Create(y);
+
+             // Compute the final vector from the untouched input before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
+                                               yVec);
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i),
+                                        yVec).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector128.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     // Scalar fallback: reached only when no vector path ran, so 'i' is still 0.
+     while (i < x.Length)
+     {
+         Unsafe.Add(ref dRef, i) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, i),
+                                                          y);
+
+         i++;
+     }
+ }
+
+ // Applies the ternary operator to each triple of elements from 'x', 'y' and 'z' and writes the
+ // results to the first x.Length elements of 'destination'.
+ //
+ // Calls ThrowHelper.ThrowArgument_SpansMustHaveSameLength unless 'x', 'y' and 'z' all have the
+ // same length, and ThrowHelper.ThrowArgument_DestinationTooShort when 'destination' is shorter
+ // than 'x'.
+ //
+ // The widest hardware-accelerated vector width that fits in the input is used; inputs too short
+ // for any accelerated width fall through to the scalar loop at the end. When the length is not a
+ // multiple of the vector width, the last elements are covered by one extra vector that overlaps
+ // elements the main loop also handles. That vector is computed BEFORE anything is stored, so
+ // passing an input span as the destination (which the previous scalar loop supported) never
+ // feeds already-written results back into the operator. Spans that overlap at different
+ // starting offsets are not handled.
+ private static void InvokeSpanSpanSpanIntoSpan(
+     ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan z, Span destination)
+     where TTernaryOperator : ITernaryOperator
+ {
+     if (x.Length != y.Length || x.Length != z.Length)
+     {
+         ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
+     }
+
+     if (x.Length > destination.Length)
+     {
+         ThrowHelper.ThrowArgument_DestinationTooShort();
+     }
+
+     ref float xRef = ref MemoryMarshal.GetReference(x);
+     ref float yRef = ref MemoryMarshal.GetReference(y);
+     ref float zRef = ref MemoryMarshal.GetReference(z);
+     ref float dRef = ref MemoryMarshal.GetReference(destination);
+     int i = 0, oneVectorFromEnd;
+
+#if NET8_0_OR_GREATER
+     if (Vector512.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector512.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                Vector512.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                Vector512.LoadUnsafe(ref zRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i),
+                                         Vector512.LoadUnsafe(ref yRef, (uint)i),
+                                         Vector512.LoadUnsafe(ref zRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector512.Count;
+             }
+
+             // Overlapped elements are rewritten with the same values the loop produced.
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+#endif
+
+     if (Vector256.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector256.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                Vector256.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                Vector256.LoadUnsafe(ref zRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i),
+                                         Vector256.LoadUnsafe(ref yRef, (uint)i),
+                                         Vector256.LoadUnsafe(ref zRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector256.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     if (Vector128.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector128.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                Vector128.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                Vector128.LoadUnsafe(ref zRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i),
+                                         Vector128.LoadUnsafe(ref yRef, (uint)i),
+                                         Vector128.LoadUnsafe(ref zRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector128.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     // Scalar fallback: reached only when no vector path ran, so 'i' is still 0.
+     while (i < x.Length)
+     {
+         Unsafe.Add(ref dRef, i) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, i),
+                                                           Unsafe.Add(ref yRef, i),
+                                                           Unsafe.Add(ref zRef, i));
+
+         i++;
+     }
+ }
+
+ // Applies the ternary operator to each pair of elements from 'x' and 'y' together with the
+ // scalar 'z' and writes the results to the first x.Length elements of 'destination'.
+ //
+ // Calls ThrowHelper.ThrowArgument_SpansMustHaveSameLength when 'x' and 'y' differ in length, and
+ // ThrowHelper.ThrowArgument_DestinationTooShort when 'destination' is shorter than 'x'.
+ //
+ // The widest hardware-accelerated vector width that fits in the input is used; inputs too short
+ // for any accelerated width fall through to the scalar loop at the end. When the length is not a
+ // multiple of the vector width, the last elements are covered by one extra vector that overlaps
+ // elements the main loop also handles. That vector is computed BEFORE anything is stored, so
+ // passing an input span as the destination (which the previous scalar loop supported) never
+ // feeds already-written results back into the operator. Spans that overlap at different
+ // starting offsets are not handled.
+ private static void InvokeSpanSpanScalarIntoSpan(
+     ReadOnlySpan x, ReadOnlySpan y, float z, Span destination)
+     where TTernaryOperator : ITernaryOperator
+ {
+     if (x.Length != y.Length)
+     {
+         ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
+     }
+
+     if (x.Length > destination.Length)
+     {
+         ThrowHelper.ThrowArgument_DestinationTooShort();
+     }
+
+     ref float xRef = ref MemoryMarshal.GetReference(x);
+     ref float yRef = ref MemoryMarshal.GetReference(y);
+     ref float dRef = ref MemoryMarshal.GetReference(destination);
+     int i = 0, oneVectorFromEnd;
+
+#if NET8_0_OR_GREATER
+     if (Vector512.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector512.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Broadcast the scalar once so every iteration reuses the same vector.
+             var zVec = Vector512.Create(z);
+
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                Vector512.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                zVec);
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i),
+                                         Vector512.LoadUnsafe(ref yRef, (uint)i),
+                                         zVec).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector512.Count;
+             }
+
+             // Overlapped elements are rewritten with the same values the loop produced.
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+#endif
+
+     if (Vector256.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector256.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             var zVec = Vector256.Create(z);
+
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                Vector256.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                zVec);
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i),
+                                         Vector256.LoadUnsafe(ref yRef, (uint)i),
+                                         zVec).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector256.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     if (Vector128.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector128.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             var zVec = Vector128.Create(z);
+
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                Vector128.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                zVec);
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i),
+                                         Vector128.LoadUnsafe(ref yRef, (uint)i),
+                                         zVec).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector128.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     // Scalar fallback: reached only when no vector path ran, so 'i' is still 0.
+     while (i < x.Length)
+     {
+         Unsafe.Add(ref dRef, i) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, i),
+                                                           Unsafe.Add(ref yRef, i),
+                                                           z);
+
+         i++;
+     }
+ }
+
+ // Applies the ternary operator to each element of 'x', the scalar 'y', and the matching element
+ // of 'z', and writes the results to the first x.Length elements of 'destination'.
+ //
+ // Calls ThrowHelper.ThrowArgument_SpansMustHaveSameLength when 'x' and 'z' differ in length, and
+ // ThrowHelper.ThrowArgument_DestinationTooShort when 'destination' is shorter than 'x'.
+ //
+ // The widest hardware-accelerated vector width that fits in the input is used; inputs too short
+ // for any accelerated width fall through to the scalar loop at the end. When the length is not a
+ // multiple of the vector width, the last elements are covered by one extra vector that overlaps
+ // elements the main loop also handles. That vector is computed BEFORE anything is stored, so
+ // passing an input span as the destination (which the previous scalar loop supported) never
+ // feeds already-written results back into the operator. Spans that overlap at different
+ // starting offsets are not handled.
+ private static void InvokeSpanScalarSpanIntoSpan(
+     ReadOnlySpan x, float y, ReadOnlySpan z, Span destination)
+     where TTernaryOperator : ITernaryOperator
+ {
+     if (x.Length != z.Length)
+     {
+         ThrowHelper.ThrowArgument_SpansMustHaveSameLength();
+     }
+
+     if (x.Length > destination.Length)
+     {
+         ThrowHelper.ThrowArgument_DestinationTooShort();
+     }
+
+     ref float xRef = ref MemoryMarshal.GetReference(x);
+     ref float zRef = ref MemoryMarshal.GetReference(z);
+     ref float dRef = ref MemoryMarshal.GetReference(destination);
+     int i = 0, oneVectorFromEnd;
+
+#if NET8_0_OR_GREATER
+     if (Vector512.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector512.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             // Broadcast the scalar once so every iteration reuses the same vector.
+             var yVec = Vector512.Create(y);
+
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                yVec,
+                                                Vector512.LoadUnsafe(ref zRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i),
+                                         yVec,
+                                         Vector512.LoadUnsafe(ref zRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector512.Count;
+             }
+
+             // Overlapped elements are rewritten with the same values the loop produced.
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+#endif
+
+     if (Vector256.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector256.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             var yVec = Vector256.Create(y);
+
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                yVec,
+                                                Vector256.LoadUnsafe(ref zRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i),
+                                         yVec,
+                                         Vector256.LoadUnsafe(ref zRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector256.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     if (Vector128.IsHardwareAccelerated)
+     {
+         oneVectorFromEnd = x.Length - Vector128.Count;
+         if (i <= oneVectorFromEnd)
+         {
+             var yVec = Vector128.Create(y);
+
+             // Compute the final vector from the untouched inputs before any store happens.
+             uint lastVectorIndex = (uint)oneVectorFromEnd;
+             var tail = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                yVec,
+                                                Vector128.LoadUnsafe(ref zRef, lastVectorIndex));
+
+             // Loop handling one vector at a time; the last block is covered by 'tail'.
+             while (i < oneVectorFromEnd)
+             {
+                 TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i),
+                                         yVec,
+                                         Vector128.LoadUnsafe(ref zRef, (uint)i)).StoreUnsafe(ref dRef, (uint)i);
+
+                 i += Vector128.Count;
+             }
+
+             tail.StoreUnsafe(ref dRef, lastVectorIndex);
+
+             return;
+         }
+     }
+
+     // Scalar fallback: reached only when no vector path ran, so 'i' is still 0.
+     while (i < x.Length)
+     {
+         Unsafe.Add(ref dRef, i) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, i),
+                                                           y,
+                                                           Unsafe.Add(ref zRef, i));
+
+         i++;
+     }
+ }
+
+ // Binary operator computing x + y, for a single float and for each supported vector width.
+ private readonly struct AddOperator : IBinaryOperator
+ {
+     public static float Invoke(float x, float y) => x + y;
+     public static Vector128 Invoke(Vector128 x, Vector128 y) => Vector128.Add(x, y);
+     public static Vector256 Invoke(Vector256 x, Vector256 y) => Vector256.Add(x, y);
+#if NET8_0_OR_GREATER
+     public static Vector512 Invoke(Vector512 x, Vector512 y) => Vector512.Add(x, y);
+#endif
+ }
+
+ // Binary operator computing x - y, for a single float and for each supported vector width.
+ private readonly struct SubtractOperator : IBinaryOperator
+ {
+     public static float Invoke(float x, float y) => x - y;
+     public static Vector128 Invoke(Vector128 x, Vector128 y) => Vector128.Subtract(x, y);
+     public static Vector256 Invoke(Vector256 x, Vector256 y) => Vector256.Subtract(x, y);
+#if NET8_0_OR_GREATER
+     public static Vector512 Invoke(Vector512 x, Vector512 y) => Vector512.Subtract(x, y);
+#endif
+ }
+
+ // Binary operator computing x * y, for a single float and for each supported vector width.
+ private readonly struct MultiplyOperator : IBinaryOperator
+ {
+     public static float Invoke(float x, float y) => x * y;
+     public static Vector128 Invoke(Vector128 x, Vector128 y) => Vector128.Multiply(x, y);
+     public static Vector256 Invoke(Vector256 x, Vector256 y) => Vector256.Multiply(x, y);
+#if NET8_0_OR_GREATER
+     public static Vector512 Invoke(Vector512 x, Vector512 y) => Vector512.Multiply(x, y);
+#endif
+ }
+
+ // Binary operator computing x / y, for a single float and for each supported vector width.
+ private readonly struct DivideOperator : IBinaryOperator
+ {
+     public static float Invoke(float x, float y) => x / y;
+     public static Vector128 Invoke(Vector128 x, Vector128 y) => Vector128.Divide(x, y);
+     public static Vector256 Invoke(Vector256 x, Vector256 y) => Vector256.Divide(x, y);
+#if NET8_0_OR_GREATER
+     public static Vector512 Invoke(Vector512 x, Vector512 y) => Vector512.Divide(x, y);
+#endif
+ }
+
+ private readonly struct NegateOperator : IUnaryOperator
+ {
+ public static float Invoke(float x) => -x;
+ public static Vector128 Invoke(Vector128 x) => -x;
+ public static Vector256 Invoke(Vector256