diff --git a/onnxruntime/core/mlas/lib/dwconv.cpp b/onnxruntime/core/mlas/lib/dwconv.cpp index 19951a629f847..87effeac5fd9f 100644 --- a/onnxruntime/core/mlas/lib/dwconv.cpp +++ b/onnxruntime/core/mlas/lib/dwconv.cpp @@ -14,7 +14,6 @@ Module Name: --*/ - #include "fp16_common.h" #ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED @@ -30,7 +29,7 @@ MlasConvDepthwiseKernel( size_t OutputCount, size_t KernelSize, MLAS_HALF_GEMM_POSTPROCESSOR* PostProc - ) +) { while (OutputCount > 0) { size_t ChannelOffset = 0; @@ -87,8 +86,7 @@ MlasConvDepthwiseKernel( Output += c; } if (PostProc) { - PostProc->Process(reinterpret_cast<MLAS_FP16*>(Output - Channels), 0, 0, 1, Channels, - Channels); + PostProc->Process(reinterpret_cast<MLAS_FP16*>(Output - Channels), 0, 0, 1, Channels, Channels); } Input += KernelSize; OutputCount -= 1; @@ -108,7 +106,7 @@ MlasConvDepthwiseKernel( size_t OutputCount, size_t KernelSize, MLAS_HALF_GEMM_POSTPROCESSOR* PostProc - ) +) { while (OutputCount > 0) { for (size_t ChannelOffset = 0; ChannelOffset < Channels; ChannelOffset++) { @@ -122,16 +120,14 @@ MlasConvDepthwiseKernel( *Output++ = MLAS_Float2Half(Accumulator); } if (PostProc) { - PostProc->Process(reinterpret_cast<MLAS_FP16*>(Output - Channels), 0, 0, 1, Channels, - Channels); + PostProc->Process(reinterpret_cast<MLAS_FP16*>(Output - Channels), 0, 0, 1, Channels, Channels); } Input += KernelSize; OutputCount -= 1; } } -#endif // MLAS_F16VEC_INTRINSICS_SUPPORTED - +#endif // MLAS_F16VEC_INTRINSICS_SUPPORTED void MLASCALL @@ -144,7 +140,7 @@ MlasConvDepthwise( size_t OutputCount, size_t KernelSize, MLAS_HALF_GEMM_POSTPROCESSOR* PostProc - ) +) { MlasConvDepthwiseKernel( reinterpret_cast<const _mlas_fp16_* const*>(Input), @@ -154,5 +150,6 @@ MlasConvDepthwise( Channels, OutputCount, KernelSize, - PostProc); + PostProc + ); } diff --git a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc index 5a2660c05fe35..f723bd10ec581 100644 --- a/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc +++ 
b/onnxruntime/test/providers/cpu/nn/conv_fp16_test.cc @@ -732,8 +732,7 @@ TEST(ConvFp16Test, Depthwise2D_Bias) { MLFloat16(9.0f), MLFloat16(10.0f), MLFloat16(11.0f), MLFloat16(12.0f), MLFloat16(13.0f), MLFloat16(14.0f), - MLFloat16(15.0f), MLFloat16(16.0f), MLFloat16(17.0f) - }; + MLFloat16(15.0f), MLFloat16(16.0f), MLFloat16(17.0f)}; vector<int64_t> X_shape = {1, 2, 3, 3}; vector<MLFloat16> W = {MLFloat16(1.0f), MLFloat16(2.0f)}; vector<int64_t> W_shape = {2, 1, 1, 1}; @@ -747,8 +746,7 @@ TEST(ConvFp16Test, Depthwise2D_Bias) { MLFloat16(17.0f), MLFloat16(19.0f), MLFloat16(21.0f), MLFloat16(23.0f), MLFloat16(25.0f), MLFloat16(27.0f), - MLFloat16(29.0f), MLFloat16(31.0f), MLFloat16(33.0f) - }; + MLFloat16(29.0f), MLFloat16(31.0f), MLFloat16(33.0f)}; TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape, true); @@ -874,16 +872,36 @@ TEST(ConvFp16Test, Depthwise2D_Bias_Complex) { }; vector<int64_t> W_shape = {13, 1, 2, 2}; vector<MLFloat16> B = { - MLFloat16(1.0f), MLFloat16(2.0f), MLFloat16(3.0f), MLFloat16(4.0f), MLFloat16(5.0f), MLFloat16(6.0f), - MLFloat16(7.0f), MLFloat16(8.0f), MLFloat16(9.0f), MLFloat16(10.0f), MLFloat16(11.0f), MLFloat16(12.0f), + MLFloat16(1.0f), + MLFloat16(2.0f), + MLFloat16(3.0f), + MLFloat16(4.0f), + MLFloat16(5.0f), + MLFloat16(6.0f), + MLFloat16(7.0f), + MLFloat16(8.0f), + MLFloat16(9.0f), + MLFloat16(10.0f), + MLFloat16(11.0f), + MLFloat16(12.0f), MLFloat16(13.0f), }; vector<int64_t> B_shape = {13}; vector<int64_t> Y_shape = {1, 13, 1, 1}; auto expected_vals = { - MLFloat16(15.0f), MLFloat16(128.0f), MLFloat16(369.0f), MLFloat16(738.0f), MLFloat16(1235.0f), - MLFloat16(1860.0f), MLFloat16(2613.0f), MLFloat16(3494.0f), MLFloat16(4503.0f), MLFloat16(5640.0f), - MLFloat16(6905.0f), MLFloat16(8298.0f), MLFloat16(9819.0f), + MLFloat16(15.0f), // 0.0*0.0 + 1.0*1.0 + 2.0*2.0 + 3.0*3.0 + 1.0 + MLFloat16(128.0f), + MLFloat16(369.0f), + MLFloat16(738.0f), + MLFloat16(1235.0f), + 
MLFloat16(1860.0f), + MLFloat16(2613.0f), // 24.0*24.0 + 25.0*25.0 + 26.0*26.0 + 27.0*27.0 + 7.0 + MLFloat16(3494.0f), + MLFloat16(4503.0f), + MLFloat16(5640.0f), + MLFloat16(6905.0f), + MLFloat16(8298.0f), + MLFloat16(9819.0f), // 48.0*48.0 + 49.0*49.0 + 50.0*50.0 + 51.0*51.0 + 13.0 }; TestConvFp16Op(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape); @@ -1252,4 +1270,4 @@ TEST(ConvFp16Test, SharedPrepackedWeights) { } // namespace test } // namespace onnxruntime -#endif // MLAS_F16VEC_INTRINSICS_SUPPORTED \ No newline at end of file +#endif // MLAS_F16VEC_INTRINSICS_SUPPORTED diff --git a/onnxruntime/test/providers/cpu/nn/conv_op_test.cc b/onnxruntime/test/providers/cpu/nn/conv_op_test.cc index 3905019cf57d6..8cf8c027bdde2 100644 --- a/onnxruntime/test/providers/cpu/nn/conv_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/conv_op_test.cc @@ -796,7 +796,19 @@ TEST(ConvTest, Depthwise2D_Bias_Complex) { vector<int64_t> B_shape = {13}; vector<int64_t> Y_shape = {1, 13, 1, 1}; auto expected_vals = { - 15.0f, 128.0f, 369.0f, 738.0f, 1235.0f, 1860.0f, 2613.0f, 3494.0f, 4503.0f, 5640.0f, 6905.0f, 8298.0f, 9819.0f + 15.0f, // 0.0*0.0 + 1.0*1.0 + 2.0*2.0 + 3.0*3.0 + 1.0 + 128.0f, + 369.0f, + 738.0f, + 1235.0f, + 1860.0f, + 2613.0f, // 24.0*24.0 + 25.0*25.0 + 26.0*26.0 + 27.0*27.0 + 7.0 + 3494.0f, + 4503.0f, + 5640.0f, + 6905.0f, + 8298.0f, + 9819.0f, // 48.0*48.0 + 49.0*49.0 + 50.0*50.0 + 51.0*51.0 + 13.0 }; TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape);