Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

windows clang ci #5469

Merged
merged 16 commits into from
May 23, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix...
  • Loading branch information
nihui committed May 23, 2024
commit 7b5c14cd1be8685cad2df1298724e9704ad326ce
32 changes: 31 additions & 1 deletion cmake/ncnn_add_layer.cmake
Original file line number Diff line number Diff line change
@@ -247,7 +247,7 @@ macro(ncnn_add_layer class)
endif()

if(NCNN_TARGET_ARCH STREQUAL "arm" AND (CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARGET_ILP32))
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
if(NCNN_VFPV4)
ncnn_add_arch_opt_source(${class} vfpv4 " ")
endif()
@@ -277,6 +277,36 @@ macro(ncnn_add_layer class)
endif()
if(NCNN_ARM86SVEF32MM)
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
if(NCNN_VFPV4)
ncnn_add_arch_opt_source(${class} vfpv4 " ")
endif()
if(NCNN_ARM82)
ncnn_add_arch_opt_source(${class} asimdhp "/arch:armv8.2 -march=armv8.2-a+fp16 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC")
endif()
if(NCNN_ARM82DOT)
ncnn_add_arch_opt_source(${class} asimddp "/arch:armv8.2 -march=armv8.2-a+fp16+dotprod /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD")
endif()
if(NCNN_ARM82FP16FML)
ncnn_add_arch_opt_source(${class} asimdfhm "/arch:armv8.2 -march=armv8.2-a+fp16+fp16fml /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_FP16_FML")
endif()
if(NCNN_ARM84BF16)
ncnn_add_arch_opt_source(${class} bf16 "/arch:armv8.4 -march=armv8.4-a+fp16+dotprod+bf16 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC")
endif()
if(NCNN_ARM84I8MM)
ncnn_add_arch_opt_source(${class} i8mm "/arch:armv8.4 -march=armv8.4-a+fp16+dotprod+i8mm /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_MATMUL_INT8")
endif()
# TODO add support for sve family
if(NCNN_ARM86SVE)
endif()
if(NCNN_ARM86SVE2)
endif()
if(NCNN_ARM86SVEBF16)
endif()
if(NCNN_ARM86SVEI8MM)
endif()
if(NCNN_ARM86SVEF32MM)
endif()
else()
if(NCNN_VFPV4)
ncnn_add_arch_opt_source(${class} vfpv4 " ")
9 changes: 6 additions & 3 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -545,7 +545,8 @@ if(NCNN_TARGET_ARCH STREQUAL "arm" AND (CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARG
endif()
if(NCNN_ARM86SVEF32MM)
endif()
else()
endif()
if(NOT CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
set(ARM_MARCH_FLAG "-march=armv8.6-a+fp16+dotprod+sve")
if(NCNN_ARM86SVE2)
set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+sve2")
@@ -569,7 +570,8 @@ if(NCNN_TARGET_ARCH STREQUAL "arm" AND (CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARG
if(NCNN_ARM84I8MM)
target_compile_options(ncnn PRIVATE /D__ARM_FEATURE_MATMUL_INT8)
endif()
else()
endif()
if(NOT CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
set(ARM_MARCH_FLAG "-march=armv8.4-a+fp16+dotprod")
if(NCNN_ARM84BF16)
set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+bf16")
@@ -587,7 +589,8 @@ if(NCNN_TARGET_ARCH STREQUAL "arm" AND (CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARG
if(NCNN_ARM82FP16FML)
target_compile_options(ncnn PRIVATE /D__ARM_FEATURE_FP16_FML)
endif()
else()
endif()
if(NOT CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
set(ARM_MARCH_FLAG "-march=armv8.2-a+fp16")
if(NCNN_ARM82DOT)
set(ARM_MARCH_FLAG "${ARM_MARCH_FLAG}+dotprod")
4 changes: 2 additions & 2 deletions src/layer/arm/arm_activation.h
Original file line number Diff line number Diff line change
@@ -126,7 +126,7 @@ static inline float16x4_t activation_ps_f16(float16x4_t _v, int activation_type,
else if (activation_type == 2)
{
const float16x4_t _zero = vdup_n_f16(0.f);
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
const float16x4_t _slope = vcvt_f16_f32(vdupq_n_f32(activation_params[0]));
#else
const float16x4_t _slope = vdup_n_f16((__fp16)activation_params[0]);
@@ -176,7 +176,7 @@ static inline float16x8_t activation_ps_f16(float16x8_t _v, int activation_type,
else if (activation_type == 2)
{
const float16x8_t _zero = vdupq_n_f16(0.f);
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
const float16x4_t _slope0 = vcvt_f16_f32(vdupq_n_f32(activation_params[0]));
const float16x8_t _slope = vcombine_f16(_slope0, _slope0);
#else
2 changes: 1 addition & 1 deletion src/layer/arm/arm_usability.h
Original file line number Diff line number Diff line change
@@ -123,7 +123,7 @@ static inline int8x8_t float2int8leakyrelu(float32x4_t _vlow, float32x4_t _vhigh
}

#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#ifdef _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
struct __fp16
{
__fp16()
4 changes: 2 additions & 2 deletions src/layer/arm/batchnorm_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
@@ -369,7 +369,7 @@ int BatchNorm_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& op
__fp16 b = (__fp16)b_data[i];

float16x4_t _a = vdup_n_f16(a);
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _b = vcvt_f16_f32(vdupq_n_f32(b_data[i]));
#else
float16x4_t _b = vdup_n_f16(b);
@@ -410,7 +410,7 @@ int BatchNorm_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& op
__fp16 b = (__fp16)b_data[q];

float16x4_t _a = vdup_n_f16(a);
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _b = vcvt_f16_f32(vdupq_n_f32(b_data[q]));
#else
float16x4_t _b = vdup_n_f16(b);
16 changes: 8 additions & 8 deletions src/layer/arm/neon_mathfun_fp16s.h
Original file line number Diff line number Diff line change
@@ -201,7 +201,7 @@ static inline float16x4_t exp_ps_f16(float16x4_t x)
x = vmax_f16(x, vdup_n_f16(c_exp_lo_f16));

/* express exp(x) as exp(g + n*log(2)) */
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
fx = vfma_f16(vdup_n_f16(0.5f), x, vcvt_f16_f32(vdupq_n_f32(c_cephes_LOG2EF)));
#else
fx = vfma_f16(vdup_n_f16(0.5f), x, vdup_n_f16(c_cephes_LOG2EF));
@@ -216,7 +216,7 @@ static inline float16x4_t exp_ps_f16(float16x4_t x)

fx = vsub_f16(tmp, (float16x4_t)(mask));

#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
tmp = vmul_f16(fx, vcvt_f16_f32(vdupq_n_f32(c_cephes_exp_C1)));
float16x4_t z = vmul_f16(fx, vcvt_f16_f32(vdupq_n_f32(c_cephes_exp_C2)));
#else
@@ -258,7 +258,7 @@ static inline float16x8_t exp_ps_f16(float16x8_t x)
x = vmaxq_f16(x, vdupq_n_f16(c_exp_lo_f16));

/* express exp(x) as exp(g + n*log(2)) */
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _c_cephes_LOG2EF = vcvt_f16_f32(vdupq_n_f32(c_cephes_LOG2EF));
fx = vfmaq_f16(vdupq_n_f16(0.5f), x, vcombine_f16(_c_cephes_LOG2EF, _c_cephes_LOG2EF));
#else
@@ -274,7 +274,7 @@ static inline float16x8_t exp_ps_f16(float16x8_t x)

fx = vsubq_f16(tmp, vreinterpretq_f16_u16(mask));

#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _c_cephes_exp_C1 = vcvt_f16_f32(vdupq_n_f32(c_cephes_exp_C1));
tmp = vmulq_f16(fx, vcombine_f16(_c_cephes_exp_C1, _c_cephes_exp_C1));
float16x4_t _c_cephes_exp_C2 = vcvt_f16_f32(vdupq_n_f32(c_cephes_exp_C2));
@@ -347,7 +347,7 @@ static inline void sincos_ps_f16(float16x4_t x, float16x4_t* ysin, float16x4_t*
x = vabs_f16(x);

/* scale by 4/Pi */
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _c_cephes_FOPI = vcvt_f16_f32(vdupq_n_f32(c_cephes_FOPI));
y = vmul_f16(x, _c_cephes_FOPI);
#else
@@ -371,7 +371,7 @@ static inline void sincos_ps_f16(float16x4_t x, float16x4_t* ysin, float16x4_t*

/* The magic pass: "Extended precision modular arithmetic"
* x = ((x - y * DP1) - y * DP2) - y * DP3; */
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _c_minus_cephes_DP1 = vcvt_f16_f32(vdupq_n_f32(c_minus_cephes_DP1));
float16x4_t _c_minus_cephes_DP2 = vcvt_f16_f32(vdupq_n_f32(c_minus_cephes_DP2));
float16x4_t _c_minus_cephes_DP3 = vcvt_f16_f32(vdupq_n_f32(c_minus_cephes_DP3));
@@ -422,7 +422,7 @@ static inline void sincos_ps_f16(float16x8_t x, float16x8_t* ysin, float16x8_t*
x = vabsq_f16(x);

/* scale by 4/Pi */
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _c_cephes_FOPI = vcvt_f16_f32(vdupq_n_f32(c_cephes_FOPI));
y = vmulq_f16(x, vcombine_f16(_c_cephes_FOPI, _c_cephes_FOPI));
#else
@@ -446,7 +446,7 @@ static inline void sincos_ps_f16(float16x8_t x, float16x8_t* ysin, float16x8_t*

/* The magic pass: "Extended precision modular arithmetic"
* x = ((x - y * DP1) - y * DP2) - y * DP3; */
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _c_minus_cephes_DP1 = vcvt_f16_f32(vdupq_n_f32(c_minus_cephes_DP1));
float16x4_t _c_minus_cephes_DP2 = vcvt_f16_f32(vdupq_n_f32(c_minus_cephes_DP2));
float16x4_t _c_minus_cephes_DP3 = vcvt_f16_f32(vdupq_n_f32(c_minus_cephes_DP3));
8 changes: 4 additions & 4 deletions src/layer/arm/pooling_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
@@ -612,7 +612,7 @@ int Pooling_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Opt
}
}

#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _inv_area0 = vcvt_f16_f32(vdupq_n_f32(1.f / area));
float16x8_t _inv_area = vcombine_f16(_inv_area0, _inv_area0);
#else
@@ -672,7 +672,7 @@ int Pooling_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Opt
}
}

#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _inv_area = vcvt_f16_f32(vdupq_n_f32(1.f / area));
#else
float16x4_t _inv_area = vdup_n_f16((__fp16)(1.f / area));
@@ -750,7 +750,7 @@ int Pooling_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Opt
const Mat m = bottom_blob_bordered.channel(q);
__fp16* outptr = top_blob.channel(q);

#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _inv_maxk0 = vcvt_f16_f32(vdupq_n_f32(1.f / maxk));
float16x8_t _inv_maxk = vcombine_f16(_inv_maxk0, _inv_maxk0);
#else
@@ -788,7 +788,7 @@ int Pooling_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Opt
const Mat m = bottom_blob_bordered.channel(q);
__fp16* outptr = top_blob.channel(q);

#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _inv_maxk = vcvt_f16_f32(vdupq_n_f32(1.f / maxk));
#else
float16x4_t _inv_maxk = vdup_n_f16((__fp16)(1.f / maxk));
14 changes: 7 additions & 7 deletions src/layer/arm/prelu_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
@@ -266,7 +266,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
}
else
{
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _slope0 = vcvt_f16_f32(vdupq_n_f32(slope_data[0]));
float16x8_t _slope = vcombine_f16(_slope0, _slope0);
#else
@@ -344,7 +344,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
{
if (dims == 1)
{
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x8_t _zero = vdupq_n_f16(0.f);
#else
float16x4_t _zero = vdup_n_f16(0.f);
@@ -363,7 +363,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c

float16x4_t _p = vld1_f16(ptr);
float16x4_t _slope = vcvt_f16_f32(vld1q_f32(slope + i * 4));
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
uint16x4_t _lemask = vcle_f16(_p, vget_low_f16(_zero));
#else
uint16x4_t _lemask = vcle_f16(_p, _zero);
@@ -375,7 +375,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
}
else
{
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x8_t _slope = vdupq_n_f16((__fp16)slope_data[0]);
#else
float16x4_t _slope = vdup_n_f16((__fp16)slope_data[0]);
@@ -387,7 +387,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
__fp16* ptr = (__fp16*)bottom_top_blob + i * 4;

float16x4_t _p = vld1_f16(ptr);
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
uint16x4_t _lemask = vcle_f16(_p, vget_low_f16(_zero));
float16x4_t _ps = vmul_f16(_p, vget_low_f16(_slope));
#else
@@ -500,7 +500,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
const float slope = num_slope > 1 ? slope_data[i] : slope_data[0];

float16x4_t _zero = vdup_n_f16(0.f);
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _slope = vcvt_f16_f32(vdupq_n_f32(slope));
#else
float16x4_t _slope = vdup_n_f16((__fp16)slope);
@@ -543,7 +543,7 @@ int PReLU_arm::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) c
const float slope = num_slope > 1 ? slope_data[q] : slope_data[0];

float16x4_t _zero = vdup_n_f16(0.f);
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _slope = vcvt_f16_f32(vdupq_n_f32(slope));
#else
float16x4_t _slope = vdup_n_f16((__fp16)slope);
6 changes: 3 additions & 3 deletions src/layer/arm/quantize_arm_asimdhp.cpp
Original file line number Diff line number Diff line change
@@ -440,7 +440,7 @@ int Quantize_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Op

if (scale_data_size == 1)
{
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _scale0 = vcvt_f16_f32(vdupq_n_f32(scale_data[0]));
float16x8_t _scale = vcombine_f16(_scale0, _scale0);
#else
@@ -485,7 +485,7 @@ int Quantize_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Op

if (scale_data_size == 1)
{
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _scale0 = vcvt_f16_f32(vdupq_n_f32(scale_data[0]));
float16x8_t _scale = vcombine_f16(_scale0, _scale0);
#else
@@ -545,7 +545,7 @@ int Quantize_arm::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Op

if (scale_data_size == 1)
{
#if _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
float16x4_t _scale0 = vcvt_f16_f32(vdupq_n_f32(scale_data[0]));
float16x8_t _scale = vcombine_f16(_scale0, _scale0);
#else
Loading