Merge branch 'master' into riscv-xtheadvector

Tencent · Dec 23, 2024 · commit 0d0099b
2 parents: a7127f2 + 66cd40e
Show file tree

Hide file tree

Showing 12 changed files with 88 additions and 20 deletions.
diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml
@@ -117,8 +117,10 @@ jobs:
       env:
         DEBIAN_FRONTEND: noninteractive
       run: |
-        sudo apt-get update
-        sudo apt-get install -y libncurses5
+        pushd /usr/lib/x86_64-linux-gnu/
+        sudo ln -s libncurses.so.6 libncurses.so.5
+        sudo ln -s libtinfo.so.6 libtinfo.so.5
+        popd
         wget -q https://dl.google.com/android/repository/android-ndk-r16b-linux-x86_64.zip -O $GITHUB_WORKSPACE/android-ndk-r16b-linux-x86_64.zip
         cd $GITHUB_WORKSPACE && unzip -q android-ndk-r16b-linux-x86_64.zip
 

diff --git a/.github/workflows/pnnx.yml b/.github/workflows/pnnx.yml
@@ -0,0 +1,56 @@
+name: pnnx
+on:
+  push:
+    branches: [master]
+    paths:
+    - '.github/workflows/pnnx.yml'
+    - 'tools/pnnx/**'
+    - '!tools/pnnx/README.md'
+  pull_request:
+    branches: [master]
+    paths:
+    - '.github/workflows/pnnx.yml'
+    - 'tools/pnnx/**'
+    - '!tools/pnnx/README.md'
+concurrency:
+  group: pnnx-${{ github.ref }}
+  cancel-in-progress: true
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+
+    env:
+      PYTHONUSERBASE: ${{ github.workspace }}/torch
+      UseMultiToolTask: true
+    steps:
+    - uses: actions/checkout@v4
+
+    - uses: actions/setup-python@v5
+      with:
+        python-version: 3.12
+
+    - name: setup-pytorch
+      run: |
+        python3 -m pip config set global.break-system-packages true
+        pip3 install --user torch --index-url https://download.pytorch.org/whl/cpu
+        pip3 install --user numpy packaging
+
+    - name: build-pnnx
+      run: |
+        cd tools/pnnx
+        mkdir build && cd build
+        cmake -DCMAKE_BUILD_TYPE=Release ..
+        cmake --build . --config Release -j 4
+
+    - name: quick-test
+      if: matrix.os != 'windows-latest'
+      run: |
+        cd tools/pnnx
+        cd build && ctest -C Release --output-on-failure -R test_nn_Conv
diff --git a/.github/workflows/web-assembly.yml b/.github/workflows/web-assembly.yml
@@ -62,7 +62,7 @@ jobs:
     - name: test-simd
       run: |
         cd build-simd
-        TESTS_EXECUTABLE_LOADER=node TESTS_EXECUTABLE_LOADER_ARGUMENTS="--experimental-wasm-simd" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=node ctest --output-on-failure -j $(nproc)
     - name: build-simd-omp
       run: |
         source emsdk/emsdk_env.sh
@@ -73,4 +73,4 @@ jobs:
     - name: test-simd-omp
       run: |
         cd build-simd-omp
-        TESTS_EXECUTABLE_LOADER=node TESTS_EXECUTABLE_LOADER_ARGUMENTS="--experimental-wasm-simd;--experimental-wasm-threads" ctest --output-on-failure -j $(nproc)
+        TESTS_EXECUTABLE_LOADER=node ctest --output-on-failure -j $(nproc)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -139,6 +139,7 @@ endif()
 ##############################################
 
 include(CheckCXXCompilerFlag)
+set(CMAKE_TRY_COMPILE_CONFIGURATION release)
 
 # gnu inline assembly in clang msvc does not work actually
 if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")))
@@ -523,7 +524,7 @@ else()
         check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
 
         set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
-        check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
+        check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }\n__m512i t(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
 
         set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
         check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
@@ -560,7 +561,7 @@ else()
         check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
 
         set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
-        check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
+        check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }\n__m512i t(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
 
         set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
         check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
@@ -595,7 +596,7 @@ else()
         check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
 
         set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
-        check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
+        check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }\n__m512i t(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
 
         set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
         check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)

diff --git a/src/layer/x86/gemm_int8.h b/src/layer/x86/gemm_int8.h
@@ -2014,7 +2014,7 @@ static void pack_A_tile_fp32_to_int8(const Mat& A, Mat& AT, int i, int max_ii, i
 
                 __m256i _pp = combine4x2_epi32(_pp0, _pp1);
 #if !__AVXVNNIINT8__
-                _w_shift = _mm256_dpbusd_epi32(_w_shift, _v127, _pp);
+                _w_shift = _mm256_comp_dpbusd_epi32(_w_shift, _v127, _pp);
 #endif // !__AVXVNNIINT8__
                 _mm256_storeu_si256((__m256i*)pp, _pp);
 
@@ -2108,7 +2108,7 @@ static void pack_A_tile_fp32_to_int8(const Mat& A, Mat& AT, int i, int max_ii, i
 
                 __m256i _pp = combine4x2_epi32(_pp0, _pp1);
 #if !__AVXVNNIINT8__
-                _w_shift = _mm256_dpbusd_epi32(_w_shift, _v127, _pp);
+                _w_shift = _mm256_comp_dpbusd_epi32(_w_shift, _v127, _pp);
 #endif // !__AVXVNNIINT8__
                 _mm256_storeu_si256((__m256i*)pp, _pp);
 

diff --git a/src/layer/x86/x86_usability.h b/src/layer/x86/x86_usability.h
@@ -1490,9 +1490,9 @@ static NCNN_FORCEINLINE __m256i float2bfloat_avx512(const __m512& v0)
 static NCNN_FORCEINLINE __m512i float2bfloat_avx512(const __m512& v0, const __m512& v1)
 {
 #if __AVX512BF16__
-    __m256bh _v0 = _mm512_cvtneps_pbh(v0);
-    __m256bh _v1 = _mm512_cvtneps_pbh(v1);
-    __m512i _v = _mm512_inserti32x8(_mm512_castsi256_si512((__m256i)_v0), (__m256i)_v1, 1);
+    __m256i _v0 = (__m256i)_mm512_cvtneps_pbh(v0);
+    __m256i _v1 = (__m256i)_mm512_cvtneps_pbh(v1);
+    __m512i _v = _mm512_inserti32x8(_mm512_castsi256_si512(_v0), _v1, 1);
 #else
     __m512i _a = _mm512_castps_si512(v0);
     __m512i _b = _mm512_castps_si512(v1);

diff --git a/tools/pnnx/CMakeLists.txt b/tools/pnnx/CMakeLists.txt
@@ -19,6 +19,11 @@ if(MSVC AND NOT CMAKE_VERSION VERSION_LESS "3.15")
     endif()
 endif()
 
+if(POLICY CMP0094)
+    cmake_policy(SET CMP0094 NEW)  # FindPython should return the first matching Python
+endif()
+set(Python_FIND_REGISTRY "LAST")
+set(Python_FIND_FRAMEWORK "LAST")
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 include(PNNXPyTorch)
 

diff --git a/tools/pnnx/src/CMakeLists.txt b/tools/pnnx/src/CMakeLists.txt
@@ -625,6 +625,10 @@ if(PROTOBUF_FOUND)
             set_property(TARGET onnxruntime::onnxruntime APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_BINARY_DIR})
             set_property(TARGET onnxruntime::onnxruntime APPEND PROPERTY INTERFACE_LINK_LIBRARIES protobuf::libprotobuf)
         endif()
+
+        if(APPLE)
+            set_property(TARGET onnxruntime::onnxruntime APPEND PROPERTY INTERFACE_LINK_LIBRARIES "-framework CoreFoundation")
+        endif()
     endif()
 endif()
 

diff --git a/tools/pnnx/src/pass_ncnn/torch_istft.cpp b/tools/pnnx/src/pass_ncnn/torch_istft.cpp
@@ -106,10 +106,10 @@ static int detect_window_type(const std::vector<float>& window_data)
         if (!NearlyEqual(window_data[i], 1.f, 0.001))
             is_one = false;
 
-        if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001))
+        if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001))
             is_hann = false;
 
-        if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001))
+        if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001))
             is_hamming = false;
     }
 

diff --git a/tools/pnnx/src/pass_ncnn/torch_stft.cpp b/tools/pnnx/src/pass_ncnn/torch_stft.cpp
@@ -93,10 +93,10 @@ static int detect_window_type(const std::vector<float>& window_data)
         if (!NearlyEqual(window_data[i], 1.f, 0.001))
             is_one = false;
 
-        if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001))
+        if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001))
             is_hann = false;
 
-        if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001))
+        if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001))
             is_hamming = false;
     }
 

diff --git a/tools/pnnx/src/pass_ncnn/torchaudio_F_inverse_spectrogram.cpp b/tools/pnnx/src/pass_ncnn/torchaudio_F_inverse_spectrogram.cpp
@@ -43,10 +43,10 @@ static int detect_window_type(const std::vector<float>& window_data)
         if (!NearlyEqual(window_data[i], 1.f, 0.001))
             is_one = false;
 
-        if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001))
+        if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001))
             is_hann = false;
 
-        if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001))
+        if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001))
             is_hamming = false;
     }
 

diff --git a/tools/pnnx/src/pass_ncnn/torchaudio_F_spectrogram.cpp b/tools/pnnx/src/pass_ncnn/torchaudio_F_spectrogram.cpp
@@ -43,10 +43,10 @@ static int detect_window_type(const std::vector<float>& window_data)
         if (!NearlyEqual(window_data[i], 1.f, 0.001))
             is_one = false;
 
-        if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001))
+        if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001))
             is_hann = false;
 
-        if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001))
+        if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001))
             is_hamming = false;
     }