Skip to content

Commit

Permalink
Merge branch 'master' into riscv-xtheadvector
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui authored Dec 23, 2024
2 parents a7127f2 + 66cd40e commit 0d0099b
Show file tree
Hide file tree
Showing 12 changed files with 88 additions and 20 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/android.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,10 @@ jobs:
env:
DEBIAN_FRONTEND: noninteractive
run: |
sudo apt-get update
sudo apt-get install -y libncurses5
pushd /usr/lib/x86_64-linux-gnu/
sudo ln -s libncurses.so.6 libncurses.so.5
sudo ln -s libtinfo.so.6 libtinfo.so.5
popd
wget -q https://dl.google.com/android/repository/android-ndk-r16b-linux-x86_64.zip -O $GITHUB_WORKSPACE/android-ndk-r16b-linux-x86_64.zip
cd $GITHUB_WORKSPACE && unzip -q android-ndk-r16b-linux-x86_64.zip
Expand Down
56 changes: 56 additions & 0 deletions .github/workflows/pnnx.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# CI workflow for tools/pnnx: configure and build on Linux, macOS and Windows,
# then run a small ctest subset on the non-Windows runners.
name: pnnx

# Run for pushes and pull requests against master that touch this workflow
# file or anything under tools/pnnx, excluding the pnnx README.
on:
  push:
    branches:
      - master
    paths:
      - '.github/workflows/pnnx.yml'
      - 'tools/pnnx/**'
      - '!tools/pnnx/README.md'
  pull_request:
    branches:
      - master
    paths:
      - '.github/workflows/pnnx.yml'
      - 'tools/pnnx/**'
      - '!tools/pnnx/README.md'

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: pnnx-${{ github.ref }}
  cancel-in-progress: true

# The job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining OS legs finish even if one of them fails.
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - macos-latest
          - windows-latest

    env:
      # User-base directory for `pip install --user`, placed inside the workspace.
      PYTHONUSERBASE: ${{ github.workspace }}/torch
      # NOTE(review): presumably consumed by MSBuild on the Windows leg to
      # parallelise compilation -- confirm.
      UseMultiToolTask: true

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: 3.12

      # Install the CPU-only PyTorch wheel plus numpy and packaging into the user base.
      - name: setup-pytorch
        run: |
          python3 -m pip config set global.break-system-packages true
          pip3 install --user torch --index-url https://download.pytorch.org/whl/cpu
          pip3 install --user numpy packaging

      # Out-of-source Release build of tools/pnnx.
      - name: build-pnnx
        run: |
          cd tools/pnnx
          mkdir build && cd build
          cmake -DCMAKE_BUILD_TYPE=Release ..
          cmake --build . --config Release -j 4

      # Only tests whose name matches test_nn_Conv are run; skipped on Windows.
      - name: quick-test
        if: matrix.os != 'windows-latest'
        run: |
          cd tools/pnnx
          cd build && ctest -C Release --output-on-failure -R test_nn_Conv
4 changes: 2 additions & 2 deletions .github/workflows/web-assembly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
- name: test-simd
run: |
cd build-simd
TESTS_EXECUTABLE_LOADER=node TESTS_EXECUTABLE_LOADER_ARGUMENTS="--experimental-wasm-simd" ctest --output-on-failure -j $(nproc)
TESTS_EXECUTABLE_LOADER=node ctest --output-on-failure -j $(nproc)
- name: build-simd-omp
run: |
source emsdk/emsdk_env.sh
Expand All @@ -73,4 +73,4 @@ jobs:
- name: test-simd-omp
run: |
cd build-simd-omp
TESTS_EXECUTABLE_LOADER=node TESTS_EXECUTABLE_LOADER_ARGUMENTS="--experimental-wasm-simd;--experimental-wasm-threads" ctest --output-on-failure -j $(nproc)
TESTS_EXECUTABLE_LOADER=node ctest --output-on-failure -j $(nproc)
7 changes: 4 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ endif()
##############################################

# Standard CMake module that provides check_cxx_compiler_flag().
include(CheckCXXCompilerFlag)
# Build configuration CMake uses for try_compile()-based checks, which includes
# the check_cxx_source_compiles() probes further down in this file.
set(CMAKE_TRY_COMPILE_CONFIGURATION release)

# GNU-style inline assembly does not actually work with MSVC, nor with clang in MSVC mode (clang-cl)
if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")))
Expand Down Expand Up @@ -523,7 +524,7 @@ else()
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }\n__m512i t(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
Expand Down Expand Up @@ -560,7 +561,7 @@ else()
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }\n__m512i t(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
Expand Down Expand Up @@ -595,7 +596,7 @@ else()
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)

set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }\n__m512i t(__m512 a) { __m256i _a = (__m256i)_mm512_cvtneps_pbh(a); return _mm512_inserti32x8(_mm512_castsi256_si512(_a), _a, 1); }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)

set(CMAKE_REQUIRED_FLAGS "-mfma -mf16c -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
Expand Down
4 changes: 2 additions & 2 deletions src/layer/x86/gemm_int8.h
Original file line number Diff line number Diff line change
Expand Up @@ -2014,7 +2014,7 @@ static void pack_A_tile_fp32_to_int8(const Mat& A, Mat& AT, int i, int max_ii, i

__m256i _pp = combine4x2_epi32(_pp0, _pp1);
#if !__AVXVNNIINT8__
_w_shift = _mm256_dpbusd_epi32(_w_shift, _v127, _pp);
_w_shift = _mm256_comp_dpbusd_epi32(_w_shift, _v127, _pp);
#endif // !__AVXVNNIINT8__
_mm256_storeu_si256((__m256i*)pp, _pp);

Expand Down Expand Up @@ -2108,7 +2108,7 @@ static void pack_A_tile_fp32_to_int8(const Mat& A, Mat& AT, int i, int max_ii, i

__m256i _pp = combine4x2_epi32(_pp0, _pp1);
#if !__AVXVNNIINT8__
_w_shift = _mm256_dpbusd_epi32(_w_shift, _v127, _pp);
_w_shift = _mm256_comp_dpbusd_epi32(_w_shift, _v127, _pp);
#endif // !__AVXVNNIINT8__
_mm256_storeu_si256((__m256i*)pp, _pp);

Expand Down
6 changes: 3 additions & 3 deletions src/layer/x86/x86_usability.h
Original file line number Diff line number Diff line change
Expand Up @@ -1490,9 +1490,9 @@ static NCNN_FORCEINLINE __m256i float2bfloat_avx512(const __m512& v0)
static NCNN_FORCEINLINE __m512i float2bfloat_avx512(const __m512& v0, const __m512& v1)
{
#if __AVX512BF16__
__m256bh _v0 = _mm512_cvtneps_pbh(v0);
__m256bh _v1 = _mm512_cvtneps_pbh(v1);
__m512i _v = _mm512_inserti32x8(_mm512_castsi256_si512((__m256i)_v0), (__m256i)_v1, 1);
__m256i _v0 = (__m256i)_mm512_cvtneps_pbh(v0);
__m256i _v1 = (__m256i)_mm512_cvtneps_pbh(v1);
__m512i _v = _mm512_inserti32x8(_mm512_castsi256_si512(_v0), _v1, 1);
#else
__m512i _a = _mm512_castps_si512(v0);
__m512i _b = _mm512_castps_si512(v1);
Expand Down
5 changes: 5 additions & 0 deletions tools/pnnx/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ if(MSVC AND NOT CMAKE_VERSION VERSION_LESS "3.15")
endif()
endif()

# Opt in to policy CMP0094 when the running CMake knows it; the POLICY guard
# keeps older CMake versions from erroring on an unknown policy.
if(POLICY CMP0094)
cmake_policy(SET CMP0094 NEW) # FindPython should return the first matching Python
endif()
# Lookup-order hints for CMake's FindPython module: consult the Windows registry
# and macOS frameworks last, after the other search locations.
# NOTE(review): Python_FIND_* is read by find_package(Python); find_package(Python3)
# reads Python3_FIND_* instead -- confirm which one cmake/PNNXPyTorch uses.
set(Python_FIND_REGISTRY "LAST")
set(Python_FIND_FRAMEWORK "LAST")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(PNNXPyTorch)

Expand Down
4 changes: 4 additions & 0 deletions tools/pnnx/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,10 @@ if(PROTOBUF_FOUND)
set_property(TARGET onnxruntime::onnxruntime APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_BINARY_DIR})
set_property(TARGET onnxruntime::onnxruntime APPEND PROPERTY INTERFACE_LINK_LIBRARIES protobuf::libprotobuf)
endif()

# On Apple platforms, append the CoreFoundation framework to the interface link
# libraries of onnxruntime::onnxruntime so every target linking it also links the framework.
# NOTE(review): presumably onnxruntime references CoreFoundation symbols on macOS -- confirm.
if(APPLE)
set_property(TARGET onnxruntime::onnxruntime APPEND PROPERTY INTERFACE_LINK_LIBRARIES "-framework CoreFoundation")
endif()
endif()
endif()

Expand Down
4 changes: 2 additions & 2 deletions tools/pnnx/src/pass_ncnn/torch_istft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,10 @@ static int detect_window_type(const std::vector<float>& window_data)
if (!NearlyEqual(window_data[i], 1.f, 0.001))
is_one = false;

if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001))
if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001))
is_hann = false;

if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001))
if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001))
is_hamming = false;
}

Expand Down
4 changes: 2 additions & 2 deletions tools/pnnx/src/pass_ncnn/torch_stft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ static int detect_window_type(const std::vector<float>& window_data)
if (!NearlyEqual(window_data[i], 1.f, 0.001))
is_one = false;

if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001))
if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001))
is_hann = false;

if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001))
if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001))
is_hamming = false;
}

Expand Down
4 changes: 2 additions & 2 deletions tools/pnnx/src/pass_ncnn/torchaudio_F_inverse_spectrogram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ static int detect_window_type(const std::vector<float>& window_data)
if (!NearlyEqual(window_data[i], 1.f, 0.001))
is_one = false;

if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001))
if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001))
is_hann = false;

if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001))
if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001))
is_hamming = false;
}

Expand Down
4 changes: 2 additions & 2 deletions tools/pnnx/src/pass_ncnn/torchaudio_F_spectrogram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ static int detect_window_type(const std::vector<float>& window_data)
if (!NearlyEqual(window_data[i], 1.f, 0.001))
is_one = false;

if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * M_PI * i / winlen)), 0.001))
if (!NearlyEqual(window_data[i], 0.5f * (1 - cos(2 * 3.14159265358979323846 * i / winlen)), 0.001))
is_hann = false;

if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * M_PI * i / winlen), 0.001))
if (!NearlyEqual(window_data[i], 0.54f - 0.46f * cos(2 * 3.14159265358979323846 * i / winlen), 0.001))
is_hamming = false;
}

Expand Down

0 comments on commit 0d0099b

Please sign in to comment.