Skip to content

Commit

Permalink
rnn/lstm/gru dynamic quantization (#5435)
Browse files Browse the repository at this point in the history
nihui authored May 8, 2024
1 parent be15dbe commit 08b7d99
Showing 44 changed files with 11,129 additions and 1,726 deletions.
1 change: 1 addition & 0 deletions .ci/test-coverage.yml
Original file line number Diff line number Diff line change
@@ -187,6 +187,7 @@ jobs:
- { SSE2: 'ON', AVX: 'OFF', XOP: 'OFF', F16C: 'OFF', FMA: 'OFF', AVX2: 'OFF', AVX512: 'OFF', AVX512VNNI: 'OFF', AVXVNNI: 'OFF', AVX512BF16: 'OFF', AVX512FP16: 'OFF'}
- { SSE2: 'ON', AVX: 'ON', XOP: 'OFF', F16C: 'OFF', FMA: 'OFF', AVX2: 'OFF', AVX512: 'OFF', AVX512VNNI: 'OFF', AVXVNNI: 'OFF', AVX512BF16: 'OFF', AVX512FP16: 'OFF'}
- { SSE2: 'ON', AVX: 'ON', XOP: 'OFF', F16C: 'ON', FMA: 'ON', AVX2: 'ON', AVX512: 'OFF', AVX512VNNI: 'OFF', AVXVNNI: 'OFF', AVX512BF16: 'OFF', AVX512FP16: 'OFF'}
- { SSE2: 'ON', AVX: 'ON', XOP: 'OFF', F16C: 'ON', FMA: 'ON', AVX2: 'ON', AVX512: 'ON', AVX512VNNI: 'OFF', AVXVNNI: 'OFF', AVX512BF16: 'OFF', AVX512FP16: 'OFF'}
- { SSE2: 'ON', AVX: 'ON', XOP: 'OFF', F16C: 'ON', FMA: 'ON', AVX2: 'ON', AVX512: 'ON', AVX512VNNI: 'ON', AVXVNNI: 'OFF', AVX512BF16: 'OFF', AVX512FP16: 'OFF'}

runs-on:
20 changes: 10 additions & 10 deletions cmake/ncnn_add_layer.cmake
Original file line number Diff line number Diff line change
@@ -136,34 +136,34 @@ macro(ncnn_add_layer class)
if(NCNN_TARGET_ARCH STREQUAL "x86")
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)
ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__")
ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
endif()
if(NCNN_RUNTIME_CPU AND NCNN_FMA)
ncnn_add_arch_opt_layer(${class} fma "/arch:AVX /D__SSE4_1__ /D__FMA__ /D__F16C__")
ncnn_add_arch_opt_layer(${class} fma "/arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
endif()
if(NCNN_RUNTIME_CPU AND NCNN_AVX)
ncnn_add_arch_opt_layer(${class} avx "/arch:AVX /D__SSE4_1__")
ncnn_add_arch_opt_layer(${class} avx "/arch:AVX /D__SSSE3__ /D__SSE4_1__")
endif()
if(NCNN_AVX512VNNI)
ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__")
ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__")
endif()
if(NCNN_AVX512BF16)
ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__")
ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__")
endif()
if(NCNN_AVX512FP16)
ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__")
ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__")
endif()
if(NCNN_AVXVNNI)
ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__")
ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__")
endif()
if(NCNN_AVX2)
ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2 /D__SSE4_1__ /D__FMA__ /D__F16C__")
ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
endif()
if(NCNN_XOP)
ncnn_add_arch_opt_source(${class} xop "/arch:AVX /D__SSE4_1__ /D__XOP__")
ncnn_add_arch_opt_source(${class} xop "/arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__XOP__")
endif()
if(NCNN_F16C)
ncnn_add_arch_opt_source(${class} f16c "/arch:AVX /D__SSE4_1__ /D__F16C__")
ncnn_add_arch_opt_source(${class} f16c "/arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__F16C__")
endif()
else()
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)
6 changes: 6 additions & 0 deletions docs/how-to-use-and-FAQ/quantized-int8-inference.md
Original file line number Diff line number Diff line change
@@ -48,6 +48,12 @@ If your model has multiple input nodes, you can use multiple list files and othe
./ncnn2int8 mobilenet-opt.param mobilenet-opt.bin mobilenet-int8.param mobilenet-int8.bin mobilenet.table
```

If you do not need static quantization, you can use dynamic quantization instead, which ncnn supports for RNN/LSTM/GRU layers. In this case, you can omit the table file.

```shell
./ncnn2int8 rnn-model.param rnn-model.bin rnn-model-int8.param rnn-model-int8.bin
```

## use ncnn int8 inference

The ncnn library uses int8 inference automatically; no changes to your code are required.
8 changes: 4 additions & 4 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -404,7 +404,7 @@ if(NCNN_TARGET_ARCH STREQUAL "x86")

if(NOT NCNN_RUNTIME_CPU AND NCNN_AVX512)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
target_compile_options(ncnn PRIVATE /arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__)
target_compile_options(ncnn PRIVATE /arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__)
if(NCNN_AVX512VNNI)
target_compile_options(ncnn PRIVATE /D__AVX512VNNI__)
endif()
@@ -423,9 +423,9 @@ if(NCNN_TARGET_ARCH STREQUAL "x86")
elseif(NOT NCNN_RUNTIME_CPU AND NCNN_FMA)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
if(NCNN_AVX2)
target_compile_options(ncnn PRIVATE /arch:AVX2 /D__SSE4_1__ /D__FMA__)
target_compile_options(ncnn PRIVATE /arch:AVX2 /D__SSSE3__ /D__SSE4_1__ /D__FMA__)
else()
target_compile_options(ncnn PRIVATE /arch:AVX /D__SSE4_1__ /D__FMA__)
target_compile_options(ncnn PRIVATE /arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__FMA__)
endif()
if(NCNN_AVXVNNI)
target_compile_options(ncnn PRIVATE /D__AVXVNNI__)
@@ -452,7 +452,7 @@ if(NCNN_TARGET_ARCH STREQUAL "x86")
endif()
elseif(NOT NCNN_RUNTIME_CPU AND NCNN_AVX)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
target_compile_options(ncnn PRIVATE /arch:AVX /D__SSE4_1__)
target_compile_options(ncnn PRIVATE /arch:AVX /D__SSSE3__ /D__SSE4_1__)
if(NCNN_XOP)
target_compile_options(ncnn PRIVATE /D__XOP__)
endif()
Loading

0 comments on commit 08b7d99

Please sign in to comment.