From 8a4c891e0425e2fa71cef51978b32a1e8307ba09 Mon Sep 17 00:00:00 2001 From: Daniel Gregory Date: Wed, 10 Jul 2024 14:14:17 +0000 Subject: [PATCH 1/3] build: Add riscv64 support Use the base implementations for every function. Signed-off-by: Daniel Gregory --- Makefile.am | 14 ++++++++++++++ configure.ac | 2 ++ crc/Makefile.am | 1 + erasure_code/Makefile.am | 1 + igzip/Makefile.am | 1 + mem/Makefile.am | 1 + raid/Makefile.am | 1 + 7 files changed, 21 insertions(+) diff --git a/Makefile.am b/Makefile.am index ce22ebe4..4fdcec6a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -29,10 +29,12 @@ other_tests_x86_64= other_tests_x86_32= other_tests_aarch64= other_tests_ppc64le= +other_tests_riscv64= lsrc_x86_64= lsrc_x86_32= lsrc_aarch64= lsrc_ppc64le= +lsrc_riscv64= lsrc_base_aliases= lsrc32= unit_tests32= @@ -83,6 +85,12 @@ libisal_la_SOURCES += ${lsrc_ppc64le} other_tests += ${other_tests_ppc64le} endif +if CPU_RISCV64 +ARCH=-Driscv64 +libisal_la_SOURCES += ${lsrc_riscv64} +other_tests += ${other_tests_riscv64} +endif + if CPU_UNDEFINED libisal_la_SOURCES += ${lsrc_base_aliases} endif @@ -131,6 +139,9 @@ endif if CPU_AARCH64 as_filter = $(CC) -D__ASSEMBLY__ endif +if CPU_RISCV64 + as_filter = $(CC) -D__ASSEMBLY__ +endif CCAS = $(as_filter) EXTRA_DIST += tools/yasm-filter.sh tools/nasm-filter.sh @@ -142,6 +153,9 @@ AM_CCASFLAGS = ${AM_CFLAGS} else AM_CCASFLAGS = ${yasm_args} ${INCLUDE} ${src_include} ${DEFS} ${D} endif +if CPU_RISCV64 +AM_CCASFLAGS = ${AM_CFLAGS} +endif .asm.s: @echo " MKTMP " $@; diff --git a/configure.ac b/configure.ac index 72500eec..62aae78a 100644 --- a/configure.ac +++ b/configure.ac @@ -31,11 +31,13 @@ AS_CASE([$host_cpu], [arm64], [CPU="aarch64"], [powerpc64le], [CPU="ppc64le"], [ppc64le], [CPU="ppc64le"], + [riscv64], [CPU="riscv64"], ) AM_CONDITIONAL([CPU_X86_64], [test "$CPU" = "x86_64"]) AM_CONDITIONAL([CPU_X86_32], [test "$CPU" = "x86_32"]) AM_CONDITIONAL([CPU_AARCH64], [test "$CPU" = "aarch64"]) AM_CONDITIONAL([CPU_PPC64LE], [test "$CPU" = 
"ppc64le"]) +AM_CONDITIONAL([CPU_RISCV64], [test "$CPU" = "riscv64"]) AM_CONDITIONAL([CPU_UNDEFINED], [test "x$CPU" = "x"]) if test "$CPU" = "x86_64"; then diff --git a/crc/Makefile.am b/crc/Makefile.am index 6aed74d9..72be45a0 100644 --- a/crc/Makefile.am +++ b/crc/Makefile.am @@ -36,6 +36,7 @@ lsrc += \ lsrc_base_aliases += crc/crc_base_aliases.c lsrc_x86_32 += crc/crc_base_aliases.c lsrc_ppc64le += crc/crc_base_aliases.c +lsrc_riscv64 += crc/crc_base_aliases.c lsrc_x86_64 += \ crc/crc16_t10dif_01.asm \ diff --git a/erasure_code/Makefile.am b/erasure_code/Makefile.am index 8f334462..15bb4265 100644 --- a/erasure_code/Makefile.am +++ b/erasure_code/Makefile.am @@ -34,6 +34,7 @@ include erasure_code/ppc64le/Makefile.am lsrc += erasure_code/ec_base.c lsrc_base_aliases += erasure_code/ec_base_aliases.c +lsrc_riscv64 += erasure_code/ec_base_aliases.c lsrc_x86_64 += \ erasure_code/ec_highlevel_func.c \ erasure_code/gf_vect_mul_sse.asm \ diff --git a/igzip/Makefile.am b/igzip/Makefile.am index bec359ab..01622914 100644 --- a/igzip/Makefile.am +++ b/igzip/Makefile.am @@ -39,6 +39,7 @@ lsrc += igzip/igzip.c \ lsrc_base_aliases += igzip/igzip_base_aliases.c igzip/proc_heap_base.c lsrc_x86_32 += igzip/igzip_base_aliases.c igzip/proc_heap_base.c lsrc_ppc64le += igzip/igzip_base_aliases.c igzip/proc_heap_base.c +lsrc_riscv64 += igzip/igzip_base_aliases.c igzip/proc_heap_base.c lsrc_aarch64 += igzip/aarch64/igzip_inflate_multibinary_arm64.S \ igzip/aarch64/igzip_multibinary_arm64.S \ diff --git a/mem/Makefile.am b/mem/Makefile.am index a49fc0c5..6c537de4 100644 --- a/mem/Makefile.am +++ b/mem/Makefile.am @@ -33,6 +33,7 @@ lsrc += mem/mem_zero_detect_base.c lsrc_base_aliases += mem/mem_zero_detect_base_aliases.c lsrc_ppc64le += mem/mem_zero_detect_base_aliases.c +lsrc_riscv64 += mem/mem_zero_detect_base_aliases.c lsrc_x86_64 += mem/mem_zero_detect_avx512.asm \ mem/mem_zero_detect_avx2.asm \ diff --git a/raid/Makefile.am b/raid/Makefile.am index 854f258e..63ab6a21 100644 --- 
a/raid/Makefile.am +++ b/raid/Makefile.am @@ -33,6 +33,7 @@ lsrc += raid/raid_base.c lsrc_base_aliases += raid/raid_base_aliases.c lsrc_ppc64le += raid/raid_base_aliases.c +lsrc_riscv64 += raid/raid_base_aliases.c lsrc_x86_64 += \ raid/xor_gen_sse.asm \ From b9e602283fe31d3debc74f8ad331d463b9e1720d Mon Sep 17 00:00:00 2001 From: Daniel Gregory Date: Wed, 10 Jul 2024 14:24:29 +0000 Subject: [PATCH 2/3] riscv64: Implement optimised crc using zbc and zbb The Zbc extension defines instructions for carryless multiplication that can be used to accelerate the calculation of CRC checksums. This technique is described in Intel's whitepaper, "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction". The Zbb extension defines, among other bit manipulation operations, an instruction for byte-reversing a register (rev8). This is used when doing endianness swaps. crc_fold_common_clmul.h defines a macro that reduces a double-word aligned buffer to 128 bits by folding four 128-bit chunks in parallel then folding a single 128-bit chunk until less than two remain. This macro can be reused for all the CRC algorithms with some parametrisation controlling: - where the seed is xor-ed into the first fold - whether an endianness swap is needed on double-words read in - whether the algorithm is reflected, which affects whether clmulh gives back the high double word of a result or the low double word Where the algorithms differ more is in how the final 128-bits is reduced to a 32/64 bit result (which also changes if the algorithm is reflected) and how the buffer is made to be double-word aligned. 32-bit CRCs use a Barrett's reduction to reduce the buffer enough to be double-word aligned and to reduce any excess leftover after folding. 
As the different CRC32 algorithms isa-l supports differ in whether the seed is inverted and function signature, the alignment, excess and 128-bit reduction are defined as macros in crc32_*_common_clmul.h that the implementations (crc32_*.S) include and surround with algorithm-specific assembly and precomputed constants. This also makes it straightforward to reuse the macros to calculate crc16_t10dif. 64-bit CRCs use a table-based reduction to align the buffer and handle excess. All isa-l's CRC64 algorithms pass arguments in the same order and invert the seed before & after folding, so crc64_*_common_clmul.h both contain a macro for defining a CRC64 function with a particular name. Then each of the crc64_*.S contain a call to that macro along with the precomputed constants and lookup table. The .h header files added don't contain C code and so are excluded from Clang formatting, similarly to the header files defined for aarch64. Signed-off-by: Daniel Gregory --- .clang-format-ignore | 2 + crc/Makefile.am | 2 +- crc/riscv64/Makefile.am | 43 ++++ crc/riscv64/crc16_t10dif.S | 73 ++++++ crc/riscv64/crc32_gzip_refl.S | 75 ++++++ crc/riscv64/crc32_ieee.S | 78 ++++++ crc/riscv64/crc32_iscsi.S | 79 ++++++ crc/riscv64/crc32_norm_common_clmul.h | 198 +++++++++++++++ crc/riscv64/crc32_refl_common_clmul.h | 180 ++++++++++++++ crc/riscv64/crc64_ecma_norm.S | 179 ++++++++++++++ crc/riscv64/crc64_ecma_refl.S | 179 ++++++++++++++ crc/riscv64/crc64_iso_norm.S | 178 ++++++++++++++ crc/riscv64/crc64_iso_refl.S | 179 ++++++++++++++ crc/riscv64/crc64_jones_norm.S | 179 ++++++++++++++ crc/riscv64/crc64_jones_refl.S | 179 ++++++++++++++ crc/riscv64/crc64_norm_common_clmul.h | 104 ++++++++ crc/riscv64/crc64_refl_common_clmul.h | 104 ++++++++ crc/riscv64/crc64_rocksoft_norm.S | 179 ++++++++++++++ crc/riscv64/crc64_rocksoft_refl.S | 179 ++++++++++++++ crc/riscv64/crc_fold_common_clmul.h | 342 ++++++++++++++++++++++++++ 20 files changed, 2710 insertions(+), 1 deletion(-) create mode 100644 
crc/riscv64/Makefile.am create mode 100644 crc/riscv64/crc16_t10dif.S create mode 100644 crc/riscv64/crc32_gzip_refl.S create mode 100644 crc/riscv64/crc32_ieee.S create mode 100644 crc/riscv64/crc32_iscsi.S create mode 100644 crc/riscv64/crc32_norm_common_clmul.h create mode 100644 crc/riscv64/crc32_refl_common_clmul.h create mode 100644 crc/riscv64/crc64_ecma_norm.S create mode 100644 crc/riscv64/crc64_ecma_refl.S create mode 100644 crc/riscv64/crc64_iso_norm.S create mode 100644 crc/riscv64/crc64_iso_refl.S create mode 100644 crc/riscv64/crc64_jones_norm.S create mode 100644 crc/riscv64/crc64_jones_refl.S create mode 100644 crc/riscv64/crc64_norm_common_clmul.h create mode 100644 crc/riscv64/crc64_refl_common_clmul.h create mode 100644 crc/riscv64/crc64_rocksoft_norm.S create mode 100644 crc/riscv64/crc64_rocksoft_refl.S create mode 100644 crc/riscv64/crc_fold_common_clmul.h diff --git a/.clang-format-ignore b/.clang-format-ignore index e2f8c370..9fdf5928 100644 --- a/.clang-format-ignore +++ b/.clang-format-ignore @@ -1,3 +1,5 @@ include/aarch64_multibinary.h include/aarch64_label.h **/aarch64/*.h + +**/riscv64/*.h diff --git a/crc/Makefile.am b/crc/Makefile.am index 72be45a0..5264e005 100644 --- a/crc/Makefile.am +++ b/crc/Makefile.am @@ -28,6 +28,7 @@ ######################################################################## include crc/aarch64/Makefile.am +include crc/riscv64/Makefile.am lsrc += \ crc/crc_base.c \ @@ -36,7 +37,6 @@ lsrc += \ lsrc_base_aliases += crc/crc_base_aliases.c lsrc_x86_32 += crc/crc_base_aliases.c lsrc_ppc64le += crc/crc_base_aliases.c -lsrc_riscv64 += crc/crc_base_aliases.c lsrc_x86_64 += \ crc/crc16_t10dif_01.asm \ diff --git a/crc/riscv64/Makefile.am b/crc/riscv64/Makefile.am new file mode 100644 index 00000000..b2ea4573 --- /dev/null +++ b/crc/riscv64/Makefile.am @@ -0,0 +1,43 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +lsrc_riscv64 += \ + crc/riscv64/crc16_t10dif.S \ + crc/riscv64/crc32_gzip_refl.S \ + crc/riscv64/crc32_ieee.S \ + crc/riscv64/crc32_iscsi.S \ + crc/riscv64/crc64_ecma_norm.S \ + crc/riscv64/crc64_ecma_refl.S \ + crc/riscv64/crc64_iso_norm.S \ + crc/riscv64/crc64_iso_refl.S \ + crc/riscv64/crc64_jones_norm.S \ + crc/riscv64/crc64_jones_refl.S \ + crc/riscv64/crc64_rocksoft_norm.S \ + crc/riscv64/crc64_rocksoft_refl.S + diff --git a/crc/riscv64/crc16_t10dif.S b/crc/riscv64/crc16_t10dif.S new file mode 100644 index 00000000..e3dfdf82 --- /dev/null +++ b/crc/riscv64/crc16_t10dif.S @@ -0,0 +1,73 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc32_norm_common_clmul.h" + +/* uint16_t crc16_t10dif(uint16_t init_crc, uint8_t *buf, uint64_t len) */ +.text +.align 1 +.global crc16_t10dif +.type crc16_t10dif, %function +crc16_t10dif: + /* load precomputed constants */ + ld POLY, .poly + ld MU, .mu + + /* shift 16-bit seed into the upper half of a 32-bit crc */ + slli SEED, SEED, 16 + + /* align and fold as though we're calculating a 32-bit crc */ + crc32_norm_align + + crc_fold_loop 32 1 0 + crc32_norm_fold_reduction + + crc32_norm_excess + + /* shift the result back down to 16 bits */ + srli SEED, SEED, 16 + ret + +/* precomputed constants */ +.poly: + .dword 0x000000018bb70000 +.mu: + .dword 0x00000001f65a57f8 +.k1: + .dword 0x00000000371d0000 +.k2: + .dword 0x0000000087e70000 +.k3: + .dword 0x000000004c1a0000 +.k4: + .dword 0x00000000fb0b0000 +.k5: + .dword 0x000000002d560000 +.k6: + .dword 0x0000000013680000 diff --git a/crc/riscv64/crc32_gzip_refl.S b/crc/riscv64/crc32_gzip_refl.S new file mode 100644 index 00000000..9241a72e --- /dev/null +++ b/crc/riscv64/crc32_gzip_refl.S @@ -0,0 +1,75 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#include "crc32_refl_common_clmul.h" + +/* uint32_t crc32_gzip_refl(uint32_t seed, uint8_t *buf, uint64_t len) */ +.text +.align 1 +.global crc32_gzip_refl +.type crc32_gzip_refl, %function +crc32_gzip_refl: + /* load precomputed constants */ + ld POLY, .poly_refl + ld MU, .mu + + /* invert and zero-extend seed */ + not SEED, SEED + slli SEED, SEED, 32 + srli SEED, SEED, 32 + + /* align buffer to 64-bits, then fold */ + crc32_refl_align + + crc_fold_loop 32 0 1 + crc32_refl_fold_reduction + + /* handle any excess */ + crc32_refl_excess + + /* sign-extend and invert result */ + sext.w SEED, SEED + not SEED, SEED + ret + +/* precomputed constants */ +.poly_refl: + .dword 0x00000001db710641 +.mu: + .dword 0xb4e5b025f7011641 +.k1: + .dword 0x0000000154442bd4 +.k2: + .dword 0x00000001c6e41596 +.k3: + .dword 0x00000001751997d0 +.k4: + .dword 0x00000000ccaa009e +.k5: + .dword 0x0000000163cd6124 diff --git a/crc/riscv64/crc32_ieee.S b/crc/riscv64/crc32_ieee.S new file mode 100644 index 00000000..af6c2ef5 --- /dev/null +++ b/crc/riscv64/crc32_ieee.S @@ -0,0 +1,78 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc32_norm_common_clmul.h" + +/* uint32_t crc32_ieee(uint32_t init_crc, uint8_t *buf, uint64_t len) */ +.text +.align 1 +.global crc32_ieee +.type crc32_ieee, %function +crc32_ieee: + /* load precomputed constants */ + ld POLY, .poly + ld MU, .mu + + /* invert and zero-extend seed (riscv calling convention has uint32_t + * passed in and returned sign-extended) + */ + not SEED, SEED + slli SEED, SEED, 32 + srli SEED, SEED, 32 + + /* align and fold buffer */ + crc32_norm_align + + crc_fold_loop 32 1 0 + crc32_norm_fold_reduction + + crc32_norm_excess + + /* sign-extend and invert result */ + sext.w SEED, SEED + not SEED, SEED + ret + +/* precomputed constants */ +.poly: + .dword 0x0000000104c11db7 +.mu: + .dword 0x0000000104d101df +.k1: + .dword 0x000000008833794c +.k2: + .dword 0x00000000e6228b11 +.k3: + .dword 0x00000000c5b9cd4c +.k4: + .dword 0x00000000e8a45605 +.k5: + .dword 0x00000000f200aa66 +.k6: + .dword 0x00000000490d678d diff --git a/crc/riscv64/crc32_iscsi.S b/crc/riscv64/crc32_iscsi.S new file mode 100644 index 00000000..abcf14d5 --- /dev/null +++ b/crc/riscv64/crc32_iscsi.S @@ -0,0 +1,79 @@ 
+######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#include "crc32_refl_common_clmul.h" + +/* uint32_t crc32_iscsi(uint8_t *buffer, int len, uint32_t init_crc) */ +.text +.align 1 +.global crc32_iscsi +.type crc32_iscsi, %function +crc32_iscsi: + /* switch around arguments to match common crc functions; NOTE(review): the signed int len is not checked for negative values here - confirm callers */ + mv t0, a2 + mv a2, a1 + mv a1, a0 + mv a0, t0 + + /* load precomputed constants */ + ld POLY, .poly_refl + ld MU, .mu + + /* zero-extend seed */ + slli SEED, SEED, 32 + srli SEED, SEED, 32 + + /* align buffer to 64-bits, then fold */ + crc32_refl_align + + crc_fold_loop 32 0 1 + crc32_refl_fold_reduction + + /* handle any remaining excess */ + crc32_refl_excess + + /* sign-extend result */ + sext.w SEED, SEED + ret + +/* precomputed constants */ +.poly_refl: + .dword 0x0000000105ec76f1 +.mu: + .dword 0x4869ec38dea713f1 +.k1: + .dword 0x00000000740eef02 +.k2: + .dword 0x000000009e4addf8 +.k3: + .dword 0x00000000f20c0dfe +.k4: + .dword 0x000000014cd00bd6 +.k5: + .dword 0x00000000dd45aab8 diff --git a/crc/riscv64/crc32_norm_common_clmul.h b/crc/riscv64/crc32_norm_common_clmul.h new file mode 100644 index 00000000..96dc1559 --- /dev/null +++ b/crc/riscv64/crc32_norm_common_clmul.h @@ -0,0 +1,198 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc_fold_common_clmul.h" + +/* folding final reduction */ +/* expects 128-bit value in HIGH:LOW (t0:t1), puts return value in SEED (a0) */ +/* trashes t2, t3, a5, a6 and t5, t6 */ +.macro crc32_norm_fold_reduction + /* precomputed constants */ + ld K5, .k5 + ld K6, .k6 + + /* fold remaining 128 bits into 96 */ + clmulh t2, K5, HIGH + clmul t3, K5, HIGH + srli a5, LOW, 32 + slli a6, LOW, 32 + xor HIGH, t2, a5 + xor LOW, t3, a6 + + /* fold remaining 96 bits into 64 */ + clmul t0, K6, t0 + xor t1, t1, t0 + + /* barrett's reduce the 64-bits */ + clmulh HIGH, LOW, MU + clmul HIGH, HIGH, POLY + xor SEED, HIGH, LOW + +.fold_1_done: +.endm + +/* barrett's reduction on a \bits bit-length value, returning result in seed */ +/* bits must be 32, 16 or 8 */ +/* expects SEED (a0), MU (a3) and POLY (a4) to hold corresponding values */ +/* value and seed must be zero-extended */ +/* trashes t0 and t1 */ +.macro 
crc32_norm_barrett_reduce value:req, bits:req + /* combine value with seed */ +.if (\bits < 32) + srli t0, SEED, (32 - \bits) + xor t0, t0, \value +.else + xor t0, SEED, \value +.endif + + slli t0, t0, 32 + clmulh t0, t0, MU + clmul t0, t0, POLY + + /* subtract seed from original for smaller sizes */ +.if (\bits < 32) + slli t1, SEED, \bits + xor t0, t0, t1 +.endif + + /* zero-extend 32-bit return value */ + slli t0, t0, 32 + srli SEED, t0, 32 +.endm + +/* align buffer to 64-bits, updating seed */ +/* expects SEED (a0), BUF (a1), LEN (a2), MU (a3), POLY (a4) to hold values */ +/* expects crc32_norm_excess to be called later */ +/* trashes t0 and t1 */ +.macro crc32_norm_align + /* is buffer already aligned to 64-bits? */ + andi t0, BUF, 0b111 + beqz t0, .align_done + +.align_8: + /* is enough buffer left? */ + li t0, 1 + bltu LEN, t0, .excess_done + + /* is buffer misaligned by one byte? */ + andi t0, BUF, 0b001 + beqz t0, .align_16 + + /* perform barrett's reduction on one byte */ + lbu t1, (BUF) + crc32_norm_barrett_reduce t1, 8 + addi LEN, LEN, -1 + addi BUF, BUF, 1 + +.align_16: + li t0, 2 + bltu LEN, t0, .excess_8 + + andi t0, BUF, 0b010 + beqz t0, .align_32 + + /* byte reverse the next halfword */ + lhu t1, (BUF) + rev8 t1, t1 + srli t1, t1, 48 + + crc32_norm_barrett_reduce t1, 16 + addi LEN, LEN, -2 + addi BUF, BUF, 2 + +.align_32: + li t0, 4 + bltu LEN, t0, .excess_16 + + andi t0, BUF, 0b100 + beqz t0, .align_done + + /* byte reverse the next word */ + lwu t1, (BUF) + rev8 t1, t1 + srli t1, t1, 32 + + crc32_norm_barrett_reduce t1, 32 + addi LEN, LEN, -4 + addi BUF, BUF, 4 + +.align_done: +.endm + +/* barrett's reduce excess buffer left following fold */ +/* expects SEED (a0), BUF (a1), LEN (a2), MU (a3), POLY (a4) to hold values */ +/* expects less than 128 bits (at most 15 bytes) to be left in doubleword-aligned buffer */ +/* trashes t0, t1 and t3 */ +.macro crc32_norm_excess + /* is there any excess left? 
*/ + beqz LEN, .excess_done + +.excess_64: + andi t0, LEN, 0b1000 + beqz t0, .excess_32 + /* read in 64-bits and perform two 32-bit reductions */ + ld t3, (BUF) + rev8 t3, t3 + srli t1, t3, 32 + crc32_norm_barrett_reduce t1, 32 + slli t3, t3, 32 + srli t1, t3, 32 + crc32_norm_barrett_reduce t1, 32 + addi BUF, BUF, 8 + +.excess_32: + andi t0, LEN, 0b0100 + beqz t0, .excess_16 + + lwu t1, (BUF) + rev8 t1, t1 + srli t1, t1, 32 + + crc32_norm_barrett_reduce t1, 32 + addi BUF, BUF, 4 + +.excess_16: + andi t0, LEN, 0b0010 + beqz t0, .excess_8 + + lhu t1, (BUF) + rev8 t1, t1 + srli t1, t1, 48 + + crc32_norm_barrett_reduce t1, 16 + addi BUF, BUF, 2 + +.excess_8: + andi t0, LEN, 0b0001 + beqz t0, .excess_done + lbu t1, (BUF) + crc32_norm_barrett_reduce t1, 8 + +.excess_done: +.endm diff --git a/crc/riscv64/crc32_refl_common_clmul.h b/crc/riscv64/crc32_refl_common_clmul.h new file mode 100644 index 00000000..70c1f647 --- /dev/null +++ b/crc/riscv64/crc32_refl_common_clmul.h @@ -0,0 +1,180 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc_fold_common_clmul.h" + +/* folding reflected final reduction */ +/* expects 128-bit value in HIGH:LOW (t0:t1), puts return value in SEED (a0) */ +/* trashes t0, t1, t3 and the K4 and K5 constant registers */ +.macro crc32_refl_fold_reduction + /* load precalculated constants */ + ld K4, .k4 + ld K5, .k5 + + /* fold remaining 128 bits into 96 */ + clmul t3, K4, t0 + xor t1, t3, t1 + clmulh t0, K4, t0 + + /* high = (low >> 32) | (high << 32) */ + slli t0, t0, 32 + srli t3, t1, 32 + or t0, t0, t3 + + /* fold last 96 bits into 64 */ + slli t1, t1, 32 + srli t1, t1, 32 + clmul t1, K5, t1 + xor t1, t1, t0 + + /* barrett's reduce 64 bits */ + clmul t0, MU, t1 + slli t0, t0, 32 + srli t0, t0, 32 + clmul t0, POLY, t0 + xor t0, t1, t0 + srli SEED, t0, 32 + +.fold_1_done: +.endm + +/* barrett's reduction on a \bits bit-length value, returning result in seed */ +/* bits must be 64, 32, 16 or 8 */ +/* value and seed must be zero-extended; trashes t0 (and t1 when bits is 16 or 8) */ +.macro barrett_reduce seed:req, value:req, bits:req + /* combine value with seed */ + xor t0, \seed, \value +.if (\bits < 64) + slli t0, t0, (64 - \bits) +.endif + + /* multiply by mu, which is 
2^96 divided by our polynomial */ + clmul t0, t0, MU + +.if (\bits == 16) || (\bits == 8) + clmulh t0, t0, POLY + /* subtract from original for smaller sizes */ + srli t1, \seed, \bits + xor \seed, t0, t1 +.else + clmulh \seed, t0, POLY +.endif + +.endm + +/* align buffer to 64-bits updating seed */ +/* expects SEED (a0), BUF (a1), LEN (a2), MU (a3), POLY (a4) to hold values */ +/* expects crc32_refl_excess to be called later */ +/* trashes t0 and t1 */ +.macro crc32_refl_align + /* is buffer already aligned to 64-bits? */ + andi t0, BUF, 0b111 + beqz t0, .align_done + +.align_8: + /* is enough buffer left? */ + li t0, 1 + bltu LEN, t0, .excess_done + + /* is buffer misaligned by one byte? */ + andi t0, BUF, 0b001 + beqz t0, .align_16 + + /* perform barrett's reduction on one byte */ + lbu t1, (BUF) + barrett_reduce SEED, t1, 8 + addi LEN, LEN, -1 + addi BUF, BUF, 1 + +.align_16: + li t0, 2 + bltu LEN, t0, .excess_8 + + andi t0, BUF, 0b010 + beqz t0, .align_32 + + lhu t1, (BUF) + barrett_reduce SEED, t1, 16 + addi LEN, LEN, -2 + addi BUF, BUF, 2 + +.align_32: + li t0, 4 + bltu LEN, t0, .excess_16 + + andi t0, BUF, 0b100 + beqz t0, .align_done + + lwu t1, (BUF) + barrett_reduce SEED, t1, 32 + addi LEN, LEN, -4 + addi BUF, BUF, 4 + +.align_done: +.endm + +/* barrett's reduce excess buffer left following fold */ +/* expects SEED (a0), BUF (a1), LEN (a2), MU (a3), POLY (a4) to hold values */ +/* expects less than 128 bits (at most 15 bytes) to be left in doubleword-aligned buffer */ +/* trashes t0 and t1 */ +.macro crc32_refl_excess + /* do we have any excess left? 
*/ + beqz LEN, .excess_done + + /* barret's reduce the remaining excess */ + /* at most there is 127 bytes left */ +.excess_64: + andi t0, LEN, 0b1000 + beqz t0, .excess_32 + ld t1, (BUF) + barrett_reduce SEED, t1, 64 + addi BUF, BUF, 8 + +.excess_32: + andi t0, LEN, 0b0100 + beqz t0, .excess_16 + lwu t1, (BUF) + barrett_reduce SEED, t1, 32 + addi BUF, BUF, 4 + +.excess_16: + andi t0, LEN, 0b0010 + beqz t0, .excess_8 + lhu t1, (BUF) + barrett_reduce SEED, t1, 16 + addi BUF, BUF, 2 + +.excess_8: + andi t0, LEN, 0b0001 + beqz t0, .excess_done + lbu t1, (BUF) + barrett_reduce SEED, t1, 8 + +.excess_done: +.endm diff --git a/crc/riscv64/crc64_ecma_norm.S b/crc/riscv64/crc64_ecma_norm.S new file mode 100644 index 00000000..e7e0554f --- /dev/null +++ b/crc/riscv64/crc64_ecma_norm.S @@ -0,0 +1,179 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc64_norm_common_clmul.h" + +/* uint64_t crc64_ecma_norm(uint64_t init_crc, uint8_t *buf, uint64_t len) */ +crc64_func_norm crc64_ecma_norm + +/* precomputed folding constants */ +.poly: + .dword 0x42f0e1eba9ea3693 /* excludes leading 1 */ +.mu: + .dword 0x578d29d06cc4f872 /* excludes leading 1 */ +.k1: + .dword 0xddf4b6981205b83f +.k2: + .dword 0x5f6843ca540df020 +.k3: + .dword 0x4eb938a7d257740e +.k4: +.k5: + .dword 0x05f5c3c7eb52fab6 + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0x42f0e1eba9ea3693 + .dword 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5 + .dword 0x493366450e42ecdf, 0x0bc387aea7a8da4c + .dword 0xccd2a5925d9681f9, 0x8e224479f47cb76a + .dword 0x9266cc8a1c85d9be, 0xd0962d61b56fef2d + .dword 0x17870f5d4f51b498, 0x5577eeb6e6bb820b + .dword 0xdb55aacf12c73561, 0x99a54b24bb2d03f2 + .dword 0x5eb4691841135847, 0x1c4488f3e8f96ed4 + .dword 0x663d78ff90e185ef, 0x24cd9914390bb37c + .dword 0xe3dcbb28c335e8c9, 0xa12c5ac36adfde5a + .dword 0x2f0e1eba9ea36930, 0x6dfeff5137495fa3 + .dword 0xaaefdd6dcd770416, 0xe81f3c86649d3285 + .dword 0xf45bb4758c645c51, 0xb6ab559e258e6ac2 + .dword 0x71ba77a2dfb03177, 0x334a9649765a07e4 + .dword 0xbd68d2308226b08e, 0xff9833db2bcc861d + .dword 0x388911e7d1f2dda8, 0x7a79f00c7818eb3b + .dword 0xcc7af1ff21c30bde, 0x8e8a101488293d4d + .dword 0x499b3228721766f8, 0x0b6bd3c3dbfd506b + .dword 0x854997ba2f81e701, 
0xc7b97651866bd192 + .dword 0x00a8546d7c558a27, 0x4258b586d5bfbcb4 + .dword 0x5e1c3d753d46d260, 0x1cecdc9e94ace4f3 + .dword 0xdbfdfea26e92bf46, 0x990d1f49c77889d5 + .dword 0x172f5b3033043ebf, 0x55dfbadb9aee082c + .dword 0x92ce98e760d05399, 0xd03e790cc93a650a + .dword 0xaa478900b1228e31, 0xe8b768eb18c8b8a2 + .dword 0x2fa64ad7e2f6e317, 0x6d56ab3c4b1cd584 + .dword 0xe374ef45bf6062ee, 0xa1840eae168a547d + .dword 0x66952c92ecb40fc8, 0x2465cd79455e395b + .dword 0x3821458aada7578f, 0x7ad1a461044d611c + .dword 0xbdc0865dfe733aa9, 0xff3067b657990c3a + .dword 0x711223cfa3e5bb50, 0x33e2c2240a0f8dc3 + .dword 0xf4f3e018f031d676, 0xb60301f359dbe0e5 + .dword 0xda050215ea6c212f, 0x98f5e3fe438617bc + .dword 0x5fe4c1c2b9b84c09, 0x1d14202910527a9a + .dword 0x93366450e42ecdf0, 0xd1c685bb4dc4fb63 + .dword 0x16d7a787b7faa0d6, 0x5427466c1e109645 + .dword 0x4863ce9ff6e9f891, 0x0a932f745f03ce02 + .dword 0xcd820d48a53d95b7, 0x8f72eca30cd7a324 + .dword 0x0150a8daf8ab144e, 0x43a04931514122dd + .dword 0x84b16b0dab7f7968, 0xc6418ae602954ffb + .dword 0xbc387aea7a8da4c0, 0xfec89b01d3679253 + .dword 0x39d9b93d2959c9e6, 0x7b2958d680b3ff75 + .dword 0xf50b1caf74cf481f, 0xb7fbfd44dd257e8c + .dword 0x70eadf78271b2539, 0x321a3e938ef113aa + .dword 0x2e5eb66066087d7e, 0x6cae578bcfe24bed + .dword 0xabbf75b735dc1058, 0xe94f945c9c3626cb + .dword 0x676dd025684a91a1, 0x259d31cec1a0a732 + .dword 0xe28c13f23b9efc87, 0xa07cf2199274ca14 + .dword 0x167ff3eacbaf2af1, 0x548f120162451c62 + .dword 0x939e303d987b47d7, 0xd16ed1d631917144 + .dword 0x5f4c95afc5edc62e, 0x1dbc74446c07f0bd + .dword 0xdaad56789639ab08, 0x985db7933fd39d9b + .dword 0x84193f60d72af34f, 0xc6e9de8b7ec0c5dc + .dword 0x01f8fcb784fe9e69, 0x43081d5c2d14a8fa + .dword 0xcd2a5925d9681f90, 0x8fdab8ce70822903 + .dword 0x48cb9af28abc72b6, 0x0a3b7b1923564425 + .dword 0x70428b155b4eaf1e, 0x32b26afef2a4998d + .dword 0xf5a348c2089ac238, 0xb753a929a170f4ab + .dword 0x3971ed50550c43c1, 0x7b810cbbfce67552 + .dword 0xbc902e8706d82ee7, 0xfe60cf6caf321874 + .dword 
0xe224479f47cb76a0, 0xa0d4a674ee214033 + .dword 0x67c58448141f1b86, 0x253565a3bdf52d15 + .dword 0xab1721da49899a7f, 0xe9e7c031e063acec + .dword 0x2ef6e20d1a5df759, 0x6c0603e6b3b7c1ca + .dword 0xf6fae5c07d3274cd, 0xb40a042bd4d8425e + .dword 0x731b26172ee619eb, 0x31ebc7fc870c2f78 + .dword 0xbfc9838573709812, 0xfd39626eda9aae81 + .dword 0x3a28405220a4f534, 0x78d8a1b9894ec3a7 + .dword 0x649c294a61b7ad73, 0x266cc8a1c85d9be0 + .dword 0xe17dea9d3263c055, 0xa38d0b769b89f6c6 + .dword 0x2daf4f0f6ff541ac, 0x6f5faee4c61f773f + .dword 0xa84e8cd83c212c8a, 0xeabe6d3395cb1a19 + .dword 0x90c79d3fedd3f122, 0xd2377cd44439c7b1 + .dword 0x15265ee8be079c04, 0x57d6bf0317edaa97 + .dword 0xd9f4fb7ae3911dfd, 0x9b041a914a7b2b6e + .dword 0x5c1538adb04570db, 0x1ee5d94619af4648 + .dword 0x02a151b5f156289c, 0x4051b05e58bc1e0f + .dword 0x87409262a28245ba, 0xc5b073890b687329 + .dword 0x4b9237f0ff14c443, 0x0962d61b56fef2d0 + .dword 0xce73f427acc0a965, 0x8c8315cc052a9ff6 + .dword 0x3a80143f5cf17f13, 0x7870f5d4f51b4980 + .dword 0xbf61d7e80f251235, 0xfd913603a6cf24a6 + .dword 0x73b3727a52b393cc, 0x31439391fb59a55f + .dword 0xf652b1ad0167feea, 0xb4a25046a88dc879 + .dword 0xa8e6d8b54074a6ad, 0xea16395ee99e903e + .dword 0x2d071b6213a0cb8b, 0x6ff7fa89ba4afd18 + .dword 0xe1d5bef04e364a72, 0xa3255f1be7dc7ce1 + .dword 0x64347d271de22754, 0x26c49cccb40811c7 + .dword 0x5cbd6cc0cc10fafc, 0x1e4d8d2b65facc6f + .dword 0xd95caf179fc497da, 0x9bac4efc362ea149 + .dword 0x158e0a85c2521623, 0x577eeb6e6bb820b0 + .dword 0x906fc95291867b05, 0xd29f28b9386c4d96 + .dword 0xcedba04ad0952342, 0x8c2b41a1797f15d1 + .dword 0x4b3a639d83414e64, 0x09ca82762aab78f7 + .dword 0x87e8c60fded7cf9d, 0xc51827e4773df90e + .dword 0x020905d88d03a2bb, 0x40f9e43324e99428 + .dword 0x2cffe7d5975e55e2, 0x6e0f063e3eb46371 + .dword 0xa91e2402c48a38c4, 0xebeec5e96d600e57 + .dword 0x65cc8190991cb93d, 0x273c607b30f68fae + .dword 0xe02d4247cac8d41b, 0xa2dda3ac6322e288 + .dword 0xbe992b5f8bdb8c5c, 0xfc69cab42231bacf + .dword 0x3b78e888d80fe17a, 
0x7988096371e5d7e9 + .dword 0xf7aa4d1a85996083, 0xb55aacf12c735610 + .dword 0x724b8ecdd64d0da5, 0x30bb6f267fa73b36 + .dword 0x4ac29f2a07bfd00d, 0x08327ec1ae55e69e + .dword 0xcf235cfd546bbd2b, 0x8dd3bd16fd818bb8 + .dword 0x03f1f96f09fd3cd2, 0x41011884a0170a41 + .dword 0x86103ab85a2951f4, 0xc4e0db53f3c36767 + .dword 0xd8a453a01b3a09b3, 0x9a54b24bb2d03f20 + .dword 0x5d45907748ee6495, 0x1fb5719ce1045206 + .dword 0x919735e51578e56c, 0xd367d40ebc92d3ff + .dword 0x1476f63246ac884a, 0x568617d9ef46bed9 + .dword 0xe085162ab69d5e3c, 0xa275f7c11f7768af + .dword 0x6564d5fde549331a, 0x279434164ca30589 + .dword 0xa9b6706fb8dfb2e3, 0xeb46918411358470 + .dword 0x2c57b3b8eb0bdfc5, 0x6ea7525342e1e956 + .dword 0x72e3daa0aa188782, 0x30133b4b03f2b111 + .dword 0xf7021977f9cceaa4, 0xb5f2f89c5026dc37 + .dword 0x3bd0bce5a45a6b5d, 0x79205d0e0db05dce + .dword 0xbe317f32f78e067b, 0xfcc19ed95e6430e8 + .dword 0x86b86ed5267cdbd3, 0xc4488f3e8f96ed40 + .dword 0x0359ad0275a8b6f5, 0x41a94ce9dc428066 + .dword 0xcf8b0890283e370c, 0x8d7be97b81d4019f + .dword 0x4a6acb477bea5a2a, 0x089a2aacd2006cb9 + .dword 0x14dea25f3af9026d, 0x562e43b4931334fe + .dword 0x913f6188692d6f4b, 0xd3cf8063c0c759d8 + .dword 0x5dedc41a34bbeeb2, 0x1f1d25f19d51d821 + .dword 0xd80c07cd676f8394, 0x9afce626ce85b507 diff --git a/crc/riscv64/crc64_ecma_refl.S b/crc/riscv64/crc64_ecma_refl.S new file mode 100644 index 00000000..4efa22f7 --- /dev/null +++ b/crc/riscv64/crc64_ecma_refl.S @@ -0,0 +1,179 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#include "crc64_refl_common_clmul.h" + +/* uint64_t crc64_ecma_refl(uint64_t init_crc, uint8_t *buf, uint64_t len) */ +crc64_func_refl crc64_ecma_refl + +/* precomputed folding constants */ +.poly: + .dword 0x92d8af2baf0e1e85 /* poly reflected, excluding leading 1 */ +.mu: + .dword 0x9c3e466c172963d5 +.k1: + .dword 0x6ae3efbb9dd441f3 +.k2: + .dword 0x081f6054a7842df4 +.k3: + .dword 0xe05dd497ca393ae4 +.k4: +.k5: + .dword 0xdabe95afc7875f40 + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0xb32e4cbe03a75f6f + .dword 0xf4843657a840a05b, 0x47aa7ae9abe7ff34 + .dword 0x7bd0c384ff8f5e33, 0xc8fe8f3afc28015c + .dword 0x8f54f5d357cffe68, 0x3c7ab96d5468a107 + .dword 0xf7a18709ff1ebc66, 0x448fcbb7fcb9e309 + .dword 0x0325b15e575e1c3d, 0xb00bfde054f94352 + .dword 0x8c71448d0091e255, 0x3f5f08330336bd3a + .dword 0x78f572daa8d1420e, 0xcbdb3e64ab761d61 + .dword 0x7d9ba13851336649, 0xceb5ed8652943926 + .dword 0x891f976ff973c612, 0x3a31dbd1fad4997d + .dword 0x064b62bcaebc387a, 0xb5652e02ad1b6715 + .dword 0xf2cf54eb06fc9821, 0x41e11855055bc74e + .dword 0x8a3a2631ae2dda2f, 0x39146a8fad8a8540 + .dword 0x7ebe1066066d7a74, 0xcd905cd805ca251b + .dword 0xf1eae5b551a2841c, 0x42c4a90b5205db73 + .dword 0x056ed3e2f9e22447, 0xb6409f5cfa457b28 + .dword 0xfb374270a266cc92, 0x48190ecea1c193fd + .dword 0x0fb374270a266cc9, 0xbc9d3899098133a6 + .dword 0x80e781f45de992a1, 0x33c9cd4a5e4ecdce + .dword 0x7463b7a3f5a932fa, 0xc74dfb1df60e6d95 + .dword 0x0c96c5795d7870f4, 0xbfb889c75edf2f9b + .dword 0xf812f32ef538d0af, 0x4b3cbf90f69f8fc0 + .dword 0x774606fda2f72ec7, 0xc4684a43a15071a8 + .dword 0x83c230aa0ab78e9c, 0x30ec7c140910d1f3 + .dword 0x86ace348f355aadb, 0x3582aff6f0f2f5b4 + .dword 0x7228d51f5b150a80, 0xc10699a158b255ef + .dword 0xfd7c20cc0cdaf4e8, 0x4e526c720f7dab87 + .dword 0x09f8169ba49a54b3, 0xbad65a25a73d0bdc + .dword 0x710d64410c4b16bd, 0xc22328ff0fec49d2 + .dword 0x85895216a40bb6e6, 0x36a71ea8a7ace989 + 
.dword 0x0adda7c5f3c4488e, 0xb9f3eb7bf06317e1 + .dword 0xfe5991925b84e8d5, 0x4d77dd2c5823b7ba + .dword 0x64b62bcaebc387a1, 0xd7986774e864d8ce + .dword 0x90321d9d438327fa, 0x231c512340247895 + .dword 0x1f66e84e144cd992, 0xac48a4f017eb86fd + .dword 0xebe2de19bc0c79c9, 0x58cc92a7bfab26a6 + .dword 0x9317acc314dd3bc7, 0x2039e07d177a64a8 + .dword 0x67939a94bc9d9b9c, 0xd4bdd62abf3ac4f3 + .dword 0xe8c76f47eb5265f4, 0x5be923f9e8f53a9b + .dword 0x1c4359104312c5af, 0xaf6d15ae40b59ac0 + .dword 0x192d8af2baf0e1e8, 0xaa03c64cb957be87 + .dword 0xeda9bca512b041b3, 0x5e87f01b11171edc + .dword 0x62fd4976457fbfdb, 0xd1d305c846d8e0b4 + .dword 0x96797f21ed3f1f80, 0x2557339fee9840ef + .dword 0xee8c0dfb45ee5d8e, 0x5da24145464902e1 + .dword 0x1a083bacedaefdd5, 0xa9267712ee09a2ba + .dword 0x955cce7fba6103bd, 0x267282c1b9c65cd2 + .dword 0x61d8f8281221a3e6, 0xd2f6b4961186fc89 + .dword 0x9f8169ba49a54b33, 0x2caf25044a02145c + .dword 0x6b055fede1e5eb68, 0xd82b1353e242b407 + .dword 0xe451aa3eb62a1500, 0x577fe680b58d4a6f + .dword 0x10d59c691e6ab55b, 0xa3fbd0d71dcdea34 + .dword 0x6820eeb3b6bbf755, 0xdb0ea20db51ca83a + .dword 0x9ca4d8e41efb570e, 0x2f8a945a1d5c0861 + .dword 0x13f02d374934a966, 0xa0de61894a93f609 + .dword 0xe7741b60e174093d, 0x545a57dee2d35652 + .dword 0xe21ac88218962d7a, 0x5134843c1b317215 + .dword 0x169efed5b0d68d21, 0xa5b0b26bb371d24e + .dword 0x99ca0b06e7197349, 0x2ae447b8e4be2c26 + .dword 0x6d4e3d514f59d312, 0xde6071ef4cfe8c7d + .dword 0x15bb4f8be788911c, 0xa6950335e42fce73 + .dword 0xe13f79dc4fc83147, 0x521135624c6f6e28 + .dword 0x6e6b8c0f1807cf2f, 0xdd45c0b11ba09040 + .dword 0x9aefba58b0476f74, 0x29c1f6e6b3e0301b + .dword 0xc96c5795d7870f42, 0x7a421b2bd420502d + .dword 0x3de861c27fc7af19, 0x8ec62d7c7c60f076 + .dword 0xb2bc941128085171, 0x0192d8af2baf0e1e + .dword 0x4638a2468048f12a, 0xf516eef883efae45 + .dword 0x3ecdd09c2899b324, 0x8de39c222b3eec4b + .dword 0xca49e6cb80d9137f, 0x7967aa75837e4c10 + .dword 0x451d1318d716ed17, 0xf6335fa6d4b1b278 + .dword 0xb199254f7f564d4c, 
0x02b769f17cf11223 + .dword 0xb4f7f6ad86b4690b, 0x07d9ba1385133664 + .dword 0x4073c0fa2ef4c950, 0xf35d8c442d53963f + .dword 0xcf273529793b3738, 0x7c0979977a9c6857 + .dword 0x3ba3037ed17b9763, 0x888d4fc0d2dcc80c + .dword 0x435671a479aad56d, 0xf0783d1a7a0d8a02 + .dword 0xb7d247f3d1ea7536, 0x04fc0b4dd24d2a59 + .dword 0x3886b22086258b5e, 0x8ba8fe9e8582d431 + .dword 0xcc0284772e652b05, 0x7f2cc8c92dc2746a + .dword 0x325b15e575e1c3d0, 0x8175595b76469cbf + .dword 0xc6df23b2dda1638b, 0x75f16f0cde063ce4 + .dword 0x498bd6618a6e9de3, 0xfaa59adf89c9c28c + .dword 0xbd0fe036222e3db8, 0x0e21ac88218962d7 + .dword 0xc5fa92ec8aff7fb6, 0x76d4de52895820d9 + .dword 0x317ea4bb22bfdfed, 0x8250e80521188082 + .dword 0xbe2a516875702185, 0x0d041dd676d77eea + .dword 0x4aae673fdd3081de, 0xf9802b81de97deb1 + .dword 0x4fc0b4dd24d2a599, 0xfceef8632775faf6 + .dword 0xbb44828a8c9205c2, 0x086ace348f355aad + .dword 0x34107759db5dfbaa, 0x873e3be7d8faa4c5 + .dword 0xc094410e731d5bf1, 0x73ba0db070ba049e + .dword 0xb86133d4dbcc19ff, 0x0b4f7f6ad86b4690 + .dword 0x4ce50583738cb9a4, 0xffcb493d702be6cb + .dword 0xc3b1f050244347cc, 0x709fbcee27e418a3 + .dword 0x3735c6078c03e797, 0x841b8ab98fa4b8f8 + .dword 0xadda7c5f3c4488e3, 0x1ef430e13fe3d78c + .dword 0x595e4a08940428b8, 0xea7006b697a377d7 + .dword 0xd60abfdbc3cbd6d0, 0x6524f365c06c89bf + .dword 0x228e898c6b8b768b, 0x91a0c532682c29e4 + .dword 0x5a7bfb56c35a3485, 0xe955b7e8c0fd6bea + .dword 0xaeffcd016b1a94de, 0x1dd181bf68bdcbb1 + .dword 0x21ab38d23cd56ab6, 0x9285746c3f7235d9 + .dword 0xd52f0e859495caed, 0x6601423b97329582 + .dword 0xd041dd676d77eeaa, 0x636f91d96ed0b1c5 + .dword 0x24c5eb30c5374ef1, 0x97eba78ec690119e + .dword 0xab911ee392f8b099, 0x18bf525d915feff6 + .dword 0x5f1528b43ab810c2, 0xec3b640a391f4fad + .dword 0x27e05a6e926952cc, 0x94ce16d091ce0da3 + .dword 0xd3646c393a29f297, 0x604a2087398eadf8 + .dword 0x5c3099ea6de60cff, 0xef1ed5546e415390 + .dword 0xa8b4afbdc5a6aca4, 0x1b9ae303c601f3cb + .dword 0x56ed3e2f9e224471, 0xe5c372919d851b1e + .dword 
0xa26908783662e42a, 0x114744c635c5bb45 + .dword 0x2d3dfdab61ad1a42, 0x9e13b115620a452d + .dword 0xd9b9cbfcc9edba19, 0x6a978742ca4ae576 + .dword 0xa14cb926613cf817, 0x1262f598629ba778 + .dword 0x55c88f71c97c584c, 0xe6e6c3cfcadb0723 + .dword 0xda9c7aa29eb3a624, 0x69b2361c9d14f94b + .dword 0x2e184cf536f3067f, 0x9d36004b35545910 + .dword 0x2b769f17cf112238, 0x9858d3a9ccb67d57 + .dword 0xdff2a94067518263, 0x6cdce5fe64f6dd0c + .dword 0x50a65c93309e7c0b, 0xe388102d33392364 + .dword 0xa4226ac498dedc50, 0x170c267a9b79833f + .dword 0xdcd7181e300f9e5e, 0x6ff954a033a8c131 + .dword 0x28532e49984f3e05, 0x9b7d62f79be8616a + .dword 0xa707db9acf80c06d, 0x14299724cc279f02 + .dword 0x5383edcd67c06036, 0xe0ada17364673f59 diff --git a/crc/riscv64/crc64_iso_norm.S b/crc/riscv64/crc64_iso_norm.S new file mode 100644 index 00000000..435851ce --- /dev/null +++ b/crc/riscv64/crc64_iso_norm.S @@ -0,0 +1,178 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc64_norm_common_clmul.h" + +/* uint64_t crc64_iso_norm(uint64_t init_crc, uint8_t *buf, uint64_t len) */ +crc64_func_norm crc64_iso_norm + +/* precomputed folding constants */ +.poly: +.mu: + .dword 0x000000000000001b /* excludes leading 1 */ +.k1: + .dword 0x0000001b1b001b1b +.k2: + .dword 0x0000000101000101 +.k3: + .dword 0x0000000000001db7 +.k4: +.k5: + .dword 0x0000000000000145 + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0x000000000000001b + .dword 0x0000000000000036, 0x000000000000002d + .dword 0x000000000000006c, 0x0000000000000077 + .dword 0x000000000000005a, 0x0000000000000041 + .dword 0x00000000000000d8, 0x00000000000000c3 + .dword 0x00000000000000ee, 0x00000000000000f5 + .dword 0x00000000000000b4, 0x00000000000000af + .dword 0x0000000000000082, 0x0000000000000099 + .dword 0x00000000000001b0, 0x00000000000001ab + .dword 0x0000000000000186, 0x000000000000019d + .dword 0x00000000000001dc, 0x00000000000001c7 + .dword 0x00000000000001ea, 0x00000000000001f1 + .dword 0x0000000000000168, 0x0000000000000173 + .dword 0x000000000000015e, 0x0000000000000145 + .dword 0x0000000000000104, 0x000000000000011f + .dword 0x0000000000000132, 0x0000000000000129 + .dword 0x0000000000000360, 0x000000000000037b + .dword 0x0000000000000356, 0x000000000000034d + .dword 0x000000000000030c, 0x0000000000000317 + .dword 0x000000000000033a, 
0x0000000000000321 + .dword 0x00000000000003b8, 0x00000000000003a3 + .dword 0x000000000000038e, 0x0000000000000395 + .dword 0x00000000000003d4, 0x00000000000003cf + .dword 0x00000000000003e2, 0x00000000000003f9 + .dword 0x00000000000002d0, 0x00000000000002cb + .dword 0x00000000000002e6, 0x00000000000002fd + .dword 0x00000000000002bc, 0x00000000000002a7 + .dword 0x000000000000028a, 0x0000000000000291 + .dword 0x0000000000000208, 0x0000000000000213 + .dword 0x000000000000023e, 0x0000000000000225 + .dword 0x0000000000000264, 0x000000000000027f + .dword 0x0000000000000252, 0x0000000000000249 + .dword 0x00000000000006c0, 0x00000000000006db + .dword 0x00000000000006f6, 0x00000000000006ed + .dword 0x00000000000006ac, 0x00000000000006b7 + .dword 0x000000000000069a, 0x0000000000000681 + .dword 0x0000000000000618, 0x0000000000000603 + .dword 0x000000000000062e, 0x0000000000000635 + .dword 0x0000000000000674, 0x000000000000066f + .dword 0x0000000000000642, 0x0000000000000659 + .dword 0x0000000000000770, 0x000000000000076b + .dword 0x0000000000000746, 0x000000000000075d + .dword 0x000000000000071c, 0x0000000000000707 + .dword 0x000000000000072a, 0x0000000000000731 + .dword 0x00000000000007a8, 0x00000000000007b3 + .dword 0x000000000000079e, 0x0000000000000785 + .dword 0x00000000000007c4, 0x00000000000007df + .dword 0x00000000000007f2, 0x00000000000007e9 + .dword 0x00000000000005a0, 0x00000000000005bb + .dword 0x0000000000000596, 0x000000000000058d + .dword 0x00000000000005cc, 0x00000000000005d7 + .dword 0x00000000000005fa, 0x00000000000005e1 + .dword 0x0000000000000578, 0x0000000000000563 + .dword 0x000000000000054e, 0x0000000000000555 + .dword 0x0000000000000514, 0x000000000000050f + .dword 0x0000000000000522, 0x0000000000000539 + .dword 0x0000000000000410, 0x000000000000040b + .dword 0x0000000000000426, 0x000000000000043d + .dword 0x000000000000047c, 0x0000000000000467 + .dword 0x000000000000044a, 0x0000000000000451 + .dword 0x00000000000004c8, 0x00000000000004d3 + .dword 
0x00000000000004fe, 0x00000000000004e5 + .dword 0x00000000000004a4, 0x00000000000004bf + .dword 0x0000000000000492, 0x0000000000000489 + .dword 0x0000000000000d80, 0x0000000000000d9b + .dword 0x0000000000000db6, 0x0000000000000dad + .dword 0x0000000000000dec, 0x0000000000000df7 + .dword 0x0000000000000dda, 0x0000000000000dc1 + .dword 0x0000000000000d58, 0x0000000000000d43 + .dword 0x0000000000000d6e, 0x0000000000000d75 + .dword 0x0000000000000d34, 0x0000000000000d2f + .dword 0x0000000000000d02, 0x0000000000000d19 + .dword 0x0000000000000c30, 0x0000000000000c2b + .dword 0x0000000000000c06, 0x0000000000000c1d + .dword 0x0000000000000c5c, 0x0000000000000c47 + .dword 0x0000000000000c6a, 0x0000000000000c71 + .dword 0x0000000000000ce8, 0x0000000000000cf3 + .dword 0x0000000000000cde, 0x0000000000000cc5 + .dword 0x0000000000000c84, 0x0000000000000c9f + .dword 0x0000000000000cb2, 0x0000000000000ca9 + .dword 0x0000000000000ee0, 0x0000000000000efb + .dword 0x0000000000000ed6, 0x0000000000000ecd + .dword 0x0000000000000e8c, 0x0000000000000e97 + .dword 0x0000000000000eba, 0x0000000000000ea1 + .dword 0x0000000000000e38, 0x0000000000000e23 + .dword 0x0000000000000e0e, 0x0000000000000e15 + .dword 0x0000000000000e54, 0x0000000000000e4f + .dword 0x0000000000000e62, 0x0000000000000e79 + .dword 0x0000000000000f50, 0x0000000000000f4b + .dword 0x0000000000000f66, 0x0000000000000f7d + .dword 0x0000000000000f3c, 0x0000000000000f27 + .dword 0x0000000000000f0a, 0x0000000000000f11 + .dword 0x0000000000000f88, 0x0000000000000f93 + .dword 0x0000000000000fbe, 0x0000000000000fa5 + .dword 0x0000000000000fe4, 0x0000000000000fff + .dword 0x0000000000000fd2, 0x0000000000000fc9 + .dword 0x0000000000000b40, 0x0000000000000b5b + .dword 0x0000000000000b76, 0x0000000000000b6d + .dword 0x0000000000000b2c, 0x0000000000000b37 + .dword 0x0000000000000b1a, 0x0000000000000b01 + .dword 0x0000000000000b98, 0x0000000000000b83 + .dword 0x0000000000000bae, 0x0000000000000bb5 + .dword 0x0000000000000bf4, 
0x0000000000000bef + .dword 0x0000000000000bc2, 0x0000000000000bd9 + .dword 0x0000000000000af0, 0x0000000000000aeb + .dword 0x0000000000000ac6, 0x0000000000000add + .dword 0x0000000000000a9c, 0x0000000000000a87 + .dword 0x0000000000000aaa, 0x0000000000000ab1 + .dword 0x0000000000000a28, 0x0000000000000a33 + .dword 0x0000000000000a1e, 0x0000000000000a05 + .dword 0x0000000000000a44, 0x0000000000000a5f + .dword 0x0000000000000a72, 0x0000000000000a69 + .dword 0x0000000000000820, 0x000000000000083b + .dword 0x0000000000000816, 0x000000000000080d + .dword 0x000000000000084c, 0x0000000000000857 + .dword 0x000000000000087a, 0x0000000000000861 + .dword 0x00000000000008f8, 0x00000000000008e3 + .dword 0x00000000000008ce, 0x00000000000008d5 + .dword 0x0000000000000894, 0x000000000000088f + .dword 0x00000000000008a2, 0x00000000000008b9 + .dword 0x0000000000000990, 0x000000000000098b + .dword 0x00000000000009a6, 0x00000000000009bd + .dword 0x00000000000009fc, 0x00000000000009e7 + .dword 0x00000000000009ca, 0x00000000000009d1 + .dword 0x0000000000000948, 0x0000000000000953 + .dword 0x000000000000097e, 0x0000000000000965 + .dword 0x0000000000000924, 0x000000000000093f + .dword 0x0000000000000912, 0x0000000000000909 diff --git a/crc/riscv64/crc64_iso_refl.S b/crc/riscv64/crc64_iso_refl.S new file mode 100644 index 00000000..3f88dfc2 --- /dev/null +++ b/crc/riscv64/crc64_iso_refl.S @@ -0,0 +1,179 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#include "crc64_refl_common_clmul.h" + +/* uint64_t crc64_iso_refl(uint64_t init_crc, uint8_t *buf, uint64_t len) */ +crc64_func_refl crc64_iso_refl + +/* precomputed folding constants */ +.poly: + .dword 0xb000000000000001 /* poly reflected, excluding leading 1 */ +.mu: + .dword 0xb000000000000001 +.k1: + .dword 0x01b001b1b0000001 +.k2: + .dword 0xb100010100000001 +.k3: + .dword 0x6b70000000000001 +.k4: +.k5: + .dword 0xf500000000000001 + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0x01b0000000000000 + .dword 0x0360000000000000, 0x02d0000000000000 + .dword 0x06c0000000000000, 0x0770000000000000 + .dword 0x05a0000000000000, 0x0410000000000000 + .dword 0x0d80000000000000, 0x0c30000000000000 + .dword 0x0ee0000000000000, 0x0f50000000000000 + .dword 0x0b40000000000000, 0x0af0000000000000 + .dword 0x0820000000000000, 0x0990000000000000 + .dword 0x1b00000000000000, 0x1ab0000000000000 + .dword 0x1860000000000000, 0x19d0000000000000 + .dword 0x1dc0000000000000, 0x1c70000000000000 + .dword 0x1ea0000000000000, 0x1f10000000000000 + .dword 0x1680000000000000, 0x1730000000000000 + .dword 0x15e0000000000000, 0x1450000000000000 + .dword 0x1040000000000000, 0x11f0000000000000 + .dword 0x1320000000000000, 0x1290000000000000 + .dword 0x3600000000000000, 0x37b0000000000000 + .dword 0x3560000000000000, 0x34d0000000000000 + .dword 0x30c0000000000000, 0x3170000000000000 + .dword 0x33a0000000000000, 0x3210000000000000 + .dword 0x3b80000000000000, 0x3a30000000000000 + .dword 0x38e0000000000000, 0x3950000000000000 + .dword 0x3d40000000000000, 0x3cf0000000000000 + .dword 0x3e20000000000000, 0x3f90000000000000 + .dword 0x2d00000000000000, 0x2cb0000000000000 + .dword 0x2e60000000000000, 0x2fd0000000000000 + .dword 0x2bc0000000000000, 0x2a70000000000000 + .dword 0x28a0000000000000, 0x2910000000000000 + .dword 0x2080000000000000, 0x2130000000000000 + .dword 0x23e0000000000000, 0x2250000000000000 + 
.dword 0x2640000000000000, 0x27f0000000000000 + .dword 0x2520000000000000, 0x2490000000000000 + .dword 0x6c00000000000000, 0x6db0000000000000 + .dword 0x6f60000000000000, 0x6ed0000000000000 + .dword 0x6ac0000000000000, 0x6b70000000000000 + .dword 0x69a0000000000000, 0x6810000000000000 + .dword 0x6180000000000000, 0x6030000000000000 + .dword 0x62e0000000000000, 0x6350000000000000 + .dword 0x6740000000000000, 0x66f0000000000000 + .dword 0x6420000000000000, 0x6590000000000000 + .dword 0x7700000000000000, 0x76b0000000000000 + .dword 0x7460000000000000, 0x75d0000000000000 + .dword 0x71c0000000000000, 0x7070000000000000 + .dword 0x72a0000000000000, 0x7310000000000000 + .dword 0x7a80000000000000, 0x7b30000000000000 + .dword 0x79e0000000000000, 0x7850000000000000 + .dword 0x7c40000000000000, 0x7df0000000000000 + .dword 0x7f20000000000000, 0x7e90000000000000 + .dword 0x5a00000000000000, 0x5bb0000000000000 + .dword 0x5960000000000000, 0x58d0000000000000 + .dword 0x5cc0000000000000, 0x5d70000000000000 + .dword 0x5fa0000000000000, 0x5e10000000000000 + .dword 0x5780000000000000, 0x5630000000000000 + .dword 0x54e0000000000000, 0x5550000000000000 + .dword 0x5140000000000000, 0x50f0000000000000 + .dword 0x5220000000000000, 0x5390000000000000 + .dword 0x4100000000000000, 0x40b0000000000000 + .dword 0x4260000000000000, 0x43d0000000000000 + .dword 0x47c0000000000000, 0x4670000000000000 + .dword 0x44a0000000000000, 0x4510000000000000 + .dword 0x4c80000000000000, 0x4d30000000000000 + .dword 0x4fe0000000000000, 0x4e50000000000000 + .dword 0x4a40000000000000, 0x4bf0000000000000 + .dword 0x4920000000000000, 0x4890000000000000 + .dword 0xd800000000000000, 0xd9b0000000000000 + .dword 0xdb60000000000000, 0xdad0000000000000 + .dword 0xdec0000000000000, 0xdf70000000000000 + .dword 0xdda0000000000000, 0xdc10000000000000 + .dword 0xd580000000000000, 0xd430000000000000 + .dword 0xd6e0000000000000, 0xd750000000000000 + .dword 0xd340000000000000, 0xd2f0000000000000 + .dword 0xd020000000000000, 
0xd190000000000000 + .dword 0xc300000000000000, 0xc2b0000000000000 + .dword 0xc060000000000000, 0xc1d0000000000000 + .dword 0xc5c0000000000000, 0xc470000000000000 + .dword 0xc6a0000000000000, 0xc710000000000000 + .dword 0xce80000000000000, 0xcf30000000000000 + .dword 0xcde0000000000000, 0xcc50000000000000 + .dword 0xc840000000000000, 0xc9f0000000000000 + .dword 0xcb20000000000000, 0xca90000000000000 + .dword 0xee00000000000000, 0xefb0000000000000 + .dword 0xed60000000000000, 0xecd0000000000000 + .dword 0xe8c0000000000000, 0xe970000000000000 + .dword 0xeba0000000000000, 0xea10000000000000 + .dword 0xe380000000000000, 0xe230000000000000 + .dword 0xe0e0000000000000, 0xe150000000000000 + .dword 0xe540000000000000, 0xe4f0000000000000 + .dword 0xe620000000000000, 0xe790000000000000 + .dword 0xf500000000000000, 0xf4b0000000000000 + .dword 0xf660000000000000, 0xf7d0000000000000 + .dword 0xf3c0000000000000, 0xf270000000000000 + .dword 0xf0a0000000000000, 0xf110000000000000 + .dword 0xf880000000000000, 0xf930000000000000 + .dword 0xfbe0000000000000, 0xfa50000000000000 + .dword 0xfe40000000000000, 0xfff0000000000000 + .dword 0xfd20000000000000, 0xfc90000000000000 + .dword 0xb400000000000000, 0xb5b0000000000000 + .dword 0xb760000000000000, 0xb6d0000000000000 + .dword 0xb2c0000000000000, 0xb370000000000000 + .dword 0xb1a0000000000000, 0xb010000000000000 + .dword 0xb980000000000000, 0xb830000000000000 + .dword 0xbae0000000000000, 0xbb50000000000000 + .dword 0xbf40000000000000, 0xbef0000000000000 + .dword 0xbc20000000000000, 0xbd90000000000000 + .dword 0xaf00000000000000, 0xaeb0000000000000 + .dword 0xac60000000000000, 0xadd0000000000000 + .dword 0xa9c0000000000000, 0xa870000000000000 + .dword 0xaaa0000000000000, 0xab10000000000000 + .dword 0xa280000000000000, 0xa330000000000000 + .dword 0xa1e0000000000000, 0xa050000000000000 + .dword 0xa440000000000000, 0xa5f0000000000000 + .dword 0xa720000000000000, 0xa690000000000000 + .dword 0x8200000000000000, 0x83b0000000000000 + .dword 
0x8160000000000000, 0x80d0000000000000 + .dword 0x84c0000000000000, 0x8570000000000000 + .dword 0x87a0000000000000, 0x8610000000000000 + .dword 0x8f80000000000000, 0x8e30000000000000 + .dword 0x8ce0000000000000, 0x8d50000000000000 + .dword 0x8940000000000000, 0x88f0000000000000 + .dword 0x8a20000000000000, 0x8b90000000000000 + .dword 0x9900000000000000, 0x98b0000000000000 + .dword 0x9a60000000000000, 0x9bd0000000000000 + .dword 0x9fc0000000000000, 0x9e70000000000000 + .dword 0x9ca0000000000000, 0x9d10000000000000 + .dword 0x9480000000000000, 0x9530000000000000 + .dword 0x97e0000000000000, 0x9650000000000000 + .dword 0x9240000000000000, 0x93f0000000000000 + .dword 0x9120000000000000, 0x9090000000000000 diff --git a/crc/riscv64/crc64_jones_norm.S b/crc/riscv64/crc64_jones_norm.S new file mode 100644 index 00000000..7bc81942 --- /dev/null +++ b/crc/riscv64/crc64_jones_norm.S @@ -0,0 +1,179 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc64_norm_common_clmul.h" + +/* uint64_t crc64_jones_norm(uint64_t init_crc, uint8_t *buf, uint64_t len) */ +crc64_func_norm crc64_jones_norm + +/* precomputed folding constants */ +.poly: + .dword 0xad93d23594c935a9 /* excludes leading 1 */ +.mu: + .dword 0xddf3eeb298be6cf8 /* excludes leading 1 */ +.k1: + .dword 0x13c961588f27f643 +.k2: + .dword 0x4e501e58ca43d25e +.k3: + .dword 0x698b74157cfbd736 +.k4: +.k5: + .dword 0x4445ed2750017038 + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0xad93d23594c935a9 + .dword 0xf6b4765ebd5b5efb, 0x5b27a46b29926b52 + .dword 0x40fb3e88ee7f885f, 0xed68ecbd7ab6bdf6 + .dword 0xb64f48d65324d6a4, 0x1bdc9ae3c7ede30d + .dword 0x81f67d11dcff10be, 0x2c65af2448362517 + .dword 0x77420b4f61a44e45, 0xdad1d97af56d7bec + .dword 0xc10d4399328098e1, 0x6c9e91aca649ad48 + .dword 0x37b935c78fdbc61a, 0x9a2ae7f21b12f3b3 + .dword 0xae7f28162d3714d5, 0x03ecfa23b9fe217c + .dword 0x58cb5e48906c4a2e, 0xf5588c7d04a57f87 + .dword 0xee84169ec3489c8a, 0x4317c4ab5781a923 + .dword 0x183060c07e13c271, 0xb5a3b2f5eadaf7d8 + .dword 0x2f895507f1c8046b, 0x821a8732650131c2 + .dword 0xd93d23594c935a90, 0x74aef16cd85a6f39 + .dword 0x6f726b8f1fb78c34, 0xc2e1b9ba8b7eb99d + .dword 0x99c61dd1a2ecd2cf, 0x3455cfe43625e766 + .dword 0xf16d8219cea71c03, 0x5cfe502c5a6e29aa + .dword 0x07d9f44773fc42f8, 0xaa4a2672e7357751 + .dword 0xb196bc9120d8945c, 
0x1c056ea4b411a1f5 + .dword 0x4722cacf9d83caa7, 0xeab118fa094aff0e + .dword 0x709bff0812580cbd, 0xdd082d3d86913914 + .dword 0x862f8956af035246, 0x2bbc5b633bca67ef + .dword 0x3060c180fc2784e2, 0x9df313b568eeb14b + .dword 0xc6d4b7de417cda19, 0x6b4765ebd5b5efb0 + .dword 0x5f12aa0fe39008d6, 0xf281783a77593d7f + .dword 0xa9a6dc515ecb562d, 0x04350e64ca026384 + .dword 0x1fe994870def8089, 0xb27a46b29926b520 + .dword 0xe95de2d9b0b4de72, 0x44ce30ec247debdb + .dword 0xdee4d71e3f6f1868, 0x7377052baba62dc1 + .dword 0x2850a14082344693, 0x85c3737516fd733a + .dword 0x9e1fe996d1109037, 0x338c3ba345d9a59e + .dword 0x68ab9fc86c4bcecc, 0xc5384dfdf882fb65 + .dword 0x4f48d60609870daf, 0xe2db04339d4e3806 + .dword 0xb9fca058b4dc5354, 0x146f726d201566fd + .dword 0x0fb3e88ee7f885f0, 0xa2203abb7331b059 + .dword 0xf9079ed05aa3db0b, 0x54944ce5ce6aeea2 + .dword 0xcebeab17d5781d11, 0x632d792241b128b8 + .dword 0x380add49682343ea, 0x95990f7cfcea7643 + .dword 0x8e45959f3b07954e, 0x23d647aaafcea0e7 + .dword 0x78f1e3c1865ccbb5, 0xd56231f41295fe1c + .dword 0xe137fe1024b0197a, 0x4ca42c25b0792cd3 + .dword 0x1783884e99eb4781, 0xba105a7b0d227228 + .dword 0xa1ccc098cacf9125, 0x0c5f12ad5e06a48c + .dword 0x5778b6c67794cfde, 0xfaeb64f3e35dfa77 + .dword 0x60c18301f84f09c4, 0xcd5251346c863c6d + .dword 0x9675f55f4514573f, 0x3be6276ad1dd6296 + .dword 0x203abd891630819b, 0x8da96fbc82f9b432 + .dword 0xd68ecbd7ab6bdf60, 0x7b1d19e23fa2eac9 + .dword 0xbe25541fc72011ac, 0x13b6862a53e92405 + .dword 0x489122417a7b4f57, 0xe502f074eeb27afe + .dword 0xfede6a97295f99f3, 0x534db8a2bd96ac5a + .dword 0x086a1cc99404c708, 0xa5f9cefc00cdf2a1 + .dword 0x3fd3290e1bdf0112, 0x9240fb3b8f1634bb + .dword 0xc9675f50a6845fe9, 0x64f48d65324d6a40 + .dword 0x7f281786f5a0894d, 0xd2bbc5b36169bce4 + .dword 0x899c61d848fbd7b6, 0x240fb3eddc32e21f + .dword 0x105a7c09ea170579, 0xbdc9ae3c7ede30d0 + .dword 0xe6ee0a57574c5b82, 0x4b7dd862c3856e2b + .dword 0x50a1428104688d26, 0xfd3290b490a1b88f + .dword 0xa61534dfb933d3dd, 0x0b86e6ea2dfae674 + .dword 
0x91ac011836e815c7, 0x3c3fd32da221206e + .dword 0x671877468bb34b3c, 0xca8ba5731f7a7e95 + .dword 0xd1573f90d8979d98, 0x7cc4eda54c5ea831 + .dword 0x27e349ce65ccc363, 0x8a709bfbf105f6ca + .dword 0x9e91ac0c130e1b5e, 0x33027e3987c72ef7 + .dword 0x6825da52ae5545a5, 0xc5b608673a9c700c + .dword 0xde6a9284fd719301, 0x73f940b169b8a6a8 + .dword 0x28dee4da402acdfa, 0x854d36efd4e3f853 + .dword 0x1f67d11dcff10be0, 0xb2f403285b383e49 + .dword 0xe9d3a74372aa551b, 0x44407576e66360b2 + .dword 0x5f9cef95218e83bf, 0xf20f3da0b547b616 + .dword 0xa92899cb9cd5dd44, 0x04bb4bfe081ce8ed + .dword 0x30ee841a3e390f8b, 0x9d7d562faaf03a22 + .dword 0xc65af24483625170, 0x6bc9207117ab64d9 + .dword 0x7015ba92d04687d4, 0xdd8668a7448fb27d + .dword 0x86a1cccc6d1dd92f, 0x2b321ef9f9d4ec86 + .dword 0xb118f90be2c61f35, 0x1c8b2b3e760f2a9c + .dword 0x47ac8f555f9d41ce, 0xea3f5d60cb547467 + .dword 0xf1e3c7830cb9976a, 0x5c7015b69870a2c3 + .dword 0x0757b1ddb1e2c991, 0xaac463e8252bfc38 + .dword 0x6ffc2e15dda9075d, 0xc26ffc20496032f4 + .dword 0x9948584b60f259a6, 0x34db8a7ef43b6c0f + .dword 0x2f07109d33d68f02, 0x8294c2a8a71fbaab + .dword 0xd9b366c38e8dd1f9, 0x7420b4f61a44e450 + .dword 0xee0a5304015617e3, 0x43998131959f224a + .dword 0x18be255abc0d4918, 0xb52df76f28c47cb1 + .dword 0xaef16d8cef299fbc, 0x0362bfb97be0aa15 + .dword 0x58451bd25272c147, 0xf5d6c9e7c6bbf4ee + .dword 0xc1830603f09e1388, 0x6c10d43664572621 + .dword 0x3737705d4dc54d73, 0x9aa4a268d90c78da + .dword 0x8178388b1ee19bd7, 0x2cebeabe8a28ae7e + .dword 0x77cc4ed5a3bac52c, 0xda5f9ce03773f085 + .dword 0x40757b122c610336, 0xede6a927b8a8369f + .dword 0xb6c10d4c913a5dcd, 0x1b52df7905f36864 + .dword 0x008e459ac21e8b69, 0xad1d97af56d7bec0 + .dword 0xf63a33c47f45d592, 0x5ba9e1f1eb8ce03b + .dword 0xd1d97a0a1a8916f1, 0x7c4aa83f8e402358 + .dword 0x276d0c54a7d2480a, 0x8afede61331b7da3 + .dword 0x91224482f4f69eae, 0x3cb196b7603fab07 + .dword 0x679632dc49adc055, 0xca05e0e9dd64f5fc + .dword 0x502f071bc676064f, 0xfdbcd52e52bf33e6 + .dword 0xa69b71457b2d58b4, 
0x0b08a370efe46d1d + .dword 0x10d4399328098e10, 0xbd47eba6bcc0bbb9 + .dword 0xe6604fcd9552d0eb, 0x4bf39df8019be542 + .dword 0x7fa6521c37be0224, 0xd2358029a377378d + .dword 0x891224428ae55cdf, 0x2481f6771e2c6976 + .dword 0x3f5d6c94d9c18a7b, 0x92cebea14d08bfd2 + .dword 0xc9e91aca649ad480, 0x647ac8fff053e129 + .dword 0xfe502f0deb41129a, 0x53c3fd387f882733 + .dword 0x08e45953561a4c61, 0xa5778b66c2d379c8 + .dword 0xbeab1185053e9ac5, 0x1338c3b091f7af6c + .dword 0x481f67dbb865c43e, 0xe58cb5ee2cacf197 + .dword 0x20b4f813d42e0af2, 0x8d272a2640e73f5b + .dword 0xd6008e4d69755409, 0x7b935c78fdbc61a0 + .dword 0x604fc69b3a5182ad, 0xcddc14aeae98b704 + .dword 0x96fbb0c5870adc56, 0x3b6862f013c3e9ff + .dword 0xa142850208d11a4c, 0x0cd157379c182fe5 + .dword 0x57f6f35cb58a44b7, 0xfa6521692143711e + .dword 0xe1b9bb8ae6ae9213, 0x4c2a69bf7267a7ba + .dword 0x170dcdd45bf5cce8, 0xba9e1fe1cf3cf941 + .dword 0x8ecbd005f9191e27, 0x235802306dd02b8e + .dword 0x787fa65b444240dc, 0xd5ec746ed08b7575 + .dword 0xce30ee8d17669678, 0x63a33cb883afa3d1 + .dword 0x388498d3aa3dc883, 0x95174ae63ef4fd2a + .dword 0x0f3dad1425e60e99, 0xa2ae7f21b12f3b30 + .dword 0xf989db4a98bd5062, 0x541a097f0c7465cb + .dword 0x4fc6939ccb9986c6, 0xe25541a95f50b36f + .dword 0xb972e5c276c2d83d, 0x14e137f7e20bed94 diff --git a/crc/riscv64/crc64_jones_refl.S b/crc/riscv64/crc64_jones_refl.S new file mode 100644 index 00000000..73ad9546 --- /dev/null +++ b/crc/riscv64/crc64_jones_refl.S @@ -0,0 +1,179 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#include "crc64_refl_common_clmul.h" + +/* uint64_t crc64_jones_refl(uint64_t init_crc, uint8_t *buf, uint64_t len) */ +crc64_func_refl crc64_jones_refl + +/* precomputed folding constants */ +.poly: + .dword 0x2b5926535897936b /* poly reflected, excluding leading 1 */ +.mu: + .dword 0x3e6cfa329aef9f77 +.k1: + .dword 0xaf86efb16d9ab4fb +.k2: + .dword 0xf49784a634f014e4 +.k3: + .dword 0xd9d7be7d505da32c +.k4: +.k5: + .dword 0x381d0015c96f4444 + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0x7ad870c830358979 + .dword 0xf5b0e190606b12f2, 0x8f689158505e9b8b + .dword 0xc038e5739841b68f, 0xbae095bba8743ff6 + .dword 0x358804e3f82aa47d, 0x4f50742bc81f2d04 + .dword 0xab28ecb46814fe75, 0xd1f09c7c5821770c + .dword 0x5e980d24087fec87, 0x24407dec384a65fe + .dword 0x6b1009c7f05548fa, 0x11c8790fc060c183 + .dword 0x9ea0e857903e5a08, 0xe478989fa00bd371 + .dword 0x7d08ff3b88be6f81, 0x07d08ff3b88be6f8 + .dword 0x88b81eabe8d57d73, 0xf2606e63d8e0f40a + .dword 0xbd301a4810ffd90e, 0xc7e86a8020ca5077 + .dword 0x4880fbd87094cbfc, 0x32588b1040a14285 + .dword 0xd620138fe0aa91f4, 0xacf86347d09f188d + .dword 0x2390f21f80c18306, 0x594882d7b0f40a7f + .dword 0x1618f6fc78eb277b, 0x6cc0863448deae02 + .dword 0xe3a8176c18803589, 0x997067a428b5bcf0 + .dword 0xfa11fe77117cdf02, 0x80c98ebf2149567b + .dword 0x0fa11fe77117cdf0, 0x75796f2f41224489 + .dword 0x3a291b04893d698d, 0x40f16bccb908e0f4 + .dword 0xcf99fa94e9567b7f, 0xb5418a5cd963f206 + .dword 0x513912c379682177, 0x2be1620b495da80e + .dword 0xa489f35319033385, 0xde51839b2936bafc + .dword 0x9101f7b0e12997f8, 0xebd98778d11c1e81 + .dword 0x64b116208142850a, 0x1e6966e8b1770c73 + .dword 0x8719014c99c2b083, 0xfdc17184a9f739fa + .dword 0x72a9e0dcf9a9a271, 0x08719014c99c2b08 + .dword 0x4721e43f0183060c, 0x3df994f731b68f75 + .dword 0xb29105af61e814fe, 0xc849756751dd9d87 + .dword 0x2c31edf8f1d64ef6, 0x56e99d30c1e3c78f + .dword 0xd9810c6891bd5c04, 0xa3597ca0a188d57d
+ .dword 0xec09088b6997f879, 0x96d1784359a27100 + .dword 0x19b9e91b09fcea8b, 0x636199d339c963f2 + .dword 0xdf7adabd7a6e2d6f, 0xa5a2aa754a5ba416 + .dword 0x2aca3b2d1a053f9d, 0x50124be52a30b6e4 + .dword 0x1f423fcee22f9be0, 0x659a4f06d21a1299 + .dword 0xeaf2de5e82448912, 0x902aae96b271006b + .dword 0x74523609127ad31a, 0x0e8a46c1224f5a63 + .dword 0x81e2d7997211c1e8, 0xfb3aa75142244891 + .dword 0xb46ad37a8a3b6595, 0xceb2a3b2ba0eecec + .dword 0x41da32eaea507767, 0x3b024222da65fe1e + .dword 0xa2722586f2d042ee, 0xd8aa554ec2e5cb97 + .dword 0x57c2c41692bb501c, 0x2d1ab4dea28ed965 + .dword 0x624ac0f56a91f461, 0x1892b03d5aa47d18 + .dword 0x97fa21650afae693, 0xed2251ad3acf6fea + .dword 0x095ac9329ac4bc9b, 0x7382b9faaaf135e2 + .dword 0xfcea28a2faafae69, 0x8632586aca9a2710 + .dword 0xc9622c4102850a14, 0xb3ba5c8932b0836d + .dword 0x3cd2cdd162ee18e6, 0x460abd1952db919f + .dword 0x256b24ca6b12f26d, 0x5fb354025b277b14 + .dword 0xd0dbc55a0b79e09f, 0xaa03b5923b4c69e6 + .dword 0xe553c1b9f35344e2, 0x9f8bb171c366cd9b + .dword 0x10e3202993385610, 0x6a3b50e1a30ddf69 + .dword 0x8e43c87e03060c18, 0xf49bb8b633338561 + .dword 0x7bf329ee636d1eea, 0x012b592653589793 + .dword 0x4e7b2d0d9b47ba97, 0x34a35dc5ab7233ee + .dword 0xbbcbcc9dfb2ca865, 0xc113bc55cb19211c + .dword 0x5863dbf1e3ac9dec, 0x22bbab39d3991495 + .dword 0xadd33a6183c78f1e, 0xd70b4aa9b3f20667 + .dword 0x985b3e827bed2b63, 0xe2834e4a4bd8a21a + .dword 0x6debdf121b863991, 0x1733afda2bb3b0e8 + .dword 0xf34b37458bb86399, 0x8993478dbb8deae0 + .dword 0x06fbd6d5ebd3716b, 0x7c23a61ddbe6f812 + .dword 0x3373d23613f9d516, 0x49aba2fe23cc5c6f + .dword 0xc6c333a67392c7e4, 0xbc1b436e43a74e9d + .dword 0x95ac9329ac4bc9b5, 0xef74e3e19c7e40cc + .dword 0x601c72b9cc20db47, 0x1ac40271fc15523e + .dword 0x5594765a340a7f3a, 0x2f4c0692043ff643 + .dword 0xa02497ca54616dc8, 0xdafce7026454e4b1 + .dword 0x3e847f9dc45f37c0, 0x445c0f55f46abeb9 + .dword 0xcb349e0da4342532, 0xb1eceec59401ac4b + .dword 0xfebc9aee5c1e814f, 0x8464ea266c2b0836 + .dword 0x0b0c7b7e3c7593bd, 
0x71d40bb60c401ac4 + .dword 0xe8a46c1224f5a634, 0x927c1cda14c02f4d + .dword 0x1d148d82449eb4c6, 0x67ccfd4a74ab3dbf + .dword 0x289c8961bcb410bb, 0x5244f9a98c8199c2 + .dword 0xdd2c68f1dcdf0249, 0xa7f41839ecea8b30 + .dword 0x438c80a64ce15841, 0x3954f06e7cd4d138 + .dword 0xb63c61362c8a4ab3, 0xcce411fe1cbfc3ca + .dword 0x83b465d5d4a0eece, 0xf96c151de49567b7 + .dword 0x76048445b4cbfc3c, 0x0cdcf48d84fe7545 + .dword 0x6fbd6d5ebd3716b7, 0x15651d968d029fce + .dword 0x9a0d8ccedd5c0445, 0xe0d5fc06ed698d3c + .dword 0xaf85882d2576a038, 0xd55df8e515432941 + .dword 0x5a3569bd451db2ca, 0x20ed197575283bb3 + .dword 0xc49581ead523e8c2, 0xbe4df122e51661bb + .dword 0x3125607ab548fa30, 0x4bfd10b2857d7349 + .dword 0x04ad64994d625e4d, 0x7e7514517d57d734 + .dword 0xf11d85092d094cbf, 0x8bc5f5c11d3cc5c6 + .dword 0x12b5926535897936, 0x686de2ad05bcf04f + .dword 0xe70573f555e26bc4, 0x9ddd033d65d7e2bd + .dword 0xd28d7716adc8cfb9, 0xa85507de9dfd46c0 + .dword 0x273d9686cda3dd4b, 0x5de5e64efd965432 + .dword 0xb99d7ed15d9d8743, 0xc3450e196da80e3a + .dword 0x4c2d9f413df695b1, 0x36f5ef890dc31cc8 + .dword 0x79a59ba2c5dc31cc, 0x037deb6af5e9b8b5 + .dword 0x8c157a32a5b7233e, 0xf6cd0afa9582aa47 + .dword 0x4ad64994d625e4da, 0x300e395ce6106da3 + .dword 0xbf66a804b64ef628, 0xc5bed8cc867b7f51 + .dword 0x8aeeace74e645255, 0xf036dc2f7e51db2c + .dword 0x7f5e4d772e0f40a7, 0x05863dbf1e3ac9de + .dword 0xe1fea520be311aaf, 0x9b26d5e88e0493d6 + .dword 0x144e44b0de5a085d, 0x6e963478ee6f8124 + .dword 0x21c640532670ac20, 0x5b1e309b16452559 + .dword 0xd476a1c3461bbed2, 0xaeaed10b762e37ab + .dword 0x37deb6af5e9b8b5b, 0x4d06c6676eae0222 + .dword 0xc26e573f3ef099a9, 0xb8b627f70ec510d0 + .dword 0xf7e653dcc6da3dd4, 0x8d3e2314f6efb4ad + .dword 0x0256b24ca6b12f26, 0x788ec2849684a65f + .dword 0x9cf65a1b368f752e, 0xe62e2ad306bafc57 + .dword 0x6946bb8b56e467dc, 0x139ecb4366d1eea5 + .dword 0x5ccebf68aecec3a1, 0x2616cfa09efb4ad8 + .dword 0xa97e5ef8cea5d153, 0xd3a62e30fe90582a + .dword 0xb0c7b7e3c7593bd8, 0xca1fc72bf76cb2a1 + .dword 
0x45775673a732292a, 0x3faf26bb9707a053 + .dword 0x70ff52905f188d57, 0x0a2722586f2d042e + .dword 0x854fb3003f739fa5, 0xff97c3c80f4616dc + .dword 0x1bef5b57af4dc5ad, 0x61372b9f9f784cd4 + .dword 0xee5fbac7cf26d75f, 0x9487ca0fff135e26 + .dword 0xdbd7be24370c7322, 0xa10fceec0739fa5b + .dword 0x2e675fb4576761d0, 0x54bf2f7c6752e8a9 + .dword 0xcdcf48d84fe75459, 0xb71738107fd2dd20 + .dword 0x387fa9482f8c46ab, 0x42a7d9801fb9cfd2 + .dword 0x0df7adabd7a6e2d6, 0x772fdd63e7936baf + .dword 0xf8474c3bb7cdf024, 0x829f3cf387f8795d + .dword 0x66e7a46c27f3aa2c, 0x1c3fd4a417c62355 + .dword 0x935745fc4798b8de, 0xe98f353477ad31a7 + .dword 0xa6df411fbfb21ca3, 0xdc0731d78f8795da + .dword 0x536fa08fdfd90e51, 0x29b7d047efec8728 diff --git a/crc/riscv64/crc64_norm_common_clmul.h b/crc/riscv64/crc64_norm_common_clmul.h new file mode 100644 index 00000000..30533efd --- /dev/null +++ b/crc/riscv64/crc64_norm_common_clmul.h @@ -0,0 +1,104 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc_fold_common_clmul.h" + +/* folding final reduction; POLY and MU must already be loaded by the caller */ +/* expects 128-bit value in HIGH:LOW (t0:t1), puts return value in SEED (a0); trashes K5, LOW, t2 and t3 */ +.macro crc64_norm_fold_reduction + /* precomputed constant, read from the .k5 label of the including file */ + ld K5, .k5 + + clmulh t2, K5, HIGH + xor t2, t2, LOW + clmul LOW, K5, HIGH + + /* as the mu and poly constants are 65-bits long, stored missing their + * leading 1, multiplication requires a clmul(h) and xor operation + */ + clmulh t3, MU, t2 + xor t3, t3, t2 + clmul t2, POLY, t3 + xor SEED, t2, LOW +.fold_1_done: +.endm + +/* calculate crc64 of a misaligned buffer, one byte per iteration, using .crc64_table */ +/* \len is the register holding how many bytes to read (zero reads nothing) */ +/* expects SEED (a0) and BUF (a1) to hold corresponding values */ +/* updates values of SEED and BUF */ +/* trashes t0, t1, t2 and t3 */ +.macro crc64_norm_table len:req + beqz \len, .table_done_\@ + add t1, BUF, \len + la t0, .crc64_table +.table_loop_\@: + lbu t2, (BUF) + srli t3, SEED, 56 + addi BUF, BUF, 1 + xor t2, t2, t3 + slli t2, t2, 3 + add t2, t2, t0 + ld t3, (t2) + slli SEED, SEED, 8 + xor SEED, SEED, t3 + bne BUF, t1, .table_loop_\@ +.table_done_\@:
+.endm + +/* define a global function \name that calculates a crc64 norm hash: initial crc in SEED, buffer in BUF, byte count in LEN, result returned in SEED */ +.macro crc64_func_norm name:req +.text +.align 1 +.global \name +.type \name\(), %function +\name\(): + /* load precomputed constants */ + ld POLY, .poly + ld MU, .mu + + /* invert seed */ + not SEED, SEED + + neg t4, BUF /* align buffer to 64-bits: t4 = (-BUF) & 7 head bytes go through the table */ + andi t4, t4, 0b111 + bltu LEN, t4, .excess /* buffer ends before the aligned boundary: table only */ + crc64_norm_table t4 + sub LEN, LEN, t4 + + crc_fold_loop 64 1 0 + crc64_norm_fold_reduction + +.excess: + crc64_norm_table LEN + + /* invert result */ + not SEED, SEED + ret +.endm diff --git a/crc/riscv64/crc64_refl_common_clmul.h b/crc/riscv64/crc64_refl_common_clmul.h new file mode 100644 index 00000000..af45d2cb --- /dev/null +++ b/crc/riscv64/crc64_refl_common_clmul.h @@ -0,0 +1,104 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc_fold_common_clmul.h" + +/* folding final reduction; POLY and MU must already be loaded by the caller */ +/* expects 128-bit value in HIGH:LOW (t0:t1), puts return value in SEED (a0); trashes K5, HIGH, LOW and t2 */ +.macro crc64_refl_fold_reduction + /* precomputed constant, read from the .k5 label of the including file */ + ld K5, .k5 + + clmulh t2, K5, HIGH + clmul HIGH, K5, HIGH + xor LOW, HIGH, LOW + + clmul LOW, MU, LOW + /* as poly constant is 65-bits long missing the leading 1, + * multiplication requires a clmul(h) and xor + */ + clmulh HIGH, POLY, LOW + xor LOW, HIGH, LOW + xor SEED, LOW, t2 +.fold_1_done: +.endm + +/* calculate crc64 of a misaligned buffer, one byte per iteration, using .crc64_table */ +/* \len is the register holding how many bytes to read (zero reads nothing) */ +/* expects SEED (a0) and BUF (a1) to hold corresponding values */ +/* updates values of SEED and BUF */ +/* trashes t0, t1, t2 and t3 */ +.macro crc64_refl_table len:req + beqz \len, .table_done_\@ + add t1, BUF, \len + la t0, .crc64_table +.table_loop_\@: + lbu t2, (BUF) + andi t3, SEED, 0xff + addi BUF, BUF, 1 + xor t2, t2, t3 + slli t2, t2, 3 + add t2, t2, t0 + ld t3, (t2) + srli SEED, SEED, 8 + xor SEED, SEED, t3 + bne BUF, t1, .table_loop_\@ +.table_done_\@: +.endm + +/* define a global function \name that calculates a crc64 refl hash: initial crc in SEED, buffer in BUF, byte count in LEN, result returned in SEED */ +.macro crc64_func_refl name:req +.text +.align 1 +.global \name +.type \name\(), %function +\name\(): + /* load precomputed constants */ + ld POLY, .poly + ld MU, .mu + + /* invert seed */ + not SEED, SEED +
+ neg t4, BUF /* align buffer to 64-bits: t4 = (-BUF) & 7 head bytes go through the table */ + andi t4, t4, 0b111 + bltu LEN, t4, .excess /* buffer ends before the aligned boundary: table only */ + crc64_refl_table t4 + sub LEN, LEN, t4 + + crc_fold_loop 64 0 1 + crc64_refl_fold_reduction + +.excess: + crc64_refl_table LEN + + /* invert result */ + not SEED, SEED + ret +.endm diff --git a/crc/riscv64/crc64_rocksoft_norm.S b/crc/riscv64/crc64_rocksoft_norm.S new file mode 100644 index 00000000..003fd023 --- /dev/null +++ b/crc/riscv64/crc64_rocksoft_norm.S @@ -0,0 +1,179 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +#include "crc64_norm_common_clmul.h" + +/* uint64_t crc64_rocksoft_norm(uint64_t init_crc, uint8_t *buf, uint64_t len) */ +crc64_func_norm crc64_rocksoft_norm + +/* precomputed folding constants */ +.poly: + .dword 0xad93d23594c93659 /* excludes leading 1 */ +.mu: + .dword 0xddf3eeb298be6fc8 /* excludes leading 1 */ +.k1: + .dword 0xa42a30f19b669860 +.k2: + .dword 0xb4414e6a0488488c +.k3: + .dword 0x08578ba97f0476ae +.k4: +.k5: + .dword 0x6b08c948f0dd2f08 + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0xad93d23594c93659 + .dword 0xf6b4765ebd5b5aeb, 0x5b27a46b29926cb2 + .dword 0x40fb3e88ee7f838f, 0xed68ecbd7ab6b5d6 + .dword 0xb64f48d65324d964, 0x1bdc9ae3c7edef3d + .dword 0x81f67d11dcff071e, 0x2c65af2448363147 + .dword 0x77420b4f61a45df5, 0xdad1d97af56d6bac + .dword 0xc10d439932808491, 0x6c9e91aca649b2c8 + .dword 0x37b935c78fdbde7a, 0x9a2ae7f21b12e823 + .dword 0xae7f28162d373865, 0x03ecfa23b9fe0e3c + .dword 0x58cb5e48906c628e, 0xf5588c7d04a554d7 + .dword 0xee84169ec348bbea, 0x4317c4ab57818db3 + .dword 0x183060c07e13e101, 0xb5a3b2f5eadad758 + .dword 0x2f895507f1c83f7b, 0x821a873265010922 + .dword 0xd93d23594c936590, 0x74aef16cd85a53c9 + .dword 0x6f726b8f1fb7bcf4, 0xc2e1b9ba8b7e8aad + .dword 0x99c61dd1a2ece61f, 0x3455cfe43625d046 + .dword 0xf16d8219cea74693, 0x5cfe502c5a6e70ca + .dword 0x07d9f44773fc1c78, 0xaa4a2672e7352a21 + .dword 
0xb196bc9120d8c51c, 0x1c056ea4b411f345 + .dword 0x4722cacf9d839ff7, 0xeab118fa094aa9ae + .dword 0x709bff081258418d, 0xdd082d3d869177d4 + .dword 0x862f8956af031b66, 0x2bbc5b633bca2d3f + .dword 0x3060c180fc27c202, 0x9df313b568eef45b + .dword 0xc6d4b7de417c98e9, 0x6b4765ebd5b5aeb0 + .dword 0x5f12aa0fe3907ef6, 0xf281783a775948af + .dword 0xa9a6dc515ecb241d, 0x04350e64ca021244 + .dword 0x1fe994870deffd79, 0xb27a46b29926cb20 + .dword 0xe95de2d9b0b4a792, 0x44ce30ec247d91cb + .dword 0xdee4d71e3f6f79e8, 0x7377052baba64fb1 + .dword 0x2850a14082342303, 0x85c3737516fd155a + .dword 0x9e1fe996d110fa67, 0x338c3ba345d9cc3e + .dword 0x68ab9fc86c4ba08c, 0xc5384dfdf88296d5 + .dword 0x4f48d6060987bb7f, 0xe2db04339d4e8d26 + .dword 0xb9fca058b4dce194, 0x146f726d2015d7cd + .dword 0x0fb3e88ee7f838f0, 0xa2203abb73310ea9 + .dword 0xf9079ed05aa3621b, 0x54944ce5ce6a5442 + .dword 0xcebeab17d578bc61, 0x632d792241b18a38 + .dword 0x380add496823e68a, 0x95990f7cfcead0d3 + .dword 0x8e45959f3b073fee, 0x23d647aaafce09b7 + .dword 0x78f1e3c1865c6505, 0xd56231f41295535c + .dword 0xe137fe1024b0831a, 0x4ca42c25b079b543 + .dword 0x1783884e99ebd9f1, 0xba105a7b0d22efa8 + .dword 0xa1ccc098cacf0095, 0x0c5f12ad5e0636cc + .dword 0x5778b6c677945a7e, 0xfaeb64f3e35d6c27 + .dword 0x60c18301f84f8404, 0xcd5251346c86b25d + .dword 0x9675f55f4514deef, 0x3be6276ad1dde8b6 + .dword 0x203abd891630078b, 0x8da96fbc82f931d2 + .dword 0xd68ecbd7ab6b5d60, 0x7b1d19e23fa26b39 + .dword 0xbe25541fc720fdec, 0x13b6862a53e9cbb5 + .dword 0x489122417a7ba707, 0xe502f074eeb2915e + .dword 0xfede6a97295f7e63, 0x534db8a2bd96483a + .dword 0x086a1cc994042488, 0xa5f9cefc00cd12d1 + .dword 0x3fd3290e1bdffaf2, 0x9240fb3b8f16ccab + .dword 0xc9675f50a684a019, 0x64f48d65324d9640 + .dword 0x7f281786f5a0797d, 0xd2bbc5b361694f24 + .dword 0x899c61d848fb2396, 0x240fb3eddc3215cf + .dword 0x105a7c09ea17c589, 0xbdc9ae3c7edef3d0 + .dword 0xe6ee0a57574c9f62, 0x4b7dd862c385a93b + .dword 0x50a1428104684606, 0xfd3290b490a1705f + .dword 0xa61534dfb9331ced, 
0x0b86e6ea2dfa2ab4 + .dword 0x91ac011836e8c297, 0x3c3fd32da221f4ce + .dword 0x671877468bb3987c, 0xca8ba5731f7aae25 + .dword 0xd1573f90d8974118, 0x7cc4eda54c5e7741 + .dword 0x27e349ce65cc1bf3, 0x8a709bfbf1052daa + .dword 0x9e91ac0c130f76fe, 0x33027e3987c640a7 + .dword 0x6825da52ae542c15, 0xc5b608673a9d1a4c + .dword 0xde6a9284fd70f571, 0x73f940b169b9c328 + .dword 0x28dee4da402baf9a, 0x854d36efd4e299c3 + .dword 0x1f67d11dcff071e0, 0xb2f403285b3947b9 + .dword 0xe9d3a74372ab2b0b, 0x44407576e6621d52 + .dword 0x5f9cef95218ff26f, 0xf20f3da0b546c436 + .dword 0xa92899cb9cd4a884, 0x04bb4bfe081d9edd + .dword 0x30ee841a3e384e9b, 0x9d7d562faaf178c2 + .dword 0xc65af24483631470, 0x6bc9207117aa2229 + .dword 0x7015ba92d047cd14, 0xdd8668a7448efb4d + .dword 0x86a1cccc6d1c97ff, 0x2b321ef9f9d5a1a6 + .dword 0xb118f90be2c74985, 0x1c8b2b3e760e7fdc + .dword 0x47ac8f555f9c136e, 0xea3f5d60cb552537 + .dword 0xf1e3c7830cb8ca0a, 0x5c7015b69871fc53 + .dword 0x0757b1ddb1e390e1, 0xaac463e8252aa6b8 + .dword 0x6ffc2e15dda8306d, 0xc26ffc2049610634 + .dword 0x9948584b60f36a86, 0x34db8a7ef43a5cdf + .dword 0x2f07109d33d7b3e2, 0x8294c2a8a71e85bb + .dword 0xd9b366c38e8ce909, 0x7420b4f61a45df50 + .dword 0xee0a530401573773, 0x43998131959e012a + .dword 0x18be255abc0c6d98, 0xb52df76f28c55bc1 + .dword 0xaef16d8cef28b4fc, 0x0362bfb97be182a5 + .dword 0x58451bd25273ee17, 0xf5d6c9e7c6bad84e + .dword 0xc1830603f09f0808, 0x6c10d43664563e51 + .dword 0x3737705d4dc452e3, 0x9aa4a268d90d64ba + .dword 0x8178388b1ee08b87, 0x2cebeabe8a29bdde + .dword 0x77cc4ed5a3bbd16c, 0xda5f9ce03772e735 + .dword 0x40757b122c600f16, 0xede6a927b8a9394f + .dword 0xb6c10d4c913b55fd, 0x1b52df7905f263a4 + .dword 0x008e459ac21f8c99, 0xad1d97af56d6bac0 + .dword 0xf63a33c47f44d672, 0x5ba9e1f1eb8de02b + .dword 0xd1d97a0a1a88cd81, 0x7c4aa83f8e41fbd8 + .dword 0x276d0c54a7d3976a, 0x8afede61331aa133 + .dword 0x91224482f4f74e0e, 0x3cb196b7603e7857 + .dword 0x679632dc49ac14e5, 0xca05e0e9dd6522bc + .dword 0x502f071bc677ca9f, 0xfdbcd52e52befcc6 + .dword 
0xa69b71457b2c9074, 0x0b08a370efe5a62d + .dword 0x10d4399328084910, 0xbd47eba6bcc17f49 + .dword 0xe6604fcd955313fb, 0x4bf39df8019a25a2 + .dword 0x7fa6521c37bff5e4, 0xd2358029a376c3bd + .dword 0x891224428ae4af0f, 0x2481f6771e2d9956 + .dword 0x3f5d6c94d9c0766b, 0x92cebea14d094032 + .dword 0xc9e91aca649b2c80, 0x647ac8fff0521ad9 + .dword 0xfe502f0deb40f2fa, 0x53c3fd387f89c4a3 + .dword 0x08e45953561ba811, 0xa5778b66c2d29e48 + .dword 0xbeab1185053f7175, 0x1338c3b091f6472c + .dword 0x481f67dbb8642b9e, 0xe58cb5ee2cad1dc7 + .dword 0x20b4f813d42f8b12, 0x8d272a2640e6bd4b + .dword 0xd6008e4d6974d1f9, 0x7b935c78fdbde7a0 + .dword 0x604fc69b3a50089d, 0xcddc14aeae993ec4 + .dword 0x96fbb0c5870b5276, 0x3b6862f013c2642f + .dword 0xa142850208d08c0c, 0x0cd157379c19ba55 + .dword 0x57f6f35cb58bd6e7, 0xfa6521692142e0be + .dword 0xe1b9bb8ae6af0f83, 0x4c2a69bf726639da + .dword 0x170dcdd45bf45568, 0xba9e1fe1cf3d6331 + .dword 0x8ecbd005f918b377, 0x235802306dd1852e + .dword 0x787fa65b4443e99c, 0xd5ec746ed08adfc5 + .dword 0xce30ee8d176730f8, 0x63a33cb883ae06a1 + .dword 0x388498d3aa3c6a13, 0x95174ae63ef55c4a + .dword 0x0f3dad1425e7b469, 0xa2ae7f21b12e8230 + .dword 0xf989db4a98bcee82, 0x541a097f0c75d8db + .dword 0x4fc6939ccb9837e6, 0xe25541a95f5101bf + .dword 0xb972e5c276c36d0d, 0x14e137f7e20a5b54 diff --git a/crc/riscv64/crc64_rocksoft_refl.S b/crc/riscv64/crc64_rocksoft_refl.S new file mode 100644 index 00000000..97e7cc9b --- /dev/null +++ b/crc/riscv64/crc64_rocksoft_refl.S @@ -0,0 +1,179 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +#include "crc64_refl_common_clmul.h" + +/* uint64_t crc64_rocksoft_refl(uint64_t init_crc, uint8_t *buf, uint64_t len) */ +crc64_func_refl crc64_rocksoft_refl + +/* precomputed folding constants */ +.poly: + .dword 0x34d926535897936b /* poly reflected, excluding leading 1 */ +.mu: + .dword 0x27ecfa329aef9f77 +.k1: + .dword 0x0c32cdb31e18a84a +.k2: + .dword 0x62242240ace5045a +.k3: + .dword 0xeadc41fd2ba3d420 +.k4: +.k5: + .dword 0x21e9761e252621ac /* .k4 and .k5 both label this doubleword */ + +/* lookup table */ +.crc64_table: + .dword 0x0000000000000000, 0x7f6ef0c830358979 + .dword 0xfedde190606b12f2, 0x81b31158505e9b8b + .dword 0xc962e5739841b68f, 0xb60c15bba8743ff6 + .dword 0x37bf04e3f82aa47d, 0x48d1f42bc81f2d04 + .dword 0xa61cecb46814fe75, 0xd9721c7c5821770c + .dword 0x58c10d24087fec87, 0x27affdec384a65fe + .dword 0x6f7e09c7f05548fa, 0x1010f90fc060c183 + .dword 0x91a3e857903e5a08, 0xeecd189fa00bd371 + .dword 0x78e0ff3b88be6f81, 0x078e0ff3b88be6f8 + .dword 0x863d1eabe8d57d73, 0xf953ee63d8e0f40a + .dword 0xb1821a4810ffd90e, 0xceecea8020ca5077 + .dword 0x4f5ffbd87094cbfc, 0x30310b1040a14285 + .dword 0xdefc138fe0aa91f4, 0xa192e347d09f188d + .dword 0x2021f21f80c18306, 0x5f4f02d7b0f40a7f + .dword 0x179ef6fc78eb277b, 0x68f0063448deae02 + .dword 0xe943176c18803589, 0x962de7a428b5bcf0 + .dword 0xf1c1fe77117cdf02, 0x8eaf0ebf2149567b + .dword 0x0f1c1fe77117cdf0, 0x7072ef2f41224489 + .dword 0x38a31b04893d698d, 0x47cdebccb908e0f4 + .dword 0xc67efa94e9567b7f, 0xb9100a5cd963f206 + .dword 0x57dd12c379682177, 0x28b3e20b495da80e + .dword 0xa900f35319033385, 0xd66e039b2936bafc + .dword 0x9ebff7b0e12997f8, 0xe1d10778d11c1e81 + .dword 0x606216208142850a, 0x1f0ce6e8b1770c73 + .dword 0x8921014c99c2b083, 0xf64ff184a9f739fa + .dword 0x77fce0dcf9a9a271, 0x08921014c99c2b08 + .dword 0x4043e43f0183060c, 0x3f2d14f731b68f75 + .dword 0xbe9e05af61e814fe, 0xc1f0f56751dd9d87 + .dword 0x2f3dedf8f1d64ef6, 0x50531d30c1e3c78f + .dword 0xd1e00c6891bd5c04,
0xae8efca0a188d57d + .dword 0xe65f088b6997f879, 0x9931f84359a27100 + .dword 0x1882e91b09fcea8b, 0x67ec19d339c963f2 + .dword 0xd75adabd7a6e2d6f, 0xa8342a754a5ba416 + .dword 0x29873b2d1a053f9d, 0x56e9cbe52a30b6e4 + .dword 0x1e383fcee22f9be0, 0x6156cf06d21a1299 + .dword 0xe0e5de5e82448912, 0x9f8b2e96b271006b + .dword 0x71463609127ad31a, 0x0e28c6c1224f5a63 + .dword 0x8f9bd7997211c1e8, 0xf0f5275142244891 + .dword 0xb824d37a8a3b6595, 0xc74a23b2ba0eecec + .dword 0x46f932eaea507767, 0x3997c222da65fe1e + .dword 0xafba2586f2d042ee, 0xd0d4d54ec2e5cb97 + .dword 0x5167c41692bb501c, 0x2e0934dea28ed965 + .dword 0x66d8c0f56a91f461, 0x19b6303d5aa47d18 + .dword 0x980521650afae693, 0xe76bd1ad3acf6fea + .dword 0x09a6c9329ac4bc9b, 0x76c839faaaf135e2 + .dword 0xf77b28a2faafae69, 0x8815d86aca9a2710 + .dword 0xc0c42c4102850a14, 0xbfaadc8932b0836d + .dword 0x3e19cdd162ee18e6, 0x41773d1952db919f + .dword 0x269b24ca6b12f26d, 0x59f5d4025b277b14 + .dword 0xd846c55a0b79e09f, 0xa72835923b4c69e6 + .dword 0xeff9c1b9f35344e2, 0x90973171c366cd9b + .dword 0x1124202993385610, 0x6e4ad0e1a30ddf69 + .dword 0x8087c87e03060c18, 0xffe938b633338561 + .dword 0x7e5a29ee636d1eea, 0x0134d92653589793 + .dword 0x49e52d0d9b47ba97, 0x368bddc5ab7233ee + .dword 0xb738cc9dfb2ca865, 0xc8563c55cb19211c + .dword 0x5e7bdbf1e3ac9dec, 0x21152b39d3991495 + .dword 0xa0a63a6183c78f1e, 0xdfc8caa9b3f20667 + .dword 0x97193e827bed2b63, 0xe877ce4a4bd8a21a + .dword 0x69c4df121b863991, 0x16aa2fda2bb3b0e8 + .dword 0xf86737458bb86399, 0x8709c78dbb8deae0 + .dword 0x06bad6d5ebd3716b, 0x79d4261ddbe6f812 + .dword 0x3105d23613f9d516, 0x4e6b22fe23cc5c6f + .dword 0xcfd833a67392c7e4, 0xb0b6c36e43a74e9d + .dword 0x9a6c9329ac4bc9b5, 0xe50263e19c7e40cc + .dword 0x64b172b9cc20db47, 0x1bdf8271fc15523e + .dword 0x530e765a340a7f3a, 0x2c608692043ff643 + .dword 0xadd397ca54616dc8, 0xd2bd67026454e4b1 + .dword 0x3c707f9dc45f37c0, 0x431e8f55f46abeb9 + .dword 0xc2ad9e0da4342532, 0xbdc36ec59401ac4b + .dword 0xf5129aee5c1e814f, 0x8a7c6a266c2b0836 + .dword 
0x0bcf7b7e3c7593bd, 0x74a18bb60c401ac4 + .dword 0xe28c6c1224f5a634, 0x9de29cda14c02f4d + .dword 0x1c518d82449eb4c6, 0x633f7d4a74ab3dbf + .dword 0x2bee8961bcb410bb, 0x548079a98c8199c2 + .dword 0xd53368f1dcdf0249, 0xaa5d9839ecea8b30 + .dword 0x449080a64ce15841, 0x3bfe706e7cd4d138 + .dword 0xba4d61362c8a4ab3, 0xc52391fe1cbfc3ca + .dword 0x8df265d5d4a0eece, 0xf29c951de49567b7 + .dword 0x732f8445b4cbfc3c, 0x0c41748d84fe7545 + .dword 0x6bad6d5ebd3716b7, 0x14c39d968d029fce + .dword 0x95708ccedd5c0445, 0xea1e7c06ed698d3c + .dword 0xa2cf882d2576a038, 0xdda178e515432941 + .dword 0x5c1269bd451db2ca, 0x237c997575283bb3 + .dword 0xcdb181ead523e8c2, 0xb2df7122e51661bb + .dword 0x336c607ab548fa30, 0x4c0290b2857d7349 + .dword 0x04d364994d625e4d, 0x7bbd94517d57d734 + .dword 0xfa0e85092d094cbf, 0x856075c11d3cc5c6 + .dword 0x134d926535897936, 0x6c2362ad05bcf04f + .dword 0xed9073f555e26bc4, 0x92fe833d65d7e2bd + .dword 0xda2f7716adc8cfb9, 0xa54187de9dfd46c0 + .dword 0x24f29686cda3dd4b, 0x5b9c664efd965432 + .dword 0xb5517ed15d9d8743, 0xca3f8e196da80e3a + .dword 0x4b8c9f413df695b1, 0x34e26f890dc31cc8 + .dword 0x7c339ba2c5dc31cc, 0x035d6b6af5e9b8b5 + .dword 0x82ee7a32a5b7233e, 0xfd808afa9582aa47 + .dword 0x4d364994d625e4da, 0x3258b95ce6106da3 + .dword 0xb3eba804b64ef628, 0xcc8558cc867b7f51 + .dword 0x8454ace74e645255, 0xfb3a5c2f7e51db2c + .dword 0x7a894d772e0f40a7, 0x05e7bdbf1e3ac9de + .dword 0xeb2aa520be311aaf, 0x944455e88e0493d6 + .dword 0x15f744b0de5a085d, 0x6a99b478ee6f8124 + .dword 0x224840532670ac20, 0x5d26b09b16452559 + .dword 0xdc95a1c3461bbed2, 0xa3fb510b762e37ab + .dword 0x35d6b6af5e9b8b5b, 0x4ab846676eae0222 + .dword 0xcb0b573f3ef099a9, 0xb465a7f70ec510d0 + .dword 0xfcb453dcc6da3dd4, 0x83daa314f6efb4ad + .dword 0x0269b24ca6b12f26, 0x7d0742849684a65f + .dword 0x93ca5a1b368f752e, 0xeca4aad306bafc57 + .dword 0x6d17bb8b56e467dc, 0x12794b4366d1eea5 + .dword 0x5aa8bf68aecec3a1, 0x25c64fa09efb4ad8 + .dword 0xa4755ef8cea5d153, 0xdb1bae30fe90582a + .dword 0xbcf7b7e3c7593bd8, 
0xc399472bf76cb2a1 + .dword 0x422a5673a732292a, 0x3d44a6bb9707a053 + .dword 0x759552905f188d57, 0x0afba2586f2d042e + .dword 0x8b48b3003f739fa5, 0xf42643c80f4616dc + .dword 0x1aeb5b57af4dc5ad, 0x6585ab9f9f784cd4 + .dword 0xe436bac7cf26d75f, 0x9b584a0fff135e26 + .dword 0xd389be24370c7322, 0xace74eec0739fa5b + .dword 0x2d545fb4576761d0, 0x523aaf7c6752e8a9 + .dword 0xc41748d84fe75459, 0xbb79b8107fd2dd20 + .dword 0x3acaa9482f8c46ab, 0x45a459801fb9cfd2 + .dword 0x0d75adabd7a6e2d6, 0x721b5d63e7936baf + .dword 0xf3a84c3bb7cdf024, 0x8cc6bcf387f8795d + .dword 0x620ba46c27f3aa2c, 0x1d6554a417c62355 + .dword 0x9cd645fc4798b8de, 0xe3b8b53477ad31a7 + .dword 0xab69411fbfb21ca3, 0xd407b1d78f8795da + .dword 0x55b4a08fdfd90e51, 0x2ada5047efec8728 diff --git a/crc/riscv64/crc_fold_common_clmul.h b/crc/riscv64/crc_fold_common_clmul.h new file mode 100644 index 00000000..b3a0b5b1 --- /dev/null +++ b/crc/riscv64/crc_fold_common_clmul.h @@ -0,0 +1,342 @@ +######################################################################## +# Copyright(c) 2024 ByteDance All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of ByteDance Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+#define SEED a0
+#define BUF a1
+#define LEN a2
+#define POLY a3
+#define MU a4
+#define K1 t5
+#define K2 t6
+#define K3 t5
+#define K4 t6
+#define K5 t5
+#define K6 t6
+
+#define X3HIGH t0
+#define HIGH t0
+#define X3LOW t1
+#define LOW t1
+
+#define X2HIGH t2
+#define X2LOW a5
+#define X1HIGH a6
+#define X1LOW a7
+#define X0HIGH t3
+#define X0LOW t4
+
+#define BUF3HIGH s4
+#define BUF3LOW s5
+#define BUF2HIGH s6
+#define BUF2LOW s7
+#define BUF1HIGH s8
+#define BUF1LOW s9
+#define BUF0HIGH s10
+#define BUF0LOW s11
+
+/* lane temporaries: a lane's k1-low and k2-high products are never live
+ * together, so they share a register; ra, gp and tp are never written */
+#define X3K1LOW a0
+#define X3K2HIGH a0
+#define X2K1LOW s0
+#define X2K2HIGH s0
+#define X1K1LOW s1
+#define X1K2HIGH s1
+#define X0K1LOW s2
+#define X0K2HIGH s2
+
+/* repeated fold-by-four followed by fold-by-one */
+/* takes parameter \bits, bit length of polynomial (32 or 64) */
+/* \endianswap is a boolean parameter, controlling whether an endianness swap is
+ * needed (true for norm crc on little-endian cpu, false for refl crc) */
+/* \reflected is a boolean parameter: when true clmul/clmulh swap low/high roles */
+/* expects SEED (a0), BUF (a1) and LEN (a2) to hold those values */
+/* expects BUF is doubleword-aligned */
+/* returns 128-bit result in HIGH:LOW (t0:t1) */
+/* returns updated buffer ptr & length in BUF and LEN */
+/* trashes all caller-saved registers except ra, POLY and MU (a3/a4) */
+.macro crc_fold_loop bits:req endianswap:req reflected:req
+
+	/* for a reflected crc, clmulh gets low word and vice-versa */
+.macro clmul_low rd:req, rs1:req, rs2:req
+.if !\reflected
+	clmul \rd, \rs1, \rs2
+.else
+	clmulh \rd, \rs1, \rs2
+.endif
+.endm
+.macro clmul_high rd:req, rs1:req, rs2:req
+.if !\reflected
+	clmulh \rd, \rs1, \rs2
+.else
+	clmul \rd, \rs1, \rs2
+.endif
+.endm
+
+	/* does enough buffer exist for a 4-fold? */
+	li t0, 128
+	bltu LEN, t0, .fold_1
+
+	/* push callee-saved registers to stack */
+	/* 112-byte frame keeps sp 16-byte aligned; 104(sp) is padding */
+	addi sp, sp, -112
+	sd a3, 96(sp)
+	sd s0, 88(sp)
+	sd s1, 80(sp)
+	sd s2, 72(sp)
+	sd s3, 64(sp)
+	sd s4, 56(sp)
+	sd s5, 48(sp)
+	sd s6, 40(sp)
+	sd s7, 32(sp)
+	sd s8, 24(sp)
+	sd s9, 16(sp)
+	sd s10, 8(sp)
+	sd s11, 0(sp)
+
+	/* load initial 4 128-bit chunks */
+	ld X3HIGH, 0(BUF)
+	ld X3LOW, 8(BUF)
+	ld X2HIGH, 16(BUF)
+	ld X2LOW, 24(BUF)
+	ld X1HIGH, 32(BUF)
+	ld X1LOW, 40(BUF)
+	ld X0HIGH, 48(BUF)
+	ld X0LOW, 56(BUF)
+
+	addi BUF, BUF, 64
+	addi LEN, LEN, -64
+
+	/* endianness swap */
+.if \endianswap
+	rev8 X3HIGH, X3HIGH
+	rev8 X3LOW, X3LOW
+	rev8 X2HIGH, X2HIGH
+	rev8 X2LOW, X2LOW
+	rev8 X1HIGH, X1HIGH
+	rev8 X1LOW, X1LOW
+	rev8 X0HIGH, X0HIGH
+	rev8 X0LOW, X0LOW
+.endif
+
+	/* xor in seed */
+.if (\bits != 64) && \endianswap
+	slli SEED, SEED, 64 - \bits
+.endif
+	xor X3HIGH, X3HIGH, SEED
+
+	/* load constants */
+	ld K1, .k1
+	ld K2, .k2
+
+	/* calculate how far we'll fold until and load LEN with the amount left */
+	srli a3, LEN, 6
+	slli a3, a3, 6
+	add a3, BUF, a3
+	and LEN, LEN, 0x3f
+
+.align 3
+.fold_4_loop:
+	/* carryless multiply each high doubleword by k1, get 128-bit result */
+	/* interleave fetching next 4 128-bit chunks */
+	clmul_low X3K1LOW, K1, X3HIGH
+	ld BUF3HIGH, 0(BUF)
+	clmul_low X2K1LOW, K1, X2HIGH
+	ld BUF3LOW, 8(BUF)
+	clmul_low X1K1LOW, K1, X1HIGH
+	ld BUF2HIGH, 16(BUF)
+	clmul_low X0K1LOW, K1, X0HIGH
+	ld BUF2LOW, 24(BUF)
+	clmul_high X3HIGH, K1, X3HIGH
+	ld BUF1HIGH, 32(BUF)
+	clmul_high X2HIGH, K1, X2HIGH
+	ld BUF1LOW, 40(BUF)
+	clmul_high X1HIGH, K1, X1HIGH
+	ld BUF0HIGH, 48(BUF)
+	clmul_high X0HIGH, K1, X0HIGH
+	ld BUF0LOW, 56(BUF)
+
+	addi BUF, BUF, 64
+
+	/* endianness swap */
+.if \endianswap
+	rev8 BUF3HIGH, BUF3HIGH
+	rev8 BUF3LOW, BUF3LOW
+	rev8 BUF2HIGH, BUF2HIGH
+	rev8 BUF2LOW, BUF2LOW
+	rev8 BUF1HIGH, BUF1HIGH
+	rev8 BUF1LOW, BUF1LOW
+	rev8 BUF0HIGH, BUF0HIGH
+	rev8 BUF0LOW, BUF0LOW
+.endif
+
+	/* fold k1 low products into the new data now, freeing their registers */
+	xor BUF3LOW, BUF3LOW, X3K1LOW
+	xor BUF2LOW, BUF2LOW, X2K1LOW
+	xor BUF1LOW, BUF1LOW, X1K1LOW
+	xor BUF0LOW, BUF0LOW, X0K1LOW
+
+	/* carryless multiply each low doubleword by k2 */
+	clmul_high X3K2HIGH, K2, X3LOW
+	clmul_high X2K2HIGH, K2, X2LOW
+	clmul_high X1K2HIGH, K2, X1LOW
+	clmul_high X0K2HIGH, K2, X0LOW
+	clmul_low X3LOW, K2, X3LOW
+	clmul_low X2LOW, K2, X2LOW
+	clmul_low X1LOW, K2, X1LOW
+	clmul_low X0LOW, K2, X0LOW
+
+	/* xor results together */
+	xor X3HIGH, BUF3HIGH, X3HIGH
+	xor X2HIGH, BUF2HIGH, X2HIGH
+	xor X1HIGH, BUF1HIGH, X1HIGH
+	xor X0HIGH, BUF0HIGH, X0HIGH
+	xor X3LOW, X3LOW, BUF3LOW
+	xor X2LOW, X2LOW, BUF2LOW
+	xor X1LOW, X1LOW, BUF1LOW
+	xor X0LOW, X0LOW, BUF0LOW
+	xor X3HIGH, X3K2HIGH, X3HIGH
+	xor X2HIGH, X2K2HIGH, X2HIGH
+	xor X1HIGH, X1K2HIGH, X1HIGH
+	xor X0HIGH, X0K2HIGH, X0HIGH
+
+	bne BUF, a3, .fold_4_loop
+
+	/* we've four folded as much as we can, fold-by-one values in regs */
+	/* load fold-by-one constants */
+	ld K3, .k3
+	ld K4, .k4
+
+	clmul_high s0, K3, X3HIGH
+	clmul_low s1, K3, X3HIGH
+	clmul_high s2, K4, X3LOW
+	clmul_low s3, K4, X3LOW
+	xor HIGH, X2HIGH, s0
+	xor HIGH, HIGH, s2
+	xor LOW, X2LOW, s1
+	xor LOW, LOW, s3
+
+	clmul_high s0, K3, HIGH
+	clmul_low s1, K3, HIGH
+	clmul_high s2, K4, LOW
+	clmul_low s3, K4, LOW
+	xor HIGH, X1HIGH, s0
+	xor HIGH, HIGH, s2
+	xor LOW, X1LOW, s1
+	xor LOW, LOW, s3
+
+	clmul_high s0, K3, HIGH
+	clmul_low s1, K3, HIGH
+	clmul_high s2, K4, LOW
+	clmul_low s3, K4, LOW
+	xor HIGH, X0HIGH, s0
+	xor HIGH, HIGH, s2
+	xor LOW, X0LOW, s1
+	xor LOW, LOW, s3
+
+	/* pop register values saved on stack */
+	ld a3, 96(sp)
+	ld s0, 88(sp)
+	ld s1, 80(sp)
+	ld s2, 72(sp)
+	ld s3, 64(sp)
+	ld s4, 56(sp)
+	ld s5, 48(sp)
+	ld s6, 40(sp)
+	ld s7, 32(sp)
+	ld s8, 24(sp)
+	ld s9, 16(sp)
+	ld s10, 8(sp)
+	ld s11, 0(sp)
+	addi sp, sp, 112
+
+	/* load fold loop constant, check if any more 1-folding to do */
+	li t4, 16
+	bgeu LEN, t4, .fold_1_loop
+	/* else jump straight to end */
+	j .fold_1_cleanup
+
+.fold_1:
+	li t4, 16 /* kept throughout loop */
+	/* handle case where not enough buffer to do any fold */
+	/* .fold_1_done must be defined by the crc32/64 fold reduction macro */
+	bltu LEN, t4, .fold_1_done
+
+	/* load in initial values and xor with seed */
+	ld HIGH, 0(BUF)
+.if \endianswap
+	rev8 HIGH, HIGH
+.endif
+
+.if (\bits != 64) && \endianswap
+	slli SEED, SEED, 64 - \bits
+.endif
+	xor HIGH, HIGH, SEED
+
+	ld LOW, 8(BUF)
+.if \endianswap
+	rev8 LOW, LOW
+.endif
+
+	addi LEN, LEN, -16
+	addi BUF, BUF, 16
+
+	bltu LEN, t4, .fold_1_cleanup
+
+	/* precomputed constants */
+	ld K3, .k3
+	ld K4, .k4
+.fold_1_loop:
+	/* multiply high and low by constants to get two 128-bit results */
+	clmul_high t2, K3, HIGH
+	clmul_low t3, K3, HIGH
+	clmul_high a5, K4, LOW
+	clmul_low a6, K4, LOW
+
+	/* load next 128-bits of buffer */
+	ld HIGH, 0(BUF)
+	ld LOW, 8(BUF)
+.if \endianswap
+	rev8 HIGH, HIGH
+	rev8 LOW, LOW
+.endif
+
+	addi LEN, LEN, -16
+	addi BUF, BUF, 16
+
+	/* fold in values with xor */
+	xor HIGH, HIGH, t2
+	xor HIGH, HIGH, a5
+	xor LOW, LOW, t3
+	xor LOW, LOW, a6
+
+	bgeu LEN, t4, .fold_1_loop
+
+.fold_1_cleanup:
+.endm

From a62dd046b0e2e9b1db9676c81e8c5702ca21e940 Mon Sep 17 00:00:00 2001
From: Daniel Gregory
Date: Fri, 26 Jul 2024 16:14:12 +0000
Subject: [PATCH 3/3] riscv64: Implement crc16_t10dif_copy

Rather than duplicating all the crc32
4-folding and modifying it to write back to the destination the read-in
bytes, write a very simple memcpy that then tail calls crc16_t10dif. This
makes the performance of crc16_t10dif_copy much worse than crc16_t10dif,
but still about twice as fast as crc16_t10dif_copy_base.

Signed-off-by: Daniel Gregory
---
 crc/riscv64/crc16_t10dif.S | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/crc/riscv64/crc16_t10dif.S b/crc/riscv64/crc16_t10dif.S
index e3dfdf82..c9dd1098 100644
--- a/crc/riscv64/crc16_t10dif.S
+++ b/crc/riscv64/crc16_t10dif.S
@@ -71,3 +71,27 @@ crc16_t10dif:
 	.dword 0x000000002d560000
 .k6:
 	.dword 0x0000000013680000
+
+
+/* uint16_t crc16_t10dif_copy(uint16_t seed, uint8_t *dst, uint8_t *src, uint64_t len) */
+/* byte-wise copy of len bytes from src (a2) to dst (a1), then crc16_t10dif on src */
+.text
+.align 1
+.global crc16_t10dif_copy
+.type crc16_t10dif_copy, %function
+crc16_t10dif_copy:
+	beqz a3, .copy_finished		/* len == 0: nothing to copy */
+	mv t0, a2			/* t0 = read cursor, starts at src */
+	add t1, a2, a3			/* t1 = src + len, one past the end */
+.copy_next_byte:
+	lb t2, 0(t0)
+	addi t0, t0, 1
+	sb t2, 0(a1)
+	addi a1, a1, 1
+	bne t0, t1, .copy_next_byte
+
+.copy_finished:
+	/* a0 keeps the seed; pass src in a1 and len in a2, then tail-call */
+	mv a1, a2
+	mv a2, a3
+	tail crc16_t10dif