From 043208515799c06d570d8d6f9b81c324a33832a2 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Fri, 6 Oct 2023 00:33:33 -0400 Subject: [PATCH] Add separate ASM file for XGETBV64 and CPUID64 (GH #1240) This will allow us to define CRYPTOPP_DISABLE_ASM and completely avoid building x64dll.asm and x64masm.asm --- cpuid64.asm | 63 +++++++++++++++++++++ cryptest.nmake | 119 ++++++++++++++++++++------------------- cryptlib.vcxproj | 5 ++ cryptlib.vcxproj.filters | 3 + x64dll.asm | 45 --------------- 5 files changed, 131 insertions(+), 104 deletions(-) create mode 100644 cpuid64.asm diff --git a/cpuid64.asm b/cpuid64.asm new file mode 100644 index 000000000..75654b0c0 --- /dev/null +++ b/cpuid64.asm @@ -0,0 +1,63 @@ +;; https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention +;; The first four integer arguments are passed in registers. +;; Integer values are passed in left-to-right order in RCX, +;; RDX, R8, and R9, respectively. Arguments five and higher +;; are passed on the stack. + +;; The registers RAX, RCX, RDX, R8, R9, R10, R11, XMM0-5, +;; and the upper portions of YMM0-15 and ZMM0-15 are +;; considered volatile and must be considered destroyed on +;; function calls. 
+ +.CODE + +TITLE CPU features source file +SUBTITLE Microsoft specific ASM code to utilize CPUID and XGETBV64 for down level Microsoft toolchains + +;; http://www.agner.org/optimize/vectorclass/read.php?i=65 +;; word64 Xgetbv(word32 ctrl) +;; ctrl = rcx (XGETBV takes the XCR index in ECX, so no register shuffle is needed) + + ALIGN 8 +XGETBV64 PROC FRAME +.endprolog + ;; query: the bytes 0F 01 D0 are the XGETBV opcode, emitted raw (presumably for assemblers lacking the mnemonic) + DB 0fh, 01h, 0d0h + ;; xcr = (EDX << 32) | EAX; zero-extend EAX with a 32-bit move, since an AND imm32 mask is sign-extended and clears nothing + mov eax, eax + shl rdx, 32 + or rax, rdx + ret +XGETBV64 ENDP + +;; word64 CpuId(word32 func, word32 subfunc, word32 output[4]) +;; func = rcx +;; subfunc = rdx +;; output = r8 + + ALIGN 8 +CPUID64 PROC FRAME + ;; rbx is callee-saved and CPUID overwrites it; spill it to [rsp+8], the first home slot above the return address + mov [rsp+8], rbx +.savereg rbx, 8 +.endprolog + ;; eax = func + mov rax, rcx + ;; ecx = subfunc + mov rcx, rdx + ;; query + cpuid + ;; output[0..3] = eax, ebx, ecx, edx + mov [r8+0], eax + mov [r8+4], ebx + mov [r8+8], ecx + mov [r8+12], edx + ;; return value: always 1 + mov rax, 1 + ;; restore rbx from the slot written in the prolog + mov rbx, [rsp+8] + ret +CPUID64 ENDP + +_TEXT ENDS +END diff --git a/cryptest.nmake b/cryptest.nmake index d04a24799..1a7ba1d2b 100644 --- a/cryptest.nmake +++ b/cryptest.nmake @@ -57,68 +57,69 @@ LIB_SRCS = \ cryptlib.cpp cpu.cpp integer.cpp 3way.cpp adler32.cpp algebra.cpp \ - algparam.cpp allocate.cpp arc4.cpp aria.cpp ariatab.cpp \ - asn.cpp authenc.cpp base32.cpp base64.cpp basecode.cpp bfinit.cpp \ - blake2.cpp blake2b_simd.cpp blake2s_simd.cpp blowfish.cpp blumshub.cpp \ - camellia.cpp cast.cpp casts.cpp cbcmac.cpp ccm.cpp chacha.cpp \ - chacha_avx.cpp chacha_simd.cpp chachapoly.cpp cham.cpp cham_simd.cpp \ - channels.cpp cmac.cpp crc.cpp crc_simd.cpp darn.cpp default.cpp des.cpp \ - dessp.cpp dh.cpp dh2.cpp dll.cpp donna_32.cpp donna_64.cpp donna_sse.cpp \ - dsa.cpp eax.cpp ec2n.cpp eccrypto.cpp ecp.cpp elgamal.cpp emsa2.cpp \ - eprecomp.cpp esign.cpp files.cpp filters.cpp fips140.cpp fipstest.cpp \ - gcm.cpp gcm_simd.cpp gf256.cpp gf2_32.cpp gf2n.cpp gf2n_simd.cpp \ - gfpcrypt.cpp gost.cpp gzip.cpp hc128.cpp hc256.cpp hex.cpp hight.cpp \ - hmac.cpp hrtimer.cpp ida.cpp idea.cpp iterhash.cpp kalyna.cpp \ - 
kalynatab.cpp keccak.cpp keccak_core.cpp keccak_simd.cpp lea.cpp \ - lea_simd.cpp lsh256.cpp lsh256_avx.cpp lsh256_sse.cpp lsh512.cpp \ - lsh512_avx.cpp lsh512_sse.cpp luc.cpp mars.cpp marss.cpp md2.cpp md4.cpp \ - md5.cpp misc.cpp modes.cpp mqueue.cpp mqv.cpp nbtheory.cpp oaep.cpp \ - osrng.cpp padlkrng.cpp panama.cpp pkcspad.cpp poly1305.cpp polynomi.cpp \ - pssr.cpp pubkey.cpp queue.cpp rabbit.cpp rabin.cpp randpool.cpp rc2.cpp \ - rc5.cpp rc6.cpp rdrand.cpp rdtables.cpp rijndael.cpp rijndael_simd.cpp \ - ripemd.cpp rng.cpp rsa.cpp rw.cpp safer.cpp salsa.cpp scrypt.cpp \ - seal.cpp seed.cpp serpent.cpp sha.cpp sha3.cpp sha_simd.cpp shacal2.cpp \ - shacal2_simd.cpp shake.cpp shark.cpp sharkbox.cpp simeck.cpp simon.cpp \ - simon128_simd.cpp skipjack.cpp sm3.cpp sm4.cpp sm4_simd.cpp \ - sosemanuk.cpp speck.cpp speck128_simd.cpp square.cpp squaretb.cpp \ - sse_simd.cpp strciphr.cpp tea.cpp tftables.cpp threefish.cpp tiger.cpp \ - tigertab.cpp ttmac.cpp tweetnacl.cpp twofish.cpp vmac.cpp wake.cpp \ - whrlpool.cpp xed25519.cpp xtr.cpp xtrcrypt.cpp xts.cpp zdeflate.cpp \ - zinflate.cpp zlib.cpp + algparam.cpp allocate.cpp arc4.cpp aria.cpp ariatab.cpp asn.cpp \ + authenc.cpp base32.cpp base64.cpp basecode.cpp bfinit.cpp blake2.cpp \ + blake2b_simd.cpp blake2s_simd.cpp blowfish.cpp blumshub.cpp camellia.cpp \ + cast.cpp casts.cpp cbcmac.cpp ccm.cpp chacha.cpp chacha_avx.cpp \ + chacha_simd.cpp chachapoly.cpp cham.cpp cham_simd.cpp channels.cpp \ + cmac.cpp crc.cpp crc_simd.cpp darn.cpp default.cpp des.cpp dessp.cpp \ + dh.cpp dh2.cpp dll.cpp donna_32.cpp donna_64.cpp donna_sse.cpp dsa.cpp \ + eax.cpp ec2n.cpp eccrypto.cpp ecp.cpp elgamal.cpp emsa2.cpp eprecomp.cpp \ + esign.cpp files.cpp filters.cpp fips140.cpp fipstest.cpp gcm.cpp \ + gcm_simd.cpp gf256.cpp gf2_32.cpp gf2n.cpp gf2n_simd.cpp gfpcrypt.cpp \ + gost.cpp gzip.cpp hc128.cpp hc256.cpp hex.cpp hight.cpp hmac.cpp \ + hrtimer.cpp ida.cpp idea.cpp iterhash.cpp kalyna.cpp kalynatab.cpp \ + keccak.cpp 
keccak_core.cpp keccak_simd.cpp lea.cpp lea_simd.cpp \ + lsh256.cpp lsh256_avx.cpp lsh256_sse.cpp lsh512.cpp lsh512_avx.cpp \ + lsh512_sse.cpp luc.cpp mars.cpp marss.cpp md2.cpp md4.cpp md5.cpp \ + misc.cpp modes.cpp mqueue.cpp mqv.cpp nbtheory.cpp oaep.cpp osrng.cpp \ + padlkrng.cpp panama.cpp pkcspad.cpp poly1305.cpp polynomi.cpp \ + ppc_simd.cpp primetab.cpp pssr.cpp pubkey.cpp queue.cpp rabbit.cpp \ + rabin.cpp randpool.cpp rc2.cpp rc5.cpp rc6.cpp rdrand.cpp rdtables.cpp \ + rijndael.cpp rijndael_simd.cpp ripemd.cpp rng.cpp rsa.cpp rw.cpp \ + safer.cpp salsa.cpp scrypt.cpp seal.cpp seed.cpp serpent.cpp sha.cpp \ + sha3.cpp sha_simd.cpp shacal2.cpp shacal2_simd.cpp shake.cpp shark.cpp \ + sharkbox.cpp simeck.cpp simon.cpp simon128_simd.cpp skipjack.cpp sm3.cpp \ + sm4.cpp sm4_simd.cpp sosemanuk.cpp speck.cpp speck128_simd.cpp \ + square.cpp squaretb.cpp sse_simd.cpp strciphr.cpp tea.cpp tftables.cpp \ + threefish.cpp tiger.cpp tigertab.cpp ttmac.cpp tweetnacl.cpp twofish.cpp \ + vmac.cpp wake.cpp whrlpool.cpp xed25519.cpp xtr.cpp xtrcrypt.cpp xts.cpp \ + zdeflate.cpp zinflate.cpp zlib.cpp LIB_OBJS = \ cryptlib.obj cpu.obj integer.obj 3way.obj adler32.obj algebra.obj \ - algparam.obj allocate.obj arc4.obj aria.obj ariatab.obj \ - asn.obj authenc.obj base32.obj base64.obj basecode.obj bfinit.obj \ - blake2.obj blake2b_simd.obj blake2s_simd.obj blowfish.obj blumshub.obj \ - camellia.obj cast.obj casts.obj cbcmac.obj ccm.obj chacha.obj \ - chacha_avx.obj chacha_simd.obj chachapoly.obj cham.obj cham_simd.obj \ - channels.obj cmac.obj crc.obj crc_simd.obj darn.obj default.obj des.obj \ - dessp.obj dh.obj dh2.obj dll.obj donna_32.obj donna_64.obj donna_sse.obj \ - dsa.obj eax.obj ec2n.obj eccrypto.obj ecp.obj elgamal.obj emsa2.obj \ - eprecomp.obj esign.obj files.obj filters.obj fips140.obj fipstest.obj \ - gcm.obj gcm_simd.obj gf256.obj gf2_32.obj gf2n.obj gf2n_simd.obj \ - gfpcrypt.obj gost.obj gzip.obj hc128.obj hc256.obj hex.obj hight.obj \ - hmac.obj hrtimer.obj 
ida.obj idea.obj iterhash.obj kalyna.obj \ - kalynatab.obj keccak.obj keccak_core.obj keccak_simd.obj lea.obj \ - lea_simd.obj lsh256.obj lsh256_avx.obj lsh256_sse.obj lsh512.obj \ - lsh512_avx.obj lsh512_sse.obj luc.obj mars.obj marss.obj md2.obj md4.obj \ - md5.obj misc.obj modes.obj mqueue.obj mqv.obj nbtheory.obj oaep.obj \ - osrng.obj padlkrng.obj panama.obj pkcspad.obj poly1305.obj polynomi.obj \ - pssr.obj pubkey.obj queue.obj rabbit.obj rabin.obj randpool.obj rc2.obj \ - rc5.obj rc6.obj rdrand.obj rdtables.obj rijndael.obj rijndael_simd.obj \ - ripemd.obj rng.obj rsa.obj rw.obj safer.obj salsa.obj scrypt.obj \ - seal.obj seed.obj serpent.obj sha.obj sha3.obj sha_simd.obj shacal2.obj \ - shacal2_simd.obj shake.obj shark.obj sharkbox.obj simeck.obj simon.obj \ - simon128_simd.obj skipjack.obj sm3.obj sm4.obj sm4_simd.obj \ - sosemanuk.obj speck.obj speck128_simd.obj square.obj squaretb.obj \ - sse_simd.obj strciphr.obj tea.obj tftables.obj threefish.obj tiger.obj \ - tigertab.obj ttmac.obj tweetnacl.obj twofish.obj vmac.obj wake.obj \ - whrlpool.obj xed25519.obj xtr.obj xtrcrypt.obj xts.obj zdeflate.obj \ - zinflate.obj zlib.obj + algparam.obj allocate.obj arc4.obj aria.obj ariatab.obj asn.obj \ + authenc.obj base32.obj base64.obj basecode.obj bfinit.obj blake2.obj \ + blake2b_simd.obj blake2s_simd.obj blowfish.obj blumshub.obj camellia.obj \ + cast.obj casts.obj cbcmac.obj ccm.obj chacha.obj chacha_avx.obj \ + chacha_simd.obj chachapoly.obj cham.obj cham_simd.obj channels.obj \ + cmac.obj crc.obj crc_simd.obj darn.obj default.obj des.obj dessp.obj \ + dh.obj dh2.obj dll.obj donna_32.obj donna_64.obj donna_sse.obj dsa.obj \ + eax.obj ec2n.obj eccrypto.obj ecp.obj elgamal.obj emsa2.obj eprecomp.obj \ + esign.obj files.obj filters.obj fips140.obj fipstest.obj gcm.obj \ + gcm_simd.obj gf256.obj gf2_32.obj gf2n.obj gf2n_simd.obj gfpcrypt.obj \ + gost.obj gzip.obj hc128.obj hc256.obj hex.obj hight.obj hmac.obj \ + hrtimer.obj ida.obj idea.obj iterhash.obj 
kalyna.obj kalynatab.obj \ + keccak.obj keccak_core.obj keccak_simd.obj lea.obj lea_simd.obj \ + lsh256.obj lsh256_avx.obj lsh256_sse.obj lsh512.obj lsh512_avx.obj \ + lsh512_sse.obj luc.obj mars.obj marss.obj md2.obj md4.obj md5.obj \ + misc.obj modes.obj mqueue.obj mqv.obj nbtheory.obj oaep.obj osrng.obj \ + padlkrng.obj panama.obj pkcspad.obj poly1305.obj polynomi.obj \ + ppc_simd.obj primetab.obj pssr.obj pubkey.obj queue.obj rabbit.obj \ + rabin.obj randpool.obj rc2.obj rc5.obj rc6.obj rdrand.obj rdtables.obj \ + rijndael.obj rijndael_simd.obj ripemd.obj rng.obj rsa.obj rw.obj \ + safer.obj salsa.obj scrypt.obj seal.obj seed.obj serpent.obj sha.obj \ + sha3.obj sha_simd.obj shacal2.obj shacal2_simd.obj shake.obj shark.obj \ + sharkbox.obj simeck.obj simon.obj simon128_simd.obj skipjack.obj sm3.obj \ + sm4.obj sm4_simd.obj sosemanuk.obj speck.obj speck128_simd.obj \ + square.obj squaretb.obj sse_simd.obj strciphr.obj tea.obj tftables.obj \ + threefish.obj tiger.obj tigertab.obj ttmac.obj tweetnacl.obj twofish.obj \ + vmac.obj wake.obj whrlpool.obj xed25519.obj xtr.obj xtrcrypt.obj xts.obj \ + zdeflate.obj zinflate.obj zlib.obj ASM_OBJS = \ - rdrand-x86.obj rdrand-x64.obj rdseed-x86.obj rdseed-x64.obj x64masm.obj x64dll.obj + cpuid64.obj rdrand-x86.obj rdrand-x64.obj rdseed-x86.obj rdseed-x64.obj \ + x64masm.obj x64dll.obj TEST_SRCS = \ test.cpp bench1.cpp bench2.cpp bench3.cpp datatest.cpp \ @@ -211,8 +212,8 @@ RDSEED_OBJ = rdseed-x86.obj # CXXFLAGS = $(CXXFLAGS) /DWINAPI_FAMILY=WINAPI_FAMILY_APP AS = ml64.exe ASFLAGS = /nologo /D_M_X64 /W3 /Cx /Zi -LIB_SRCS = $(LIB_SRCS) rdrand.cpp rdrand.asm rdseed.asm -LIB_OBJS = $(LIB_OBJS) rdrand-x64.obj rdseed-x64.obj x64masm.obj x64dll.obj +LIB_SRCS = $(LIB_SRCS) cpuid64.asm rdrand.cpp rdrand.asm rdseed.asm +LIB_OBJS = $(LIB_OBJS) cpuid64.obj rdrand-x64.obj rdseed-x64.obj x64masm.obj x64dll.obj LDFLAGS = $(LDFLAGS) /MACHINE:X64 LDLIBS = $(LDLIBS) kernel32.lib RDRAND_OBJ = rdrand-x64.obj diff --git a/cryptlib.vcxproj 
b/cryptlib.vcxproj index f99661d07..91dd1c171 100644 --- a/cryptlib.vcxproj +++ b/cryptlib.vcxproj @@ -368,6 +368,11 @@ + + Building and Assembling cpuid64.asm + ml64.exe /c /nologo /D_M_X64 /W3 /Zi /Fo"$(IntDir)cpuid64.obj" "%(FullPath)" + $(IntDir)cpuid64.obj;%(Outputs) + Building and assembling rdrand.asm ml.exe /c /nologo /D_M_X86 /W3 /Cx /Zi /safeseh /Fo"$(IntDir)rdrand-x86.obj" "%(FullPath)" diff --git a/cryptlib.vcxproj.filters b/cryptlib.vcxproj.filters index ef6f3c532..e89012e1a 100644 --- a/cryptlib.vcxproj.filters +++ b/cryptlib.vcxproj.filters @@ -1088,6 +1088,9 @@ + + Source Files + Source Files diff --git a/x64dll.asm b/x64dll.asm index 557afa02b..440b3c967 100644 --- a/x64dll.asm +++ b/x64dll.asm @@ -1975,50 +1975,5 @@ pop rsi ret SHA256_HashMultipleBlocks_SSE2 ENDP -;; http://www.agner.org/optimize/vectorclass/read.php?i=65 -;; word64 Xgetbv(word32 ctrl) -;; ctrl = rcx - - ALIGN 8 -XGETBV64 PROC FRAME -.endprolog - ;; query - DB 0fh, 01h, 0d0h - ;; xcr = (EDX << 32) | EAX - and rax, 0ffffffffh - shl rdx, 32 - or rax, rdx - ret -XGETBV64 ENDP - -;; word64 CpuId(word32 func, word32 subfunc, word32 output[4]) -;; func = rcx -;; subfunc = rdx -;; output = r8 - - ALIGN 8 -CPUID64 PROC FRAME - ;; preserve per ABI - mov [rsp+8], rbx -.savereg rbx, 8 -.endprolog - ;; eax = func - mov rax, rcx - ;; ecx = subfunc - mov rcx, rdx - ;; query - cpuid - ;; save - mov [r8+0], eax - mov [r8+4], ebx - mov [r8+8], ecx - mov [r8+12], edx - ;; return value - mov rax, 1 - ;; restore - mov rbx, [rsp+8] - ret -CPUID64 ENDP - _TEXT ENDS END