diff --git a/generated-src/linux-x86/crypto/fipsmodule/sha1-586.S b/generated-src/linux-x86/crypto/fipsmodule/sha1-586.S index eb59f2ba9a..0e5754fe20 100644 --- a/generated-src/linux-x86/crypto/fipsmodule/sha1-586.S +++ b/generated-src/linux-x86/crypto/fipsmodule/sha1-586.S @@ -5,36 +5,16 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) .text -.globl sha1_block_data_order -.hidden sha1_block_data_order -.type sha1_block_data_order,@function +.globl sha1_block_data_order_nohw +.hidden sha1_block_data_order_nohw +.type sha1_block_data_order_nohw,@function .align 16 -sha1_block_data_order: -.L_sha1_block_data_order_begin: +sha1_block_data_order_nohw: +.L_sha1_block_data_order_nohw_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call .L000pic_point -.L000pic_point: - popl %ebp - leal OPENSSL_ia32cap_P-.L000pic_point(%ebp),%esi - leal .LK_XX_XX-.L000pic_point(%ebp),%ebp - movl (%esi),%eax - movl 4(%esi),%edx - testl $512,%edx - jz .L001x86 - movl 8(%esi),%ecx - testl $16777216,%eax - jz .L001x86 - andl $268435456,%edx - andl $1073741824,%eax - orl %edx,%eax - cmpl $1342177280,%eax - je .Lavx_shortcut - jmp .Lssse3_shortcut -.align 16 -.L001x86: movl 20(%esp),%ebp movl 24(%esp),%esi movl 28(%esp),%eax @@ -43,9 +23,9 @@ sha1_block_data_order: addl %esi,%eax movl %eax,104(%esp) movl 16(%ebp),%edi - jmp .L002loop + jmp .L000loop .align 16 -.L002loop: +.L000loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx @@ -1392,27 +1372,28 @@ sha1_block_data_order: movl %ebx,12(%ebp) movl %edx,%esi movl %ecx,16(%ebp) - jb .L002loop + jb .L000loop addl $76,%esp popl %edi popl %esi popl %ebx popl %ebp ret -.size sha1_block_data_order,.-.L_sha1_block_data_order_begin -.hidden _sha1_block_data_order_ssse3 -.type _sha1_block_data_order_ssse3,@function +.size sha1_block_data_order_nohw,.-.L_sha1_block_data_order_nohw_begin +.globl sha1_block_data_order_ssse3 +.hidden sha1_block_data_order_ssse3 +.type sha1_block_data_order_ssse3,@function .align 16 -_sha1_block_data_order_ssse3: +sha1_block_data_order_ssse3: +.L_sha1_block_data_order_ssse3_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call .L003pic_point -.L003pic_point: + call .L001pic_point +.L001pic_point: popl %ebp - leal .LK_XX_XX-.L003pic_point(%ebp),%ebp -.Lssse3_shortcut: + leal .LK_XX_XX-.L001pic_point(%ebp),%ebp movdqa (%ebp),%xmm7 movdqa 16(%ebp),%xmm0 movdqa 32(%ebp),%xmm1 @@ -1464,9 +1445,9 @@ _sha1_block_data_order_ssse3: xorl %edx,%ebp pshufd $238,%xmm0,%xmm4 andl %ebp,%esi - jmp .L004loop + jmp .L002loop .align 16 -.L004loop: +.L002loop: rorl $2,%ebx xorl %edx,%esi movl %eax,%ebp @@ -2369,7 +2350,7 @@ _sha1_block_data_order_ssse3: addl %edx,%ecx movl 196(%esp),%ebp cmpl 200(%esp),%ebp - je .L005done + je .L003done movdqa 160(%esp),%xmm7 movdqa 176(%esp),%xmm6 movdqu (%ebp),%xmm0 @@ -2504,9 +2485,9 @@ _sha1_block_data_order_ssse3: pshufd $238,%xmm0,%xmm4 andl %ebx,%esi movl %ebp,%ebx - jmp .L004loop + jmp .L002loop .align 16 -.L005done: +.L003done: addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp @@ -2619,20 +2600,21 @@ _sha1_block_data_order_ssse3: popl %ebx popl %ebp ret -.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3 -.hidden _sha1_block_data_order_avx -.type _sha1_block_data_order_avx,@function +.size sha1_block_data_order_ssse3,.-.L_sha1_block_data_order_ssse3_begin +.globl sha1_block_data_order_avx +.hidden sha1_block_data_order_avx +.type sha1_block_data_order_avx,@function .align 16 -_sha1_block_data_order_avx: +sha1_block_data_order_avx: +.L_sha1_block_data_order_avx_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call .L006pic_point -.L006pic_point: + call .L004pic_point +.L004pic_point: popl %ebp - leal .LK_XX_XX-.L006pic_point(%ebp),%ebp -.Lavx_shortcut: + leal .LK_XX_XX-.L004pic_point(%ebp),%ebp vzeroall vmovdqa (%ebp),%xmm7 vmovdqa 16(%ebp),%xmm0 @@ -2681,9 +2663,9 @@ _sha1_block_data_order_avx: xorl %edx,%ebp vmovdqa %xmm6,32(%esp) andl %ebp,%esi - jmp .L007loop + jmp .L005loop .align 16 -.L007loop: +.L005loop: shrdl $2,%ebx,%ebx xorl %edx,%esi vpalignr $8,%xmm0,%xmm1,%xmm4 @@ -3543,7 +3525,7 @@ _sha1_block_data_order_avx: addl %edx,%ecx movl 196(%esp),%ebp cmpl 200(%esp),%ebp - je .L008done + je .L006done vmovdqa 160(%esp),%xmm7 vmovdqa 176(%esp),%xmm6 vmovdqu (%ebp),%xmm0 @@ -3674,9 +3656,9 @@ _sha1_block_data_order_avx: movl %esi,%ebp andl %ebx,%esi movl %ebp,%ebx - jmp .L007loop + jmp .L005loop .align 16 -.L008done: +.L006done: addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp @@ -3790,7 +3772,7 @@ _sha1_block_data_order_avx: popl %ebx popl %ebp ret -.size _sha1_block_data_order_avx,.-_sha1_block_data_order_avx +.size sha1_block_data_order_avx,.-.L_sha1_block_data_order_avx_begin .align 64 .LK_XX_XX: .long 1518500249,1518500249,1518500249,1518500249 diff --git a/generated-src/linux-x86/crypto/fipsmodule/sha256-586.S b/generated-src/linux-x86/crypto/fipsmodule/sha256-586.S index ee41b78cbf..41b3759d36 100644 --- a/generated-src/linux-x86/crypto/fipsmodule/sha256-586.S +++ b/generated-src/linux-x86/crypto/fipsmodule/sha256-586.S @@ -5,12 +5,12 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) .text -.globl sha256_block_data_order -.hidden sha256_block_data_order -.type sha256_block_data_order,@function +.globl sha256_block_data_order_nohw +.hidden sha256_block_data_order_nohw +.type sha256_block_data_order_nohw,@function .align 16 -sha256_block_data_order: -.L_sha256_block_data_order_begin: +sha256_block_data_order_nohw: +.L_sha256_block_data_order_nohw_begin: pushl %ebp pushl %ebx pushl %esi @@ -22,7 +22,7 @@ sha256_block_data_order: call .L000pic_point .L000pic_point: popl %ebp - leal .L001K256-.L000pic_point(%ebp),%ebp + leal .LK256-.L000pic_point(%ebp),%ebp subl $16,%esp andl $-64,%esp shll $6,%eax @@ -31,29 +31,13 @@ sha256_block_data_order: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) - leal OPENSSL_ia32cap_P-.L001K256(%ebp),%edx - movl (%edx),%ecx - movl 4(%edx),%ebx - testl $1048576,%ecx - jnz .L002loop - movl 8(%edx),%edx - testl $16777216,%ecx - jz .L003no_xmm - andl $1073741824,%ecx - andl $268435968,%ebx - orl %ebx,%ecx - andl $1342177280,%ecx - cmpl $1342177280,%ecx - je .L004AVX - testl $512,%ebx - jnz .L005SSSE3 -.L003no_xmm: +.L001no_xmm: subl %edi,%eax cmpl $256,%eax - jae .L006unrolled - jmp .L002loop + jae .L002unrolled + jmp .L003loop .align 16 -.L002loop: +.L003loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx @@ -122,7 +106,7 @@ sha256_block_data_order: movl %ecx,28(%esp) movl %edi,32(%esp) .align 16 -.L00700_15: +.L00400_15: movl %edx,%ecx movl 24(%esp),%esi rorl $14,%ecx @@ -160,11 +144,11 @@ sha256_block_data_order: addl $4,%ebp addl %ebx,%eax cmpl $3248222580,%esi - jne .L00700_15 + jne .L00400_15 movl 156(%esp),%ecx - jmp .L00816_63 + jmp .L00516_63 .align 16 -.L00816_63: +.L00516_63: movl %ecx,%ebx movl 104(%esp),%esi rorl $11,%ecx @@ -219,7 +203,7 @@ sha256_block_data_order: addl $4,%ebp addl %ebx,%eax cmpl $3329325298,%esi - jne .L00816_63 + jne .L00516_63 movl 356(%esp),%esi movl 8(%esp),%ebx movl 16(%esp),%ecx @@ -246,7 +230,7 @@ sha256_block_data_order: leal 356(%esp),%esp subl $256,%ebp cmpl 8(%esp),%edi - jb .L002loop + jb .L003loop movl 12(%esp),%esp popl %edi popl %esi @@ -254,7 +238,7 @@ sha256_block_data_order: popl %ebp ret .align 64 -.L001K256: +.LK256: .long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 .long 66051,67438087,134810123,202182159 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 @@ -263,7 +247,7 @@ sha256_block_data_order: .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .align 16 -.L006unrolled: +.L002unrolled: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebp @@ -280,9 +264,9 @@ sha256_block_data_order: movl %ebx,20(%esp) movl %ecx,24(%esp) movl %esi,28(%esp) - jmp .L009grand_loop + jmp .L006grand_loop .align 16 -.L009grand_loop: +.L006grand_loop: movl (%edi),%ebx movl 4(%edi),%ecx bswap %ebx @@ -3162,15 +3146,40 @@ sha256_block_data_order: movl %ebx,24(%esp) movl %ecx,28(%esp) cmpl 104(%esp),%edi - jb .L009grand_loop + jb .L006grand_loop movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret -.align 32 -.L005SSSE3: +.size sha256_block_data_order_nohw,.-.L_sha256_block_data_order_nohw_begin +.globl sha256_block_data_order_ssse3 +.hidden sha256_block_data_order_ssse3 +.type sha256_block_data_order_ssse3,@function +.align 16 +sha256_block_data_order_ssse3: +.L_sha256_block_data_order_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L007pic_point +.L007pic_point: + popl %ebp + leal .LK256-.L007pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebx @@ -3189,9 +3198,9 @@ sha256_block_data_order: movl %ecx,24(%esp) movl %esi,28(%esp) movdqa 256(%ebp),%xmm7 - jmp .L010grand_ssse3 + jmp .L008grand_ssse3 .align 16 -.L010grand_ssse3: +.L008grand_ssse3: movdqu (%edi),%xmm0 movdqu 16(%edi),%xmm1 movdqu 32(%edi),%xmm2 @@ -3214,9 +3223,9 @@ sha256_block_data_order: paddd %xmm3,%xmm7 movdqa %xmm6,64(%esp) movdqa %xmm7,80(%esp) - jmp .L011ssse3_00_47 + jmp .L009ssse3_00_47 .align 16 -.L011ssse3_00_47: +.L009ssse3_00_47: addl $64,%ebp movl %edx,%ecx movdqa %xmm1,%xmm4 @@ -3859,7 +3868,7 @@ sha256_block_data_order: addl %ecx,%eax movdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne .L011ssse3_00_47 + jne .L009ssse3_00_47 movl %edx,%ecx rorl $14,%edx movl 20(%esp),%esi @@ -4373,15 +4382,40 @@ sha256_block_data_order: movdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb .L010grand_ssse3 + jb .L008grand_ssse3 movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret -.align 32 -.L004AVX: +.size sha256_block_data_order_ssse3,.-.L_sha256_block_data_order_ssse3_begin +.globl sha256_block_data_order_avx +.hidden sha256_block_data_order_avx +.type sha256_block_data_order_avx,@function +.align 16 +sha256_block_data_order_avx: +.L_sha256_block_data_order_avx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L010pic_point +.L010pic_point: + popl %ebp + leal .LK256-.L010pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) leal -96(%esp),%esp vzeroall movl (%esi),%eax @@ -4401,9 +4435,9 @@ sha256_block_data_order: movl %ecx,24(%esp) movl %esi,28(%esp) vmovdqa 256(%ebp),%xmm7 - jmp .L012grand_avx + jmp .L011grand_avx .align 32 -.L012grand_avx: +.L011grand_avx: vmovdqu (%edi),%xmm0 vmovdqu 16(%edi),%xmm1 vmovdqu 32(%edi),%xmm2 @@ -4422,9 +4456,9 @@ sha256_block_data_order: vmovdqa %xmm5,48(%esp) vmovdqa %xmm6,64(%esp) vmovdqa %xmm7,80(%esp) - jmp .L013avx_00_47 + jmp .L012avx_00_47 .align 16 -.L013avx_00_47: +.L012avx_00_47: addl $64,%ebp vpalignr $4,%xmm0,%xmm1,%xmm4 movl %edx,%ecx @@ -5039,7 +5073,7 @@ sha256_block_data_order: addl %ecx,%eax vmovdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne .L013avx_00_47 + jne .L012avx_00_47 movl %edx,%ecx shrdl $14,%edx,%edx movl 20(%esp),%esi @@ -5553,7 +5587,7 @@ sha256_block_data_order: vmovdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb .L012grand_avx + jb .L011grand_avx movl 108(%esp),%esp vzeroall popl %edi @@ -5561,5 +5595,5 @@ sha256_block_data_order: popl %ebx popl %ebp ret -.size sha256_block_data_order,.-.L_sha256_block_data_order_begin +.size sha256_block_data_order_avx,.-.L_sha256_block_data_order_avx_begin #endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/generated-src/mac-x86/crypto/fipsmodule/sha1-586.S b/generated-src/mac-x86/crypto/fipsmodule/sha1-586.S index 76ee6bc5a3..f0ab02be58 100644 --- a/generated-src/mac-x86/crypto/fipsmodule/sha1-586.S +++ b/generated-src/mac-x86/crypto/fipsmodule/sha1-586.S @@ -5,35 +5,15 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) .text -.globl _sha1_block_data_order -.private_extern _sha1_block_data_order +.globl _sha1_block_data_order_nohw +.private_extern _sha1_block_data_order_nohw .align 4 -_sha1_block_data_order: -L_sha1_block_data_order_begin: +_sha1_block_data_order_nohw: +L_sha1_block_data_order_nohw_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call L000pic_point -L000pic_point: - popl %ebp - movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000pic_point(%ebp),%esi - leal LK_XX_XX-L000pic_point(%ebp),%ebp - movl (%esi),%eax - movl 4(%esi),%edx - testl $512,%edx - jz L001x86 - movl 8(%esi),%ecx - testl $16777216,%eax - jz L001x86 - andl $268435456,%edx - andl $1073741824,%eax - orl %edx,%eax - cmpl $1342177280,%eax - je Lavx_shortcut - jmp Lssse3_shortcut -.align 4,0x90 -L001x86: movl 20(%esp),%ebp movl 24(%esp),%esi movl 28(%esp),%eax @@ -42,9 +22,9 @@ L001x86: addl %esi,%eax movl %eax,104(%esp) movl 16(%ebp),%edi - jmp L002loop + jmp L000loop .align 4,0x90 -L002loop: +L000loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx @@ -1391,25 +1371,26 @@ L002loop: movl %ebx,12(%ebp) movl %edx,%esi movl %ecx,16(%ebp) - jb L002loop + jb L000loop addl $76,%esp popl %edi popl %esi popl %ebx popl %ebp ret -.private_extern __sha1_block_data_order_ssse3 +.globl _sha1_block_data_order_ssse3 +.private_extern _sha1_block_data_order_ssse3 .align 4 -__sha1_block_data_order_ssse3: +_sha1_block_data_order_ssse3: +L_sha1_block_data_order_ssse3_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call L003pic_point -L003pic_point: + call L001pic_point +L001pic_point: popl %ebp - leal LK_XX_XX-L003pic_point(%ebp),%ebp -Lssse3_shortcut: + leal LK_XX_XX-L001pic_point(%ebp),%ebp movdqa (%ebp),%xmm7 movdqa 16(%ebp),%xmm0 movdqa 32(%ebp),%xmm1 @@ -1461,9 +1442,9 @@ Lssse3_shortcut: xorl %edx,%ebp pshufd $238,%xmm0,%xmm4 andl %ebp,%esi - jmp L004loop + jmp L002loop .align 4,0x90 -L004loop: +L002loop: rorl $2,%ebx xorl %edx,%esi movl %eax,%ebp @@ -2366,7 +2347,7 @@ L004loop: addl %edx,%ecx movl 196(%esp),%ebp cmpl 200(%esp),%ebp - je L005done + je L003done movdqa 160(%esp),%xmm7 movdqa 176(%esp),%xmm6 movdqu (%ebp),%xmm0 @@ -2501,9 +2482,9 @@ L004loop: pshufd $238,%xmm0,%xmm4 andl %ebx,%esi movl %ebp,%ebx - jmp L004loop + jmp L002loop .align 4,0x90 -L005done: +L003done: addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp @@ -2616,18 +2597,19 @@ L005done: popl %ebx popl %ebp ret -.private_extern __sha1_block_data_order_avx +.globl _sha1_block_data_order_avx +.private_extern _sha1_block_data_order_avx .align 4 -__sha1_block_data_order_avx: +_sha1_block_data_order_avx: +L_sha1_block_data_order_avx_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call L006pic_point -L006pic_point: + call L004pic_point +L004pic_point: popl %ebp - leal LK_XX_XX-L006pic_point(%ebp),%ebp -Lavx_shortcut: + leal LK_XX_XX-L004pic_point(%ebp),%ebp vzeroall vmovdqa (%ebp),%xmm7 vmovdqa 16(%ebp),%xmm0 @@ -2676,9 +2658,9 @@ Lavx_shortcut: xorl %edx,%ebp vmovdqa %xmm6,32(%esp) andl %ebp,%esi - jmp L007loop + jmp L005loop .align 4,0x90 -L007loop: +L005loop: shrdl $2,%ebx,%ebx xorl %edx,%esi vpalignr $8,%xmm0,%xmm1,%xmm4 @@ -3538,7 +3520,7 @@ L007loop: addl %edx,%ecx movl 196(%esp),%ebp cmpl 200(%esp),%ebp - je L008done + je L006done vmovdqa 160(%esp),%xmm7 vmovdqa 176(%esp),%xmm6 vmovdqu (%ebp),%xmm0 @@ -3669,9 +3651,9 @@ L007loop: movl %esi,%ebp andl %ebx,%esi movl %ebp,%ebx - jmp L007loop + jmp L005loop .align 4,0x90 -L008done: +L006done: addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp @@ -3797,8 +3779,4 @@ LK_XX_XX: .byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 .byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 .byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.section __IMPORT,__pointers,non_lazy_symbol_pointers -L_OPENSSL_ia32cap_P$non_lazy_ptr: -.indirect_symbol _OPENSSL_ia32cap_P -.long 0 #endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/generated-src/mac-x86/crypto/fipsmodule/sha256-586.S b/generated-src/mac-x86/crypto/fipsmodule/sha256-586.S index d43510a491..8e74e68620 100644 --- a/generated-src/mac-x86/crypto/fipsmodule/sha256-586.S +++ b/generated-src/mac-x86/crypto/fipsmodule/sha256-586.S @@ -5,11 +5,11 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) .text -.globl _sha256_block_data_order -.private_extern _sha256_block_data_order +.globl _sha256_block_data_order_nohw +.private_extern _sha256_block_data_order_nohw .align 4 -_sha256_block_data_order: -L_sha256_block_data_order_begin: +_sha256_block_data_order_nohw: +L_sha256_block_data_order_nohw_begin: pushl %ebp pushl %ebx pushl %esi @@ -21,7 +21,7 @@ L_sha256_block_data_order_begin: call L000pic_point L000pic_point: popl %ebp - leal L001K256-L000pic_point(%ebp),%ebp + leal LK256-L000pic_point(%ebp),%ebp subl $16,%esp andl $-64,%esp shll $6,%eax @@ -30,29 +30,13 @@ L000pic_point: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) - movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K256(%ebp),%edx - movl (%edx),%ecx - movl 4(%edx),%ebx - testl $1048576,%ecx - jnz L002loop - movl 8(%edx),%edx - testl $16777216,%ecx - jz L003no_xmm - andl $1073741824,%ecx - andl $268435968,%ebx - orl %ebx,%ecx - andl $1342177280,%ecx - cmpl $1342177280,%ecx - je L004AVX - testl $512,%ebx - jnz L005SSSE3 -L003no_xmm: +L001no_xmm: subl %edi,%eax cmpl $256,%eax - jae L006unrolled - jmp L002loop + jae L002unrolled + jmp L003loop .align 4,0x90 -L002loop: +L003loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx @@ -121,7 +105,7 @@ L002loop: movl %ecx,28(%esp) movl %edi,32(%esp) .align 4,0x90 -L00700_15: +L00400_15: movl %edx,%ecx movl 24(%esp),%esi rorl $14,%ecx @@ -159,11 +143,11 @@ L00700_15: addl $4,%ebp addl %ebx,%eax cmpl $3248222580,%esi - jne L00700_15 + jne L00400_15 movl 156(%esp),%ecx - jmp L00816_63 + jmp L00516_63 .align 4,0x90 -L00816_63: +L00516_63: movl %ecx,%ebx movl 104(%esp),%esi rorl $11,%ecx @@ -218,7 +202,7 @@ L00816_63: addl $4,%ebp addl %ebx,%eax cmpl $3329325298,%esi - jne L00816_63 + jne L00516_63 movl 356(%esp),%esi movl 8(%esp),%ebx movl 16(%esp),%ecx @@ -245,7 +229,7 @@ L00816_63: leal 356(%esp),%esp subl $256,%ebp cmpl 8(%esp),%edi - jb L002loop + jb L003loop movl 12(%esp),%esp popl %edi popl %esi @@ -253,7 +237,7 @@ L00816_63: popl %ebp ret .align 6,0x90 -L001K256: +LK256: .long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 .long 66051,67438087,134810123,202182159 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 @@ -262,7 +246,7 @@ L001K256: .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .align 4,0x90 -L006unrolled: +L002unrolled: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebp @@ -279,9 +263,9 @@ L006unrolled: movl %ebx,20(%esp) movl %ecx,24(%esp) movl %esi,28(%esp) - jmp L009grand_loop + jmp L006grand_loop .align 4,0x90 -L009grand_loop: +L006grand_loop: movl (%edi),%ebx movl 4(%edi),%ecx bswap %ebx @@ -3161,15 +3145,38 @@ L009grand_loop: movl %ebx,24(%esp) movl %ecx,28(%esp) cmpl 104(%esp),%edi - jb L009grand_loop + jb L006grand_loop movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret -.align 5,0x90 -L005SSSE3: +.globl _sha256_block_data_order_ssse3 +.private_extern _sha256_block_data_order_ssse3 +.align 4 +_sha256_block_data_order_ssse3: +L_sha256_block_data_order_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L007pic_point +L007pic_point: + popl %ebp + leal LK256-L007pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebx @@ -3188,9 +3195,9 @@ L005SSSE3: movl %ecx,24(%esp) movl %esi,28(%esp) movdqa 256(%ebp),%xmm7 - jmp L010grand_ssse3 + jmp L008grand_ssse3 .align 4,0x90 -L010grand_ssse3: +L008grand_ssse3: movdqu (%edi),%xmm0 movdqu 16(%edi),%xmm1 movdqu 32(%edi),%xmm2 @@ -3213,9 +3220,9 @@ L010grand_ssse3: paddd %xmm3,%xmm7 movdqa %xmm6,64(%esp) movdqa %xmm7,80(%esp) - jmp L011ssse3_00_47 + jmp L009ssse3_00_47 .align 4,0x90 -L011ssse3_00_47: +L009ssse3_00_47: addl $64,%ebp movl %edx,%ecx movdqa %xmm1,%xmm4 @@ -3858,7 +3865,7 @@ L011ssse3_00_47: addl %ecx,%eax movdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne L011ssse3_00_47 + jne L009ssse3_00_47 movl %edx,%ecx rorl $14,%edx movl 20(%esp),%esi @@ -4372,15 +4379,38 @@ L011ssse3_00_47: movdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb L010grand_ssse3 + jb L008grand_ssse3 movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret -.align 5,0x90 -L004AVX: +.globl _sha256_block_data_order_avx +.private_extern _sha256_block_data_order_avx +.align 4 +_sha256_block_data_order_avx: +L_sha256_block_data_order_avx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L010pic_point +L010pic_point: + popl %ebp + leal LK256-L010pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) leal -96(%esp),%esp vzeroall movl (%esi),%eax @@ -4400,9 +4430,9 @@ L004AVX: movl %ecx,24(%esp) movl %esi,28(%esp) vmovdqa 256(%ebp),%xmm7 - jmp L012grand_avx + jmp L011grand_avx .align 5,0x90 -L012grand_avx: +L011grand_avx: vmovdqu (%edi),%xmm0 vmovdqu 16(%edi),%xmm1 vmovdqu 32(%edi),%xmm2 @@ -4421,9 +4451,9 @@ L012grand_avx: vmovdqa %xmm5,48(%esp) vmovdqa %xmm6,64(%esp) vmovdqa %xmm7,80(%esp) - jmp L013avx_00_47 + jmp L012avx_00_47 .align 4,0x90 -L013avx_00_47: +L012avx_00_47: addl $64,%ebp vpalignr $4,%xmm0,%xmm1,%xmm4 movl %edx,%ecx @@ -5038,7 +5068,7 @@ L013avx_00_47: addl %ecx,%eax vmovdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne L013avx_00_47 + jne L012avx_00_47 movl %edx,%ecx shrdl $14,%edx,%edx movl 20(%esp),%esi @@ -5552,7 +5582,7 @@ L013avx_00_47: vmovdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb L012grand_avx + jb L011grand_avx movl 108(%esp),%esp vzeroall popl %edi @@ -5560,8 +5590,4 @@ L013avx_00_47: popl %ebx popl %ebp ret -.section __IMPORT,__pointers,non_lazy_symbol_pointers -L_OPENSSL_ia32cap_P$non_lazy_ptr: -.indirect_symbol _OPENSSL_ia32cap_P -.long 0 #endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/generated-src/win-x86/crypto/fipsmodule/sha1-586.asm b/generated-src/win-x86/crypto/fipsmodule/sha1-586.asm index ae30b3d13d..8cff61f50f 100644 --- a/generated-src/win-x86/crypto/fipsmodule/sha1-586.asm +++ b/generated-src/win-x86/crypto/fipsmodule/sha1-586.asm @@ -11,35 +11,14 @@ section .text code align=64 %else section .text code %endif -;extern _OPENSSL_ia32cap_P -global _sha1_block_data_order +global _sha1_block_data_order_nohw align 16 -_sha1_block_data_order: -L$_sha1_block_data_order_begin: +_sha1_block_data_order_nohw: +L$_sha1_block_data_order_nohw_begin: push ebp push ebx push esi push edi - call L$000pic_point -L$000pic_point: - pop ebp - lea esi,[_OPENSSL_ia32cap_P] - lea ebp,[(L$K_XX_XX-L$000pic_point)+ebp] - mov eax,DWORD [esi] - mov edx,DWORD [4+esi] - test edx,512 - jz NEAR L$001x86 - mov ecx,DWORD [8+esi] - test eax,16777216 - jz NEAR L$001x86 - and edx,268435456 - and eax,1073741824 - or eax,edx - cmp eax,1342177280 - je NEAR L$avx_shortcut - jmp NEAR L$ssse3_shortcut -align 16 -L$001x86: mov ebp,DWORD [20+esp] mov esi,DWORD [24+esp] mov eax,DWORD [28+esp] @@ -48,9 +27,9 @@ L$001x86: add eax,esi mov DWORD [104+esp],eax mov edi,DWORD [16+ebp] - jmp NEAR L$002loop + jmp NEAR L$000loop align 16 -L$002loop: +L$000loop: mov eax,DWORD [esi] mov ebx,DWORD [4+esi] mov ecx,DWORD [8+esi] @@ -1397,24 +1376,25 @@ L$002loop: mov DWORD [12+ebp],ebx mov esi,edx mov DWORD [16+ebp],ecx - jb NEAR L$002loop + jb NEAR L$000loop add esp,76 pop edi pop esi pop ebx pop ebp ret +global _sha1_block_data_order_ssse3 align 16 -__sha1_block_data_order_ssse3: +_sha1_block_data_order_ssse3: +L$_sha1_block_data_order_ssse3_begin: push ebp push ebx push esi push edi - call L$003pic_point -L$003pic_point: + call L$001pic_point +L$001pic_point: pop ebp - lea ebp,[(L$K_XX_XX-L$003pic_point)+ebp] -L$ssse3_shortcut: + lea ebp,[(L$K_XX_XX-L$001pic_point)+ebp] movdqa xmm7,[ebp] movdqa xmm0,[16+ebp] movdqa xmm1,[32+ebp] @@ -1466,9 +1446,9 @@ db 102,15,56,0,222 xor ebp,edx pshufd xmm4,xmm0,238 and esi,ebp - jmp NEAR L$004loop + jmp NEAR L$002loop align 16 -L$004loop: +L$002loop: ror ebx,2 xor esi,edx mov ebp,eax @@ -2371,7 +2351,7 @@ L$004loop: add ecx,edx mov ebp,DWORD [196+esp] cmp ebp,DWORD [200+esp] - je NEAR L$005done + je NEAR L$003done movdqa xmm7,[160+esp] movdqa xmm6,[176+esp] movdqu xmm0,[ebp] @@ -2506,9 +2486,9 @@ db 102,15,56,0,222 pshufd xmm4,xmm0,238 and esi,ebx mov ebx,ebp - jmp NEAR L$004loop + jmp NEAR L$002loop align 16 -L$005done: +L$003done: add ebx,DWORD [16+esp] xor esi,edi mov ebp,ecx @@ -2621,17 +2601,18 @@ L$005done: pop ebx pop ebp ret +global _sha1_block_data_order_avx align 16 -__sha1_block_data_order_avx: +_sha1_block_data_order_avx: +L$_sha1_block_data_order_avx_begin: push ebp push ebx push esi push edi - call L$006pic_point -L$006pic_point: + call L$004pic_point +L$004pic_point: pop ebp - lea ebp,[(L$K_XX_XX-L$006pic_point)+ebp] -L$avx_shortcut: + lea ebp,[(L$K_XX_XX-L$004pic_point)+ebp] vzeroall vmovdqa xmm7,[ebp] vmovdqa xmm0,[16+ebp] @@ -2680,9 +2661,9 @@ L$avx_shortcut: xor ebp,edx vmovdqa [32+esp],xmm6 and esi,ebp - jmp NEAR L$007loop + jmp NEAR L$005loop align 16 -L$007loop: +L$005loop: shrd ebx,ebx,2 xor esi,edx vpalignr xmm4,xmm1,xmm0,8 @@ -3542,7 +3523,7 @@ L$007loop: add ecx,edx mov ebp,DWORD [196+esp] cmp ebp,DWORD [200+esp] - je NEAR L$008done + je NEAR L$006done vmovdqa xmm7,[160+esp] vmovdqa xmm6,[176+esp] vmovdqu xmm0,[ebp] @@ -3673,9 +3654,9 @@ L$007loop: mov ebp,esi and esi,ebx mov ebx,ebp - jmp NEAR L$007loop + jmp NEAR L$005loop align 16 -L$008done: +L$006done: add ebx,DWORD [16+esp] xor esi,edi mov ebp,ecx @@ -3801,8 +3782,6 @@ db 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 db 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 db 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 db 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -segment .bss -common _OPENSSL_ia32cap_P 16 %else ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 ret diff --git a/generated-src/win-x86/crypto/fipsmodule/sha256-586.asm b/generated-src/win-x86/crypto/fipsmodule/sha256-586.asm index 434195bad4..61b3a6b199 100644 --- a/generated-src/win-x86/crypto/fipsmodule/sha256-586.asm +++ b/generated-src/win-x86/crypto/fipsmodule/sha256-586.asm @@ -11,11 +11,10 @@ section .text code align=64 %else section .text code %endif -;extern _OPENSSL_ia32cap_P -global _sha256_block_data_order +global _sha256_block_data_order_nohw align 16 -_sha256_block_data_order: -L$_sha256_block_data_order_begin: +_sha256_block_data_order_nohw: +L$_sha256_block_data_order_nohw_begin: push ebp push ebx push esi @@ -27,7 +26,7 @@ L$_sha256_block_data_order_begin: call L$000pic_point L$000pic_point: pop ebp - lea ebp,[(L$001K256-L$000pic_point)+ebp] + lea ebp,[(L$K256-L$000pic_point)+ebp] sub esp,16 and esp,-64 shl eax,6 @@ -36,29 +35,13 @@ L$000pic_point: mov DWORD [4+esp],edi mov DWORD [8+esp],eax mov DWORD [12+esp],ebx - lea edx,[_OPENSSL_ia32cap_P] - mov ecx,DWORD [edx] - mov ebx,DWORD [4+edx] - test ecx,1048576 - jnz NEAR L$002loop - mov edx,DWORD [8+edx] - test ecx,16777216 - jz NEAR L$003no_xmm - and ecx,1073741824 - and ebx,268435968 - or ecx,ebx - and ecx,1342177280 - cmp ecx,1342177280 - je NEAR L$004AVX - test ebx,512 - jnz NEAR L$005SSSE3 -L$003no_xmm: +L$001no_xmm: sub eax,edi cmp eax,256 - jae NEAR L$006unrolled - jmp NEAR L$002loop + jae NEAR L$002unrolled + jmp NEAR L$003loop align 16 -L$002loop: +L$003loop: mov eax,DWORD [edi] mov ebx,DWORD [4+edi] mov ecx,DWORD [8+edi] @@ -127,7 +110,7 @@ L$002loop: mov DWORD [28+esp],ecx mov DWORD [32+esp],edi align 16 -L$00700_15: +L$00400_15: mov ecx,edx mov esi,DWORD [24+esp] ror ecx,14 @@ -165,11 +148,11 @@ L$00700_15: add ebp,4 add eax,ebx cmp esi,3248222580 - jne NEAR L$00700_15 + jne NEAR L$00400_15 mov ecx,DWORD [156+esp] - jmp NEAR L$00816_63 + jmp NEAR L$00516_63 align 16 -L$00816_63: +L$00516_63: mov ebx,ecx mov esi,DWORD [104+esp] ror ecx,11 @@ -224,7 +207,7 @@ L$00816_63: add ebp,4 add eax,ebx cmp esi,3329325298 - jne NEAR L$00816_63 + jne NEAR L$00516_63 mov esi,DWORD [356+esp] mov ebx,DWORD [8+esp] mov ecx,DWORD [16+esp] @@ -251,7 +234,7 @@ L$00816_63: lea esp,[356+esp] sub ebp,256 cmp edi,DWORD [8+esp] - jb NEAR L$002loop + jb NEAR L$003loop mov esp,DWORD [12+esp] pop edi pop esi @@ -259,7 +242,7 @@ L$00816_63: pop ebp ret align 64 -L$001K256: +L$K256: dd 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 dd 66051,67438087,134810123,202182159 db 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 @@ -268,7 +251,7 @@ db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 db 62,0 align 16 -L$006unrolled: +L$002unrolled: lea esp,[esp-96] mov eax,DWORD [esi] mov ebp,DWORD [4+esi] @@ -285,9 +268,9 @@ L$006unrolled: mov DWORD [20+esp],ebx mov DWORD [24+esp],ecx mov DWORD [28+esp],esi - jmp NEAR L$009grand_loop + jmp NEAR L$006grand_loop align 16 -L$009grand_loop: +L$006grand_loop: mov ebx,DWORD [edi] mov ecx,DWORD [4+edi] bswap ebx @@ -3167,15 +3150,37 @@ L$009grand_loop: mov DWORD [24+esp],ebx mov DWORD [28+esp],ecx cmp edi,DWORD [104+esp] - jb NEAR L$009grand_loop + jb NEAR L$006grand_loop mov esp,DWORD [108+esp] pop edi pop esi pop ebx pop ebp ret -align 32 -L$005SSSE3: +global _sha256_block_data_order_ssse3 +align 16 +_sha256_block_data_order_ssse3: +L$_sha256_block_data_order_ssse3_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov ebx,esp + call L$007pic_point +L$007pic_point: + pop ebp + lea ebp,[(L$K256-L$007pic_point)+ebp] + sub esp,16 + and esp,-64 + shl eax,6 + add eax,edi + mov DWORD [esp],esi + mov DWORD [4+esp],edi + mov DWORD [8+esp],eax + mov DWORD [12+esp],ebx lea esp,[esp-96] mov eax,DWORD [esi] mov ebx,DWORD [4+esi] @@ -3194,9 +3199,9 @@ L$005SSSE3: mov DWORD [24+esp],ecx mov DWORD [28+esp],esi movdqa xmm7,[256+ebp] - jmp NEAR L$010grand_ssse3 + jmp NEAR L$008grand_ssse3 align 16 -L$010grand_ssse3: +L$008grand_ssse3: movdqu xmm0,[edi] movdqu xmm1,[16+edi] movdqu xmm2,[32+edi] @@ -3219,9 +3224,9 @@ db 102,15,56,0,223 paddd xmm7,xmm3 movdqa [64+esp],xmm6 movdqa [80+esp],xmm7 - jmp NEAR L$011ssse3_00_47 + jmp NEAR L$009ssse3_00_47 align 16 -L$011ssse3_00_47: +L$009ssse3_00_47: add ebp,64 mov ecx,edx movdqa xmm4,xmm1 @@ -3864,7 +3869,7 @@ db 102,15,58,15,249,4 add eax,ecx movdqa [80+esp],xmm6 cmp DWORD [64+ebp],66051 - jne NEAR L$011ssse3_00_47 + jne NEAR L$009ssse3_00_47 mov ecx,edx ror edx,14 mov esi,DWORD [20+esp] @@ -4378,15 +4383,37 @@ db 102,15,58,15,249,4 movdqa xmm7,[64+ebp] sub ebp,192 cmp edi,DWORD [104+esp] - jb NEAR L$010grand_ssse3 + jb NEAR L$008grand_ssse3 mov esp,DWORD [108+esp] pop edi pop esi pop ebx pop ebp ret -align 32 -L$004AVX: +global _sha256_block_data_order_avx +align 16 +_sha256_block_data_order_avx: +L$_sha256_block_data_order_avx_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov ebx,esp + call L$010pic_point +L$010pic_point: + pop ebp + lea ebp,[(L$K256-L$010pic_point)+ebp] + sub esp,16 + and esp,-64 + shl eax,6 + add eax,edi + mov DWORD [esp],esi + mov DWORD [4+esp],edi + mov DWORD [8+esp],eax + mov DWORD [12+esp],ebx lea esp,[esp-96] vzeroall mov eax,DWORD [esi] @@ -4406,9 +4433,9 @@ L$004AVX: mov DWORD [24+esp],ecx mov DWORD [28+esp],esi vmovdqa xmm7,[256+ebp] - jmp NEAR L$012grand_avx + jmp NEAR L$011grand_avx align 32 -L$012grand_avx: +L$011grand_avx: vmovdqu xmm0,[edi] vmovdqu xmm1,[16+edi] vmovdqu xmm2,[32+edi] @@ -4427,9 +4454,9 @@ L$012grand_avx: vmovdqa [48+esp],xmm5 vmovdqa [64+esp],xmm6 vmovdqa [80+esp],xmm7 - jmp NEAR L$013avx_00_47 + jmp NEAR L$012avx_00_47 align 16 -L$013avx_00_47: +L$012avx_00_47: add ebp,64 vpalignr xmm4,xmm1,xmm0,4 mov ecx,edx @@ -5044,7 +5071,7 @@ L$013avx_00_47: add eax,ecx vmovdqa [80+esp],xmm6 cmp DWORD [64+ebp],66051 - jne NEAR L$013avx_00_47 + jne NEAR L$012avx_00_47 mov ecx,edx shrd edx,edx,14 mov esi,DWORD [20+esp] @@ -5558,7 +5585,7 @@ L$013avx_00_47: vmovdqa xmm7,[64+ebp] sub ebp,192 cmp edi,DWORD [104+esp] - jb NEAR L$012grand_avx + jb NEAR L$011grand_avx mov esp,DWORD [108+esp] vzeroall pop edi @@ -5566,8 +5593,6 @@ L$013avx_00_47: pop ebx pop ebp ret -segment .bss -common _OPENSSL_ia32cap_P 16 %else ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 ret