Skip to content

Commit

Permalink
Upstream merge 2024 10 17 (#1934)
Browse files (browse the repository at this point in the history)
By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license and the ISC license.
  • Loading branch information
torben-hansen authored Oct 22, 2024
2 parents 238f7a9 + 328ef55 commit 8128380
Show file tree
Hide file tree
Showing 23 changed files with 822 additions and 642 deletions.
54 changes: 9 additions & 45 deletions crypto/fipsmodule/sha/asm/sha1-586.pl
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,6 @@
# been tested.
$shaext = 0;

&external_label("OPENSSL_ia32cap_P") if ($xmm);


$A="eax";
$B="ebx";
Expand Down Expand Up @@ -322,40 +320,9 @@ sub BODY_40_59
}
}

&function_begin("sha1_block_data_order");
if ($xmm) {
&static_label("shaext_shortcut") if ($shaext);
&static_label("ssse3_shortcut");
&static_label("avx_shortcut") if ($ymm);
&static_label("K_XX_XX");
&static_label("K_XX_XX");

&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($tmp1);
&picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point"));
&lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));

&mov ($A,&DWP(0,$T));
&mov ($D,&DWP(4,$T));
&test ($D,1<<9); # check SSSE3 bit
&jz (&label("x86"));
&mov ($C,&DWP(8,$T));
&test ($A,1<<24); # check FXSR bit
&jz (&label("x86"));
if ($shaext) {
&test ($C,1<<29); # check SHA bit
&jnz (&label("shaext_shortcut"));
}
if ($ymm) {
&and ($D,1<<28); # mask AVX bit
&and ($A,1<<30); # mask "Intel CPU" bit
&or ($A,$D);
&cmp ($A,1<<28|1<<30);
&je (&label("avx_shortcut"));
}
&jmp (&label("ssse3_shortcut"));
&set_label("x86",16);
}
&function_begin("sha1_block_data_order_nohw");
&mov($tmp1,&wparam(0)); # SHA_CTX *c
&mov($T,&wparam(1)); # const void *input
&mov($A,&wparam(2)); # size_t num
Expand Down Expand Up @@ -421,7 +388,7 @@ sub BODY_40_59
&jb(&label("loop"));

&stack_pop(16+3);
&function_end("sha1_block_data_order");
&function_end("sha1_block_data_order_nohw");

if ($xmm) {
if ($shaext) {
Expand All @@ -446,12 +413,11 @@ sub sha1op38 {
sub sha1msg1 { sha1op38(0xc9,@_); }
sub sha1msg2 { sha1op38(0xca,@_); }

&function_begin("_sha1_block_data_order_shaext");
&function_begin("sha1_block_data_order_shaext");
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($tmp1);
&lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
&set_label("shaext_shortcut");
&mov ($ctx,&wparam(0));
&mov ("ebx","esp");
&mov ($inp,&wparam(1));
Expand Down Expand Up @@ -533,7 +499,7 @@ sub sha1op38 {
&movdqu (&QWP(0,$ctx),$ABCD)
&movd (&DWP(16,$ctx),$E);
&mov ("esp","ebx");
&function_end("_sha1_block_data_order_shaext");
&function_end("sha1_block_data_order_shaext");
}
######################################################################
# The SSSE3 implementation.
Expand Down Expand Up @@ -569,12 +535,11 @@ sub sha1op38 {
my $_rol=sub { &rol(@_) };
my $_ror=sub { &ror(@_) };

&function_begin("_sha1_block_data_order_ssse3");
&function_begin("sha1_block_data_order_ssse3");
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($tmp1);
&lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
&set_label("ssse3_shortcut");

&movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19
&movdqa (@X[4],&QWP(16,$tmp1)); # K_20_39
Expand Down Expand Up @@ -1097,7 +1062,7 @@ ()
&mov (&DWP(12,@T[1]),$D);
&mov (&DWP(16,@T[1]),$E);

&function_end("_sha1_block_data_order_ssse3");
&function_end("sha1_block_data_order_ssse3");

$rx=0; # reset

Expand All @@ -1112,12 +1077,11 @@ ()
my $_rol=sub { &shld(@_[0],@_) };
my $_ror=sub { &shrd(@_[0],@_) };

&function_begin("_sha1_block_data_order_avx");
&function_begin("sha1_block_data_order_avx");
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($tmp1);
&lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
&set_label("avx_shortcut");
&vzeroall();

&vmovdqa(@X[3],&QWP(0,$tmp1)); # K_00_19
Expand Down Expand Up @@ -1470,7 +1434,7 @@ ()
&mov (&DWP(8,@T[1]),$C);
&mov (&DWP(12,@T[1]),$D);
&mov (&DWP(16,@T[1]),$E);
&function_end("_sha1_block_data_order_avx");
&function_end("sha1_block_data_order_avx");
}
&set_label("K_XX_XX",64);
&data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19
Expand Down
159 changes: 120 additions & 39 deletions crypto/fipsmodule/sha/asm/sha256-586.pl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
# versions, but BoringSSL is intended to be used with pre-generated perlasm
# output, so this isn't useful anyway.
#
# TODO(davidben): Enable AVX2 code after testing by setting $avx to 2.
# TODO(davidben): Enable AVX+BMI2 code after testing by setting $avx to 2.
$avx = 1;

$avx = 0 unless ($xmm);
Expand Down Expand Up @@ -190,9 +190,9 @@ ()
&add ($A,$T); # h += T
}

&external_label("OPENSSL_ia32cap_P") if (!$i386);
&static_label("K256");

&function_begin("sha256_block_data_order");
&function_begin("sha256_block_data_order_nohw");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
Expand All @@ -213,28 +213,6 @@ ()
&mov (&DWP(8,"esp"),"eax"); # inp+num*128
&mov (&DWP(12,"esp"),"ebx"); # saved sp
if (!$i386 && $xmm) {
&picmeup("edx","OPENSSL_ia32cap_P",$K256,&label("K256"));
&mov ("ecx",&DWP(0,"edx"));
&mov ("ebx",&DWP(4,"edx"));
&test ("ecx",1<<20); # check for P4
&jnz (&label("loop"));
&mov ("edx",&DWP(8,"edx")) if ($xmm);
&test ("ecx",1<<24); # check for FXSR
&jz ($unroll_after?&label("no_xmm"):&label("loop"));
&and ("ecx",1<<30); # mask "Intel CPU" bit
&and ("ebx",1<<28|1<<9); # mask AVX and SSSE3 bits
&test ("edx",1<<29) if ($shaext); # check for SHA
&jnz (&label("shaext")) if ($shaext);
&or ("ecx","ebx");
&and ("ecx",1<<28|1<<30);
&cmp ("ecx",1<<28|1<<30);
if ($xmm) {
&je (&label("AVX")) if ($avx);
&test ("ebx",1<<9); # check for SSSE3
&jnz (&label("SSSE3"));
} else {
&je (&label("loop_shrd"));
}
if ($unroll_after) {
&set_label("no_xmm");
&sub ("eax","edi");
Expand Down Expand Up @@ -522,6 +500,8 @@ ()
&mov ("esp",&DWP(96+12,"esp")); # restore sp
&function_end_A();
}
&function_end_B("sha256_block_data_order_nohw");

if (!$i386 && $xmm) {{{
if ($shaext) {
######################################################################
Expand All @@ -540,7 +520,33 @@ sub sha256op38 {
sub sha256msg1 { sha256op38(0xcc,@_); }
sub sha256msg2 { sha256op38(0xcd,@_); }

&set_label("shaext",32);
# Entry point and shared preamble for the SHA-extension code path.
# Loads the three arguments, locates the K256 table position-independently,
# carves out an aligned 16-byte frame and records ctx / inp / end-of-input /
# caller's esp in it.
# NOTE(review): the function_end call visible further down in this
# $shaext section names "sha256_block_data_order_shaext" rather than
# "sha256_block_data_order_hw" -- confirm the begin/end names are meant
# to differ.
&function_begin("sha256_block_data_order_hw");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp (original esp, before realignment)

# call/pop idiom: presumably blindpop leaves the address of pic_point in
# $K256 (helper defined elsewhere); the lea then rebases it onto K256.
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($K256);
&lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));

# Reserve 16 bytes, then round esp down to a 64-byte boundary.
&sub ("esp",16);
&and ("esp",-64);

# eax = inp + (num << 6), i.e. the end-of-input pointer.
&shl ("eax",6);
&add ("eax","edi");
&mov (&DWP(0,"esp"),"esi"); # ctx
&mov (&DWP(4,"esp"),"edi"); # inp
&mov (&DWP(8,"esp"),"eax"); # inp+num*64 (shift by 6 above)
&mov (&DWP(12,"esp"),"ebx"); # saved sp

# TODO(davidben): The preamble above this point comes from the original
# merged sha256_block_data_order function, which performed some common
# setup and then jumped to the particular SHA-256 implementation. The
# parts of the preamble that do not apply to this function can be
# removed.

&sub ("esp",32);

&movdqu ($ABEF,&QWP(0,$ctx)); # DCBA
Expand Down Expand Up @@ -660,14 +666,40 @@ sub sha256op38 {
&mov ("esp",&DWP(32+12,"esp"));
&movdqu (&QWP(0,$ctx),$ABEF);
&movdqu (&QWP(16,$ctx),$CDGH);
&function_end_A();
&function_end("sha256_block_data_order_shaext");
}

my @X = map("xmm$_",(0..3));
my ($t0,$t1,$t2,$t3) = map("xmm$_",(4..7));
my @AH = ($A,$T);

&set_label("SSSE3",32);
# Entry point and shared preamble for the SSSE3 code path.
# Loads the three arguments, locates the K256 table position-independently,
# carves out an aligned 16-byte frame and records ctx / inp / end-of-input /
# caller's esp in it.
&function_begin("sha256_block_data_order_ssse3");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp (original esp, before realignment)

# call/pop idiom: presumably blindpop leaves the address of pic_point in
# $K256 (helper defined elsewhere); the lea then rebases it onto K256.
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($K256);
&lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));

# Reserve 16 bytes, then round esp down to a 64-byte boundary.
&sub ("esp",16);
&and ("esp",-64);

# eax = inp + (num << 6), i.e. the end-of-input pointer.
&shl ("eax",6);
&add ("eax","edi");
&mov (&DWP(0,"esp"),"esi"); # ctx
&mov (&DWP(4,"esp"),"edi"); # inp
&mov (&DWP(8,"esp"),"eax"); # inp+num*64 (shift by 6 above)
&mov (&DWP(12,"esp"),"ebx"); # saved sp

# TODO(davidben): The preamble above this point comes from the original
# merged sha256_block_data_order function, which performed some common
# setup and then jumped to the particular SHA-256 implementation. The
# parts of the preamble that do not apply to this function can be
# removed.

&lea ("esp",&DWP(-96,"esp"));
# copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
&mov ($AH[0],&DWP(0,"esi"));
Expand Down Expand Up @@ -975,14 +1007,36 @@ ()
&jb (&label("grand_ssse3"));

&mov ("esp",&DWP(96+12,"esp")); # restore sp
&function_end_A();
&function_end("sha256_block_data_order_ssse3");

if ($avx) {
&set_label("AVX",32);
if ($avx>1) {
&and ("edx",1<<8|1<<3); # check for BMI2+BMI1
&cmp ("edx",1<<8|1<<3);
&je (&label("AVX_BMI"));
}
# Entry point and shared preamble for the AVX code path.
# Loads the three arguments, locates the K256 table position-independently,
# carves out an aligned 16-byte frame and records ctx / inp / end-of-input /
# caller's esp in it.
&function_begin("sha256_block_data_order_avx");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp (original esp, before realignment)

# call/pop idiom: presumably blindpop leaves the address of pic_point in
# $K256 (helper defined elsewhere); the lea then rebases it onto K256.
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($K256);
&lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));

# Reserve 16 bytes, then round esp down to a 64-byte boundary.
&sub ("esp",16);
&and ("esp",-64);

# eax = inp + (num << 6), i.e. the end-of-input pointer.
&shl ("eax",6);
&add ("eax","edi");
&mov (&DWP(0,"esp"),"esi"); # ctx
&mov (&DWP(4,"esp"),"edi"); # inp
&mov (&DWP(8,"esp"),"eax"); # inp+num*64 (shift by 6 above)
&mov (&DWP(12,"esp"),"ebx"); # saved sp

# TODO(davidben): The preamble above this point comes from the original
# merged sha256_block_data_order function, which performed some common
# setup and then jumped to the particular SHA-256 implementation. The
# parts of the preamble that do not apply to this function can be
# removed.

&lea ("esp",&DWP(-96,"esp"));
&vzeroall ();
# copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
Expand Down Expand Up @@ -1142,7 +1196,8 @@ ()

&mov ("esp",&DWP(96+12,"esp")); # restore sp
&vzeroall ();
&function_end_A();
&function_end("sha256_block_data_order_avx");

if ($avx>1) {
sub bodyx_00_15 () { # +10%
(
Expand Down Expand Up @@ -1179,7 +1234,34 @@ ()
);
}

&set_label("AVX_BMI",32);
# If enabled, this function should be gated on AVX, BMI1, and BMI2.
# Entry point and shared preamble for the AVX+BMI code path (only emitted
# when $avx>1). Loads the three arguments, locates the K256 table
# position-independently, carves out an aligned 16-byte frame and records
# ctx / inp / end-of-input / caller's esp in it.
&function_begin("sha256_block_data_order_avx_bmi");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp (original esp, before realignment)

# call/pop idiom: presumably blindpop leaves the address of pic_point in
# $K256 (helper defined elsewhere); the lea then rebases it onto K256.
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($K256);
&lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));

# Reserve 16 bytes, then round esp down to a 64-byte boundary.
&sub ("esp",16);
&and ("esp",-64);

# eax = inp + (num << 6), i.e. the end-of-input pointer.
&shl ("eax",6);
&add ("eax","edi");
&mov (&DWP(0,"esp"),"esi"); # ctx
&mov (&DWP(4,"esp"),"edi"); # inp
&mov (&DWP(8,"esp"),"eax"); # inp+num*64 (shift by 6 above)
&mov (&DWP(12,"esp"),"ebx"); # saved sp

# TODO(davidben): The preamble above this point comes from the original
# merged sha256_block_data_order function, which performed some common
# setup and then jumped to the particular SHA-256 implementation. The
# parts of the preamble that do not apply to this function can be
# removed.

&lea ("esp",&DWP(-96,"esp"));
&vzeroall ();
# copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
Expand Down Expand Up @@ -1283,11 +1365,10 @@ ()

&mov ("esp",&DWP(96+12,"esp")); # restore sp
&vzeroall ();
&function_end_A();
&function_end("sha256_block_data_order_avx_bmi");
}
}
}}}
&function_end_B("sha256_block_data_order");

&asm_finish();

Expand Down
Loading

0 comments on commit 8128380

Please sign in to comment.