Skip to content

Commit

Permalink
Merge pull request #7460 from douzzer/20240423-linuxkm-sha-2-3-asm-sa…
Browse files Browse the repository at this point in the history
…ve-vector-regs

20240423-linuxkm-sha-2-3-asm-save-vector-regs
  • Loading branch information
SparkiDev authored Apr 23, 2024
2 parents a75c2be + 5d9154e commit 9d79135
Show file tree
Hide file tree
Showing 4 changed files with 333 additions and 5 deletions.
159 changes: 157 additions & 2 deletions wolfcrypt/src/sha256.c
Original file line number Diff line number Diff line change
Expand Up @@ -371,25 +371,172 @@ static int InitSha256(wc_Sha256* sha256)
} /* extern "C" */
#endif

static word32 intel_flags;
static int Transform_Sha256_is_vectorized = 0;

#ifdef WC_NO_INTERNAL_FUNCTION_POINTERS

/* Transform implementation chosen for the running CPU.
 *
 * SHA256_AVX1 is the AVX1 build of the SHA-extension code path (it dispatches
 * to Transform_Sha256_AVX1_Sha).  SHA256_AVX1_NOSHA is the AVX1 code path for
 * CPUs that do NOT report the SHA extensions (it dispatches to
 * Transform_Sha256_AVX1).  The two must stay distinct: routing a CPU without
 * the SHA extensions to the *_Sha routines would execute instructions it does
 * not implement.
 *
 * SHA256_AVX1_NOSHA is appended so the values of the pre-existing enumerators
 * do not change.
 */
static enum { SHA256_UNSET, SHA256_AVX1, SHA256_AVX2, SHA256_AVX1_RORX,
              SHA256_AVX2_RORX, SHA256_SSE2, SHA256_C, SHA256_AVX1_NOSHA }
    sha_method = SHA256_UNSET;

/* Pick the SHA-256 transform for this CPU, once.
 *
 * Reads the CPU feature flags via cpuid_get_flags() into intel_flags and sets
 * sha_method plus Transform_Sha256_is_vectorized.  Calls made after a method
 * has been chosen return immediately.
 *
 * Preference order: SHA extensions (AVX1 build if available, else SSE2 build),
 * then AVX2 (RORX variant when BMI2 is present), then AVX1 (RORX variant when
 * BMI2 is present), then the portable C transform.
 */
static void Sha256_SetTransform(void)
{

    if (sha_method != SHA256_UNSET)
        return;

    intel_flags = cpuid_get_flags();

    if (IS_INTEL_SHA(intel_flags)) {
    #ifdef HAVE_INTEL_AVX1
        if (IS_INTEL_AVX1(intel_flags)) {
            /* SHA extensions + AVX1: Transform_Sha256_AVX1_Sha. */
            sha_method = SHA256_AVX1;
            Transform_Sha256_is_vectorized = 1;
        }
        else
    #endif
        {
            sha_method = SHA256_SSE2;
            Transform_Sha256_is_vectorized = 1;
        }
    }
    else
#ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags)) {
    #ifdef HAVE_INTEL_RORX
        if (IS_INTEL_BMI2(intel_flags)) {
            sha_method = SHA256_AVX2_RORX;
            Transform_Sha256_is_vectorized = 1;
        }
        else
    #endif
        {
            sha_method = SHA256_AVX2;
            Transform_Sha256_is_vectorized = 1;
        }
    }
    else
#endif
#ifdef HAVE_INTEL_AVX1
    if (IS_INTEL_AVX1(intel_flags)) {
    #ifdef HAVE_INTEL_RORX
        if (IS_INTEL_BMI2(intel_flags)) {
            sha_method = SHA256_AVX1_RORX;
            Transform_Sha256_is_vectorized = 1;
        }
        else
    #endif
        {
            /* This branch is only reached when IS_INTEL_SHA() was false, so
             * the SHA-extension routines must not be selected here. */
            sha_method = SHA256_AVX1_NOSHA;
            Transform_Sha256_is_vectorized = 1;
        }
    }
    else
#endif
    {
        sha_method = SHA256_C;
        Transform_Sha256_is_vectorized = 0;
    }
}

/* Hash one block D into S using the transform selected in sha_method.
 *
 * The portable C transform is called directly.  Every other method is wrapped
 * in SAVE_VECTOR_REGISTERS()/RESTORE_VECTOR_REGISTERS(); the statement passed
 * to SAVE_VECTOR_REGISTERS() presumably runs when the save fails, returning
 * _svr_ret to the caller (NOTE(review): confirm against the macro definition).
 *
 * Returns the selected transform's return value.
 *
 * NOTE(review): Transform_Sha256_AVX1 is assumed to be declared next to the
 * other Transform_Sha256_* assembly prototypes - confirm.
 */
static WC_INLINE int inline_XTRANSFORM(wc_Sha256* S, const byte* D) {
    int ret;
    if (sha_method == SHA256_C)
        return Transform_Sha256(S, D);
    SAVE_VECTOR_REGISTERS(return _svr_ret;);
    switch (sha_method) {
    case SHA256_AVX2:
        ret = Transform_Sha256_AVX2(S, D);
        break;
    case SHA256_AVX2_RORX:
        ret = Transform_Sha256_AVX2_RORX(S, D);
        break;
    case SHA256_AVX1:
        ret = Transform_Sha256_AVX1_Sha(S, D);
        break;
    case SHA256_AVX1_NOSHA:
        ret = Transform_Sha256_AVX1(S, D);
        break;
    case SHA256_AVX1_RORX:
        ret = Transform_Sha256_AVX1_RORX(S, D);
        break;
    case SHA256_SSE2:
        ret = Transform_Sha256_SSE2_Sha(S, D);
        break;
    case SHA256_C:
    case SHA256_UNSET:
    default:
        ret = Transform_Sha256(S, D);
        break;
    }
    RESTORE_VECTOR_REGISTERS();
    return ret;
}
#define XTRANSFORM(...) inline_XTRANSFORM(__VA_ARGS__)

/* Hash L bytes starting at D into S using the multi-block ("_Len") routine
 * for the method selected in sha_method.
 *
 * There is no multi-block routine for SHA256_C / SHA256_UNSET: in those cases
 * nothing is hashed and 0 is returned, so callers must check sha_method before
 * relying on this function (the visible call site tests
 * sha_method != SHA256_C).
 *
 * Returns the selected routine's return value, or 0 when no routine applies.
 *
 * NOTE(review): Transform_Sha256_AVX1_Len is assumed to be declared next to
 * the other Transform_Sha256_*_Len assembly prototypes - confirm.
 */
static WC_INLINE int inline_XTRANSFORM_LEN(wc_Sha256* S, const byte* D, word32 L) {
    int ret;
    SAVE_VECTOR_REGISTERS(return _svr_ret;);
    switch (sha_method) {
    case SHA256_AVX2:
        ret = Transform_Sha256_AVX2_Len(S, D, L);
        break;
    case SHA256_AVX2_RORX:
        ret = Transform_Sha256_AVX2_RORX_Len(S, D, L);
        break;
    case SHA256_AVX1:
        ret = Transform_Sha256_AVX1_Sha_Len(S, D, L);
        break;
    case SHA256_AVX1_NOSHA:
        ret = Transform_Sha256_AVX1_Len(S, D, L);
        break;
    case SHA256_AVX1_RORX:
        ret = Transform_Sha256_AVX1_RORX_Len(S, D, L);
        break;
    case SHA256_SSE2:
        ret = Transform_Sha256_SSE2_Sha_Len(S, D, L);
        break;
    case SHA256_C:
    case SHA256_UNSET:
    default:
        ret = 0;
        break;
    }
    RESTORE_VECTOR_REGISTERS();
    return ret;
}
#define XTRANSFORM_LEN(...) inline_XTRANSFORM_LEN(__VA_ARGS__)

#else /* !WC_NO_INTERNAL_FUNCTION_POINTERS */

/* Single-block transform currently in use. */
static int (*Transform_Sha256_p)(wc_Sha256* sha256, const byte* data);
                                                   /* = _Transform_Sha256 */
/* Multi-block transform currently in use; may be NULL, in which case callers
 * must not dispatch through it. */
static int (*Transform_Sha256_Len_p)(wc_Sha256* sha256, const byte* data,
                                     word32 len);
                                                                /* = NULL */
/* Presumably non-zero once the pointers above have been selected
 * (NOTE(review): confirm against Sha256_SetTransform). */
static int transform_check = 0;
/* intel_flags and Transform_Sha256_is_vectorized are defined once, ahead of
 * the WC_NO_INTERNAL_FUNCTION_POINTERS conditional, and are shared by both
 * branches.  They are deliberately not repeated here: a second initialised
 * definition of Transform_Sha256_is_vectorized in the same translation unit
 * is a redefinition error. */

/* Hash one block D into S through the Transform_Sha256_p function pointer.
 *
 * In WOLFSSL_LINUXKM builds the call is bracketed by
 * SAVE_VECTOR_REGISTERS()/RESTORE_VECTOR_REGISTERS() whenever
 * Transform_Sha256_is_vectorized is set.
 *
 * Returns the value produced by the transform the pointer refers to.
 */
static WC_INLINE int inline_XTRANSFORM(wc_Sha256* S, const byte* D) {
    int rc;

#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha256_is_vectorized) {
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
    }
#endif

    rc = Transform_Sha256_p(S, D);

#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha256_is_vectorized) {
        RESTORE_VECTOR_REGISTERS();
    }
#endif

    return rc;
}
#define XTRANSFORM(...) inline_XTRANSFORM(__VA_ARGS__)

/* Hash L bytes starting at D into S through the Transform_Sha256_Len_p
 * function pointer.
 *
 * The pointer is dereferenced unconditionally, so the caller must make sure it
 * is non-NULL first (the visible call site tests Transform_Sha256_Len_p !=
 * NULL).  In WOLFSSL_LINUXKM builds the call is bracketed by
 * SAVE_VECTOR_REGISTERS()/RESTORE_VECTOR_REGISTERS() whenever
 * Transform_Sha256_is_vectorized is set.
 *
 * Returns the value produced by the routine the pointer refers to.
 */
static WC_INLINE int inline_XTRANSFORM_LEN(wc_Sha256* S, const byte* D, word32 L) {
    int rc;

#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha256_is_vectorized) {
        SAVE_VECTOR_REGISTERS(return _svr_ret;);
    }
#endif

    rc = Transform_Sha256_Len_p(S, D, L);

#ifdef WOLFSSL_LINUXKM
    if (Transform_Sha256_is_vectorized) {
        RESTORE_VECTOR_REGISTERS();
    }
#endif

    return rc;
}
#define XTRANSFORM_LEN(...) inline_XTRANSFORM_LEN(__VA_ARGS__)
Expand Down Expand Up @@ -463,6 +610,8 @@ static int InitSha256(wc_Sha256* sha256)
transform_check = 1;
}

#endif /* !WC_NO_INTERNAL_FUNCTION_POINTERS */

#if !defined(WOLFSSL_KCAPI_HASH)
int wc_InitSha256_ex(wc_Sha256* sha256, void* heap, int devId)
{
Expand Down Expand Up @@ -1162,7 +1311,13 @@ static int InitSha256(wc_Sha256* sha256)
#ifdef XTRANSFORM_LEN
#if defined(WOLFSSL_X86_64_BUILD) && defined(USE_INTEL_SPEEDUP) && \
(defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2))

#ifdef WC_NO_INTERNAL_FUNCTION_POINTERS
if (sha_method != SHA256_C)
#else
if (Transform_Sha256_Len_p != NULL)
#endif

#endif
{
if (len >= WC_SHA256_BLOCK_SIZE) {
Expand Down
35 changes: 35 additions & 0 deletions wolfcrypt/src/sha3.c
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,10 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p)
word32 i;
word32 blocks;

#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
if (sha3_block == sha3_block_avx2)
SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
if (sha3->i > 0) {
byte *t;
byte l = (byte)(p * 8 - sha3->i);
Expand Down Expand Up @@ -699,6 +703,10 @@ static int Sha3Update(wc_Sha3* sha3, const byte* data, word32 len, byte p)
len -= p * 8;
data += p * 8;
}
#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
if (sha3_block == sha3_block_avx2)
RESTORE_VECTOR_REGISTERS();
#endif
XMEMCPY(sha3->t, data, len);
sha3->i += (byte)len;

Expand Down Expand Up @@ -732,6 +740,12 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l)
for (i = 0; i < p; i++) {
sha3->s[i] ^= Load64BitBigEndian(sha3->t + 8 * i);
}

#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
if (sha3_block == sha3_block_avx2)
SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif

for (j = 0; l - j >= rate; j += rate) {
#ifdef USE_INTEL_SPEEDUP
(*sha3_block)(sha3->s);
Expand All @@ -755,6 +769,11 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l)
#endif
XMEMCPY(hash + j, sha3->s, l - j);
}
#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
if (sha3_block == sha3_block_avx2)
RESTORE_VECTOR_REGISTERS();
#endif

return 0;
}

Expand Down Expand Up @@ -1328,6 +1347,10 @@ int wc_Shake128_Absorb(wc_Shake* shake, const byte* data, word32 len)
*/
int wc_Shake128_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt)
{
#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
if (sha3_block == sha3_block_avx2)
SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
for (; (blockCnt > 0); blockCnt--) {
#ifdef USE_INTEL_SPEEDUP
(*sha3_block)(shake->s);
Expand All @@ -1341,6 +1364,10 @@ int wc_Shake128_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt)
#endif
out += WC_SHA3_128_COUNT * 8;
}
#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
if (sha3_block == sha3_block_avx2)
RESTORE_VECTOR_REGISTERS();
#endif

return 0;
}
Expand Down Expand Up @@ -1458,6 +1485,10 @@ int wc_Shake256_Absorb(wc_Shake* shake, const byte* data, word32 len)
*/
int wc_Shake256_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt)
{
#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
if (sha3_block == sha3_block_avx2)
SAVE_VECTOR_REGISTERS(return _svr_ret;);
#endif
for (; (blockCnt > 0); blockCnt--) {
#ifdef USE_INTEL_SPEEDUP
(*sha3_block)(shake->s);
Expand All @@ -1471,6 +1502,10 @@ int wc_Shake256_SqueezeBlocks(wc_Shake* shake, byte* out, word32 blockCnt)
#endif
out += WC_SHA3_256_COUNT * 8;
}
#if defined(WOLFSSL_LINUXKM) && defined(USE_INTEL_SPEEDUP)
if (sha3_block == sha3_block_avx2)
RESTORE_VECTOR_REGISTERS();
#endif

return 0;
}
Expand Down
Loading

0 comments on commit 9d79135

Please sign in to comment.