diff --git a/trezor-crypto/crypto/scrypt.c b/trezor-crypto/crypto/scrypt.c index c1856dcbda5..1049c517b53 100644 --- a/trezor-crypto/crypto/scrypt.c +++ b/trezor-crypto/crypto/scrypt.c @@ -25,6 +25,8 @@ * * This file was originally written by Colin Percival as part of the Tarsnap * online backup system. + * + * This file was copied from https://github.com/Tarsnap/scrypt/blob/fbd5b105b75da42cbdf91ca55387724e312499b5/lib/crypto/crypto_scrypt-ref.c */ #include @@ -33,44 +35,33 @@ #include #include -#include -#ifndef _WIN32 -#include -#endif #include #include #include -#include -static void blkcpy(void *, void *, size_t); -static void blkxor(void *, void *, size_t); -static void salsa20_8(uint32_t[16]); -static void blockmix_salsa8(uint32_t *, uint32_t *, uint32_t *, size_t); -static uint64_t integerify(void *, size_t); -static void smix(uint8_t *, size_t, uint64_t, uint32_t *, uint32_t *); +static void blkcpy(uint8_t *, uint8_t *, size_t); +static void blkxor(uint8_t *, uint8_t *, size_t); +static void salsa20_8(uint8_t[64]); +static void blockmix_salsa8(uint8_t *, uint8_t *, size_t); +static uint64_t integerify(uint8_t *, size_t); +static void smix(uint8_t *, size_t, uint64_t, uint8_t *, uint8_t *); static void -blkcpy(void * dest, void * src, size_t len) +blkcpy(uint8_t * dest, uint8_t * src, size_t len) { - size_t * D = dest; - size_t * S = src; - size_t L = len / sizeof(size_t); - size_t i; + size_t i; - for (i = 0; i < L; i++) - D[i] = S[i]; + for (i = 0; i < len; i++) + dest[i] = src[i]; } static void -blkxor(void * dest, void * src, size_t len) +blkxor(uint8_t * dest, uint8_t * src, size_t len) { - size_t * D = dest; - size_t * S = src; - size_t L = len / sizeof(size_t); - size_t i; + size_t i; - for (i = 0; i < L; i++) - D[i] ^= S[i]; + for (i = 0; i < len; i++) + dest[i] ^= src[i]; } /** @@ -78,77 +69,87 @@ blkxor(void * dest, void * src, size_t len) * Apply the salsa20/8 core to the provided block. */ static void -salsa20_8(uint32_t B[16]) +salsa20_8(uint8_t B[64]) { - uint32_t x[16]; - size_t i; - - blkcpy(x, B, 64); - for (i = 0; i < 8; i += 2) { + uint32_t B32[16]; + uint32_t x[16]; + size_t i; + + /* Convert little-endian values in. */ + for (i = 0; i < 16; i++) + B32[i] = le32dec(&B[i * 4]); + + /* Compute x = doubleround^4(B32). */ + for (i = 0; i < 16; i++) + x[i] = B32[i]; + for (i = 0; i < 8; i += 2) { #define R(a,b) (((a) << (b)) | ((a) >> (32 - (b)))) - /* Operate on columns. */ - x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9); - x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18); + /* Operate on columns. */ + x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9); + x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18); - x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9); - x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18); + x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9); + x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18); - x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9); - x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18); + x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9); + x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18); - x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9); - x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18); + x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9); + x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18); - /* Operate on rows. */ - x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9); - x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18); + /* Operate on rows. */ + x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9); + x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18); - x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9); - x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18); + x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9); + x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18); - x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9); - x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18); + x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9); + x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18); - x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9); - x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18); + x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9); + x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18); #undef R - } - for (i = 0; i < 16; i++) - B[i] += x[i]; + } + + /* Compute B32 = B32 + x. */ + for (i = 0; i < 16; i++) + B32[i] += x[i]; + + /* Convert little-endian values out. */ + for (i = 0; i < 16; i++) + le32enc(&B[4 * i], B32[i]); } /** - * blockmix_salsa8(Bin, Bout, X, r): - * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r - * bytes in length; the output Bout must also be the same size. The - * temporary space X must be 64 bytes. + * blockmix_salsa8(B, Y, r): + * Compute B = BlockMix_{salsa20/8, r}(B). The input B must be 128r bytes in + * length; the temporary space Y must also be the same size. */ static void -blockmix_salsa8(uint32_t * Bin, uint32_t * Bout, uint32_t * X, size_t r) +blockmix_salsa8(uint8_t * B, uint8_t * Y, size_t r) { - size_t i; - - /* 1: X <-- B_{2r - 1} */ - blkcpy(X, &Bin[(2 * r - 1) * 16], 64); - - /* 2: for i = 0 to 2r - 1 do */ - for (i = 0; i < 2 * r; i += 2) { - /* 3: X <-- H(X \xor B_i) */ - blkxor(X, &Bin[i * 16], 64); - salsa20_8(X); - - /* 4: Y_i <-- X */ - /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ - blkcpy(&Bout[i * 8], X, 64); - - /* 3: X <-- H(X \xor B_i) */ - blkxor(X, &Bin[i * 16 + 16], 64); - salsa20_8(X); - - /* 4: Y_i <-- X */ - /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ - blkcpy(&Bout[i * 8 + r * 16], X, 64); - } + uint8_t X[64]; + size_t i; + + /* 1: X <-- B_{2r - 1} */ + blkcpy(X, &B[(2 * r - 1) * 64], 64); + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < 2 * r; i++) { + /* 3: X <-- H(X \xor B_i) */ + blkxor(X, &B[i * 64], 64); + salsa20_8(X); + + /* 4: Y_i <-- X */ + blkcpy(&Y[i * 64], X, 64); + } + + /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ + for (i = 0; i < r; i++) + blkcpy(&B[i * 64], &Y[(i * 2) * 64], 64); + for (i = 0; i < r; i++) + blkcpy(&B[(i + r) * 64], &Y[(i * 2 + 1) * 64], 64); } /** @@ -156,70 +157,51 @@ blockmix_salsa8(uint32_t * Bin, uint32_t * Bout, uint32_t * X, size_t r) * Return the result of parsing B_{2r-1} as a little-endian integer. */ static uint64_t -integerify(void * B, size_t r) +integerify(uint8_t * B, size_t r) { - uint32_t * X = (void *)((uintptr_t)(B) + (2 * r - 1) * 64); + uint8_t * X = &B[(2 * r - 1) * 64]; - return (((uint64_t)(X[1]) << 32) + X[0]); + return (le64dec(X)); } /** * smix(B, r, N, V, XY): - * Compute B = SMix_r(B, N). The input B must be 128r bytes in length; - * the temporary storage V must be 128rN bytes in length; the temporary - * storage XY must be 256r + 64 bytes in length. The value N must be a - * power of 2 greater than 1. The arrays B, V, and XY must be aligned to a - * multiple of 64 bytes. + * Compute B = SMix_r(B, N). The input B must be 128r bytes in length; the + * temporary storage V must be 128rN bytes in length; the temporary storage + * XY must be 256r bytes in length. The value N must be a power of 2. */ static void -smix(uint8_t * B, size_t r, uint64_t N, uint32_t * V, uint32_t * XY) +smix(uint8_t * B, size_t r, uint64_t N, uint8_t * V, uint8_t * XY) { - uint32_t * X = XY; - uint32_t * Y = &XY[32 * r]; - uint32_t * Z = &XY[64 * r]; - uint64_t i; - uint64_t j; - size_t k; - - /* 1: X <-- B */ - for (k = 0; k < 32 * r; k++) - X[k] = le32dec(&B[4 * k]); - - /* 2: for i = 0 to N - 1 do */ - for (i = 0; i < N; i += 2) { - /* 3: V_i <-- X */ - blkcpy(&V[i * (32 * r)], X, 128 * r); - - /* 4: X <-- H(X) */ - blockmix_salsa8(X, Y, Z, r); - - /* 3: V_i <-- X */ - blkcpy(&V[(i + 1) * (32 * r)], Y, 128 * r); - - /* 4: X <-- H(X) */ - blockmix_salsa8(Y, X, Z, r); - } - - /* 6: for i = 0 to N - 1 do */ - for (i = 0; i < N; i += 2) { - /* 7: j <-- Integerify(X) mod N */ - j = integerify(X, r) & (N - 1); - - /* 8: X <-- H(X \xor V_j) */ - blkxor(X, &V[j * (32 * r)], 128 * r); - blockmix_salsa8(X, Y, Z, r); - - /* 7: j <-- Integerify(X) mod N */ - j = integerify(Y, r) & (N - 1); - - /* 8: X <-- H(X \xor V_j) */ - blkxor(Y, &V[j * (32 * r)], 128 * r); - blockmix_salsa8(Y, X, Z, r); - } - - /* 10: B' <-- X */ - for (k = 0; k < 32 * r; k++) - le32enc(&B[4 * k], X[k]); + uint8_t * X = XY; + uint8_t * Y = &XY[128 * r]; + uint64_t i; + uint64_t j; + + /* 1: X <-- B */ + blkcpy(X, B, 128 * r); + + /* 2: for i = 0 to N - 1 do */ + for (i = 0; i < N; i++) { + /* 3: V_i <-- X */ + blkcpy(&V[i * (128 * r)], X, 128 * r); + + /* 4: X <-- H(X) */ + blockmix_salsa8(X, Y, r); + } + + /* 6: for i = 0 to N - 1 do */ + for (i = 0; i < N; i++) { + /* 7: j <-- Integerify(X) mod N */ + j = integerify(X, r) & (N - 1); + + /* 8: X <-- H(X \xor V_j) */ + blkxor(X, &V[j * (128 * r)], 128 * r); + blockmix_salsa8(X, Y, r); + } + + /* 10: B' <-- X */ + blkcpy(B, X, 128 * r); } /** @@ -227,117 +209,78 @@ smix(uint8_t * B, size_t r, uint64_t N, uint32_t * V, uint32_t * XY) * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, * p, buflen) and write the result into buf. The parameters r, p, and buflen * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N - * must be a power of 2 greater than 1. + * must be a power of 2. * - * Return 0 on success; or -1 on error + * Return 0 on success; or -1 on error. */ int scrypt(const uint8_t * passwd, size_t passwdlen, - const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, - uint8_t * buf, size_t buflen) + const uint8_t * salt, size_t saltlen, uint64_t N, uint32_t _r, uint32_t _p, + uint8_t * buf, size_t buflen) { - void * B0, * V0, * XY0; - uint8_t * B; - uint32_t * V; - uint32_t * XY; - uint32_t i; + uint8_t * B; + uint8_t * V; + uint8_t * XY; + size_t r = _r, p = _p; + uint32_t i; - /* Sanity-check parameters. */ + /* Sanity-check parameters. */ #if SIZE_MAX > UINT32_MAX - if (buflen > (((uint64_t)(1) << 32) - 1) * 32) { - errno = EFBIG; - goto err0; - } -#endif - if ((uint64_t)(r) * (uint64_t)(p) >= (1 << 30)) { - errno = EFBIG; - goto err0; - } - if (r == 0 || p == 0) { - errno = EINVAL; - goto err0; - } - if (((N & (N - 1)) != 0) || (N < 2)) { - errno = EINVAL; - goto err0; - } - if ((r > SIZE_MAX / 128 / p) || -#if SIZE_MAX / 256 <= UINT32_MAX - (r > SIZE_MAX / 256) || + if (buflen > (((uint64_t)(1) << 32) - 1) * 32) { + errno = EFBIG; + goto err0; + } #endif - (N > SIZE_MAX / 128 / r)) { - errno = ENOMEM; - goto err0; - } - - /* Allocate memory. */ -#ifdef HAVE_POSIX_MEMALIGN - if ((errno = posix_memalign(&B0, 64, 128 * r * p)) != 0) - goto err0; - B = (uint8_t *)(B0); - if ((errno = posix_memalign(&XY0, 64, 256 * r + 64)) != 0) - goto err1; - XY = (uint32_t *)(XY0); -#ifndef MAP_ANON - if ((errno = posix_memalign(&V0, 64, 128 * r * N)) != 0) - goto err2; - V = (uint32_t *)(V0); -#endif -#else - if ((B0 = malloc(128 * r * p + 63)) == NULL) - goto err0; - B = (uint8_t *)(((uintptr_t)(B0) + 63) & ~ (uintptr_t)(63)); - if ((XY0 = malloc(256 * r + 64 + 63)) == NULL) - goto err1; - XY = (uint32_t *)(((uintptr_t)(XY0) + 63) & ~ (uintptr_t)(63)); -#ifndef MAP_ANON - if ((V0 = malloc(128 * r * N + 63)) == NULL) - goto err2; - V = (uint32_t *)(((uintptr_t)(V0) + 63) & ~ (uintptr_t)(63)); -#endif -#endif -#ifdef MAP_ANON - if ((V0 = mmap(NULL, 128 * r * N, PROT_READ | PROT_WRITE, -#ifdef MAP_NOCORE - MAP_ANON | MAP_PRIVATE | MAP_NOCORE, -#else - MAP_ANON | MAP_PRIVATE, -#endif - -1, 0)) == MAP_FAILED) - goto err2; - V = (uint32_t *)(V0); -#endif - - /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ - pbkdf2_hmac_sha256(passwd, passwdlen, salt, saltlen, 1, B, p * 128 * r); - - /* 2: for i = 0 to p - 1 do */ - for (i = 0; i < p; i++) { - /* 3: B_i <-- MF(B_i, N) */ - smix(&B[i * 128 * r], r, N, V, XY); - } - - /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ - pbkdf2_hmac_sha256(passwd, passwdlen, B, p * 128 * r, 1, buf, buflen); - - /* Free memory. */ -#ifdef MAP_ANON - if (munmap(V0, 128 * r * N)) - goto err2; -#else - free(V0); -#endif - free(XY0); - free(B0); - - /* Success! */ - return (0); + if ((uint64_t)(r) * (uint64_t)(p) >= (1 << 30)) { + errno = EFBIG; + goto err0; + } + if (((N & (N - 1)) != 0) || (N == 0)) { + errno = EINVAL; + goto err0; + } + if ((r > SIZE_MAX / 128 / p) || + #if SIZE_MAX / 256 <= UINT32_MAX + (r > SIZE_MAX / 256) || + #endif + (N > SIZE_MAX / 128 / r)) { + errno = ENOMEM; + goto err0; + } + + /* Allocate memory. */ + if ((B = malloc(128 * r * p)) == NULL) + goto err0; + if ((XY = malloc(256 * r)) == NULL) + goto err1; + if ((V = malloc(128 * r * N)) == NULL) + goto err2; + + /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ + pbkdf2_hmac_sha256(passwd, passwdlen, salt, saltlen, 1, B, p * 128 * r); + + /* 2: for i = 0 to p - 1 do */ + for (i = 0; i < p; i++) { + /* 3: B_i <-- MF(B_i, N) */ + smix(&B[i * 128 * r], r, N, V, XY); + } + + /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ + pbkdf2_hmac_sha256(passwd, passwdlen, B, p * 128 * r, 1, buf, buflen); + + /* Free memory. */ + free(V); + free(XY); + free(B); + + /* Success! */ + return (0); err2: - free(XY0); + free(XY); err1: - free(B0); + free(B); err0: - /* Failure! */ - return (-1); + /* Failure! */ + return (-1); }