diff options
Diffstat (limited to 'target/linux/generic/backport-5.4/080-wireguard-0057-crypto-arch-lib-limit-simd-usage-to-4k-chunks.patch')
-rw-r--r-- | target/linux/generic/backport-5.4/080-wireguard-0057-crypto-arch-lib-limit-simd-usage-to-4k-chunks.patch | 243 |
1 files changed, 0 insertions, 243 deletions
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0057-crypto-arch-lib-limit-simd-usage-to-4k-chunks.patch b/target/linux/generic/backport-5.4/080-wireguard-0057-crypto-arch-lib-limit-simd-usage-to-4k-chunks.patch deleted file mode 100644 index f8828f243e..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0057-crypto-arch-lib-limit-simd-usage-to-4k-chunks.patch +++ /dev/null @@ -1,243 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Thu, 23 Apr 2020 15:54:04 -0600 -Subject: [PATCH] crypto: arch/lib - limit simd usage to 4k chunks - -commit 706024a52c614b478b63f7728d202532ce6591a9 upstream. - -The initial Zinc patchset, after some mailing list discussion, contained -code to ensure that kernel_fpu_enable would not be kept on for more than -a 4k chunk, since it disables preemption. The choice of 4k isn't totally -scientific, but it's not a bad guess either, and it's what's used in -both the x86 poly1305, blake2s, and nhpoly1305 code already (in the form -of PAGE_SIZE, which this commit corrects to be explicitly 4k for the -former two). - -Ard did some back of the envelope calculations and found that -at 5 cycles/byte (overestimate) on a 1ghz processor (pretty slow), 4k -means we have a maximum preemption disabling of 20us, which Sebastian -confirmed was probably a good limit. - -Unfortunately the chunking appears to have been left out of the final -patchset that added the glue code. So, this commit adds it back in. - -Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function") -Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function") -Fixes: a44a3430d71b ("crypto: arm/chacha - expose ARM ChaCha routine as library function") -Fixes: d7d7b8535662 ("crypto: x86/poly1305 - wire up faster implementations for kernel") -Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation") -Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation") -Fixes: ed0356eda153 ("crypto: blake2s - x86_64 SIMD implementation") -Cc: Eric Biggers <ebiggers@google.com> -Cc: Ard Biesheuvel <ardb@kernel.org> -Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Cc: stable@vger.kernel.org -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Reviewed-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/arm/crypto/chacha-glue.c | 14 +++++++++++--- - arch/arm/crypto/poly1305-glue.c | 15 +++++++++++---- - arch/arm64/crypto/chacha-neon-glue.c | 14 +++++++++++--- - arch/arm64/crypto/poly1305-glue.c | 15 +++++++++++---- - arch/x86/crypto/blake2s-glue.c | 10 ++++------ - arch/x86/crypto/chacha_glue.c | 14 +++++++++++--- - arch/x86/crypto/poly1305_glue.c | 13 ++++++------- - 7 files changed, 65 insertions(+), 30 deletions(-) - ---- a/arch/arm/crypto/chacha-glue.c -+++ b/arch/arm/crypto/chacha-glue.c -@@ -91,9 +91,17 @@ void chacha_crypt_arch(u32 *state, u8 *d - return; - } - -- kernel_neon_begin(); -- chacha_doneon(state, dst, src, bytes, nrounds); -- kernel_neon_end(); -+ do { -+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K); -+ -+ kernel_neon_begin(); -+ chacha_doneon(state, dst, src, todo, nrounds); -+ kernel_neon_end(); -+ -+ bytes -= todo; -+ src += todo; -+ dst += todo; -+ } while (bytes); - } - EXPORT_SYMBOL(chacha_crypt_arch); - ---- a/arch/arm/crypto/poly1305-glue.c -+++ b/arch/arm/crypto/poly1305-glue.c -@@ -160,13 +160,20 @@ void poly1305_update_arch(struct poly130 - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - if (static_branch_likely(&have_neon) && do_neon) { -- kernel_neon_begin(); -- poly1305_blocks_neon(&dctx->h, src, len, 1); -- kernel_neon_end(); -+ do { -+ unsigned int todo = min_t(unsigned int, len, SZ_4K); -+ -+ kernel_neon_begin(); -+ poly1305_blocks_neon(&dctx->h, src, todo, 1); -+ kernel_neon_end(); -+ -+ len -= todo; -+ src += todo; -+ } while (len); - } else { - poly1305_blocks_arm(&dctx->h, src, len, 1); -+ src += len; - } -- src += len; - nbytes %= POLY1305_BLOCK_SIZE; - } - ---- a/arch/arm64/crypto/chacha-neon-glue.c -+++ b/arch/arm64/crypto/chacha-neon-glue.c -@@ -87,9 +87,17 @@ void chacha_crypt_arch(u32 *state, u8 *d - !crypto_simd_usable()) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - -- kernel_neon_begin(); -- chacha_doneon(state, dst, src, bytes, nrounds); -- kernel_neon_end(); -+ do { -+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K); -+ -+ kernel_neon_begin(); -+ chacha_doneon(state, dst, src, todo, nrounds); -+ kernel_neon_end(); -+ -+ bytes -= todo; -+ src += todo; -+ dst += todo; -+ } while (bytes); - } - EXPORT_SYMBOL(chacha_crypt_arch); - ---- a/arch/arm64/crypto/poly1305-glue.c -+++ b/arch/arm64/crypto/poly1305-glue.c -@@ -143,13 +143,20 @@ void poly1305_update_arch(struct poly130 - unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); - - if (static_branch_likely(&have_neon) && crypto_simd_usable()) { -- kernel_neon_begin(); -- poly1305_blocks_neon(&dctx->h, src, len, 1); -- kernel_neon_end(); -+ do { -+ unsigned int todo = min_t(unsigned int, len, SZ_4K); -+ -+ kernel_neon_begin(); -+ poly1305_blocks_neon(&dctx->h, src, todo, 1); -+ kernel_neon_end(); -+ -+ len -= todo; -+ src += todo; -+ } while (len); - } else { - poly1305_blocks(&dctx->h, src, len, 1); -+ src += len; - } -- src += len; - nbytes %= POLY1305_BLOCK_SIZE; - } - ---- a/arch/x86/crypto/blake2s-glue.c -+++ b/arch/x86/crypto/blake2s-glue.c -@@ -32,16 +32,16 @@ void blake2s_compress_arch(struct blake2 - const u32 inc) - { - /* SIMD disables preemption, so relax after processing each page. */ -- BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); -+ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); - - if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { - blake2s_compress_generic(state, block, nblocks, inc); - return; - } - -- for (;;) { -+ do { - const size_t blocks = min_t(size_t, nblocks, -- PAGE_SIZE / BLAKE2S_BLOCK_SIZE); -+ SZ_4K / BLAKE2S_BLOCK_SIZE); - - kernel_fpu_begin(); - if (IS_ENABLED(CONFIG_AS_AVX512) && -@@ -52,10 +52,8 @@ void blake2s_compress_arch(struct blake2 - kernel_fpu_end(); - - nblocks -= blocks; -- if (!nblocks) -- break; - block += blocks * BLAKE2S_BLOCK_SIZE; -- } -+ } while (nblocks); - } - EXPORT_SYMBOL(blake2s_compress_arch); - ---- a/arch/x86/crypto/chacha_glue.c -+++ b/arch/x86/crypto/chacha_glue.c -@@ -154,9 +154,17 @@ void chacha_crypt_arch(u32 *state, u8 *d - bytes <= CHACHA_BLOCK_SIZE) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - -- kernel_fpu_begin(); -- chacha_dosimd(state, dst, src, bytes, nrounds); -- kernel_fpu_end(); -+ do { -+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K); -+ -+ kernel_fpu_begin(); -+ chacha_dosimd(state, dst, src, todo, nrounds); -+ kernel_fpu_end(); -+ -+ bytes -= todo; -+ src += todo; -+ dst += todo; -+ } while (bytes); - } - EXPORT_SYMBOL(chacha_crypt_arch); - ---- a/arch/x86/crypto/poly1305_glue.c -+++ b/arch/x86/crypto/poly1305_glue.c -@@ -91,8 +91,8 @@ static void poly1305_simd_blocks(void *c - struct poly1305_arch_internal *state = ctx; - - /* SIMD disables preemption, so relax after processing each page. */ -- BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || -- PAGE_SIZE % POLY1305_BLOCK_SIZE); -+ BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE || -+ SZ_4K % POLY1305_BLOCK_SIZE); - - if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || - (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || -@@ -102,8 +102,8 @@ static void poly1305_simd_blocks(void *c - return; - } - -- for (;;) { -- const size_t bytes = min_t(size_t, len, PAGE_SIZE); -+ do { -+ const size_t bytes = min_t(size_t, len, SZ_4K); - - kernel_fpu_begin(); - if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) -@@ -113,11 +113,10 @@ static void poly1305_simd_blocks(void *c - else - poly1305_blocks_avx(ctx, inp, bytes, padbit); - kernel_fpu_end(); -+ - len -= bytes; -- if (!len) -- break; - inp += bytes; -- } -+ } while (len); - } - - static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], |