Diffstat (limited to 'target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch')
-rw-r--r-- | target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch | 205
1 file changed, 205 insertions, 0 deletions
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch b/target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch
new file mode 100644
index 0000000000..0e5462837b
--- /dev/null
+++ b/target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch
@@ -0,0 +1,205 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Fri, 8 Nov 2019 13:22:10 +0100
+Subject: [PATCH] crypto: x86/chacha - expose SIMD ChaCha routine as library
+ function
+
+commit 84e03fa39fbe95a5567d43bff458c6d3b3a23ad1 upstream.
+
+Wire the existing x86 SIMD ChaCha code into the new ChaCha library
+interface, so that users of the library interface will get the
+accelerated version when available.
+
+Given that calls into the library API will always go through the
+routines in this module if it is enabled, switch to static keys
+to select the optimal implementation available (which may be none
+at all, in which case we defer to the generic implementation for
+all invocations).
+
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+---
+ arch/x86/crypto/chacha_glue.c | 91 +++++++++++++++++++++++++----------
+ crypto/Kconfig                |  1 +
+ include/crypto/chacha.h       |  6 +++
+ 3 files changed, 73 insertions(+), 25 deletions(-)
+
+--- a/arch/x86/crypto/chacha_glue.c
++++ b/arch/x86/crypto/chacha_glue.c
+@@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u
+ asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
+ 					unsigned int len, int nrounds);
+ asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
+-#ifdef CONFIG_AS_AVX2
++
+ asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ 				       unsigned int len, int nrounds);
+ asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ 				       unsigned int len, int nrounds);
+ asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ 				       unsigned int len, int nrounds);
+-static bool chacha_use_avx2;
+-#ifdef CONFIG_AS_AVX512
++
+ asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ 					   unsigned int len, int nrounds);
+ asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ 					   unsigned int len, int nrounds);
+ asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ 					   unsigned int len, int nrounds);
+-static bool chacha_use_avx512vl;
+-#endif
+-#endif
++
++static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
++static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
++static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
+ 
+ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
+ {
+@@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsig
+ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
+ 			  unsigned int bytes, int nrounds)
+ {
+-#ifdef CONFIG_AS_AVX2
+-#ifdef CONFIG_AS_AVX512
+-	if (chacha_use_avx512vl) {
++	if (IS_ENABLED(CONFIG_AS_AVX512) &&
++	    static_branch_likely(&chacha_use_avx512vl)) {
+ 		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
+ 			chacha_8block_xor_avx512vl(state, dst, src, bytes,
+ 						   nrounds);
+@@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8
+ 			return;
+ 		}
+ 	}
+-#endif
+-	if (chacha_use_avx2) {
++
++	if (IS_ENABLED(CONFIG_AS_AVX2) &&
++	    static_branch_likely(&chacha_use_avx2)) {
+ 		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
+ 			chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
+ 			bytes -= CHACHA_BLOCK_SIZE * 8;
+@@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8
+ 			return;
+ 		}
+ 	}
+-#endif
++
+ 	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+ 		chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
+ 		bytes -= CHACHA_BLOCK_SIZE * 4;
+@@ -123,6 +123,43 @@ static void chacha_dosimd(u32 *state, u8
+ 	}
+ }
+ 
++void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
++{
++	state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
++
++	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
++		hchacha_block_generic(state, stream, nrounds);
++	} else {
++		kernel_fpu_begin();
++		hchacha_block_ssse3(state, stream, nrounds);
++		kernel_fpu_end();
++	}
++}
++EXPORT_SYMBOL(hchacha_block_arch);
++
++void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
++{
++	state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
++
++	chacha_init_generic(state, key, iv);
++}
++EXPORT_SYMBOL(chacha_init_arch);
++
++void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
++		       int nrounds)
++{
++	state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
++
++	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
++	    bytes <= CHACHA_BLOCK_SIZE)
++		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
++
++	kernel_fpu_begin();
++	chacha_dosimd(state, dst, src, bytes, nrounds);
++	kernel_fpu_end();
++}
++EXPORT_SYMBOL(chacha_crypt_arch);
++
+ static int chacha_simd_stream_xor(struct skcipher_request *req,
+ 				  const struct chacha_ctx *ctx, const u8 *iv)
+ {
+@@ -143,7 +180,8 @@ static int chacha_simd_stream_xor(struct
+ 		if (nbytes < walk.total)
+ 			nbytes = round_down(nbytes, walk.stride);
+ 
+-		if (!crypto_simd_usable()) {
++		if (!static_branch_likely(&chacha_use_simd) ||
++		    !crypto_simd_usable()) {
+ 			chacha_crypt_generic(state, walk.dst.virt.addr,
+ 					     walk.src.virt.addr, nbytes,
+ 					     ctx->nrounds);
+@@ -246,18 +284,21 @@ static struct skcipher_alg algs[] = {
+ static int __init chacha_simd_mod_init(void)
+ {
+ 	if (!boot_cpu_has(X86_FEATURE_SSSE3))
+-		return -ENODEV;
++		return 0;
+ 
+-#ifdef CONFIG_AS_AVX2
+-	chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+-			  boot_cpu_has(X86_FEATURE_AVX2) &&
+-			  cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+-#ifdef CONFIG_AS_AVX512
+-	chacha_use_avx512vl = chacha_use_avx2 &&
+-			      boot_cpu_has(X86_FEATURE_AVX512VL) &&
+-			      boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
+-#endif
+-#endif
++	static_branch_enable(&chacha_use_simd);
++
++	if (IS_ENABLED(CONFIG_AS_AVX2) &&
++	    boot_cpu_has(X86_FEATURE_AVX) &&
++	    boot_cpu_has(X86_FEATURE_AVX2) &&
++	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
++		static_branch_enable(&chacha_use_avx2);
++
++		if (IS_ENABLED(CONFIG_AS_AVX512) &&
++		    boot_cpu_has(X86_FEATURE_AVX512VL) &&
++		    boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
++			static_branch_enable(&chacha_use_avx512vl);
++	}
+ 	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+ }
+ 
+--- a/crypto/Kconfig
++++ b/crypto/Kconfig
+@@ -1418,6 +1418,7 @@ config CRYPTO_CHACHA20_X86_64
+ 	depends on X86 && 64BIT
+ 	select CRYPTO_BLKCIPHER
+ 	select CRYPTO_LIB_CHACHA_GENERIC
++	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ 	help
+ 	  SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
+ 	  XChaCha20, and XChaCha12 stream ciphers.
+--- a/include/crypto/chacha.h
++++ b/include/crypto/chacha.h
+@@ -25,6 +25,12 @@
+ #define CHACHA_BLOCK_SIZE	64
+ #define CHACHAPOLY_IV_SIZE	12
+ 
++#ifdef CONFIG_X86_64
++#define CHACHA_STATE_WORDS	((CHACHA_BLOCK_SIZE + 12) / sizeof(u32))
++#else
++#define CHACHA_STATE_WORDS	(CHACHA_BLOCK_SIZE / sizeof(u32))
++#endif
++
+ /* 192-bit nonce, then 64-bit stream position */
+ #define XCHACHA_IV_SIZE	32
+ 
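
As a note on how the routines exported above get exercised: below is a minimal, hypothetical sketch of a kernel-side caller of the ChaCha library interface, assuming the chacha_init()/chacha_crypt() wrappers in <crypto/chacha.h> that the earlier library patches in this backport series provide; it is not part of the patch itself, and the demo_* names are illustrative only. With CRYPTO_ARCH_HAVE_LIB_CHACHA selected, chacha_crypt() resolves to chacha_crypt_arch() from this patch, and the static keys pick the best of SSSE3/AVX2/AVX-512VL at module init, falling back to chacha_crypt_generic() when SIMD is unusable.

#include <crypto/chacha.h>

/* Hypothetical library-API caller, not part of the patch above. */
static void demo_chacha20_xor(u8 *dst, const u8 *src, unsigned int len,
			      const u32 key[CHACHA_KEY_SIZE / sizeof(u32)],
			      const u8 iv[CHACHA_IV_SIZE])
{
	/* CHACHA_STATE_WORDS reserves extra room on x86_64 so the arch
	 * code can realign the state with PTR_ALIGN(). */
	u32 state[CHACHA_STATE_WORDS];

	chacha_init(state, key, iv);            /* key + IV -> state matrix */
	chacha_crypt(state, dst, src, len, 20); /* 20-round keystream XOR */
}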