aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch')
-rw-r--r--target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch205
1 files changed, 205 insertions, 0 deletions
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch b/target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch
new file mode 100644
index 0000000000..0e5462837b
--- /dev/null
+++ b/target/linux/generic/backport-5.4/080-wireguard-0004-crypto-x86-chacha-expose-SIMD-ChaCha-routine-as-libr.patch
@@ -0,0 +1,205 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Fri, 8 Nov 2019 13:22:10 +0100
+Subject: [PATCH] crypto: x86/chacha - expose SIMD ChaCha routine as library
+ function
+
+commit 84e03fa39fbe95a5567d43bff458c6d3b3a23ad1 upstream.
+
+Wire the existing x86 SIMD ChaCha code into the new ChaCha library
+interface, so that users of the library interface will get the
+accelerated version when available.
+
+Given that calls into the library API will always go through the
+routines in this module if it is enabled, switch to static keys
+to select the optimal implementation available (which may be none
+at all, in which case we defer to the generic implementation for
+all invocations).
+
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+---
+ arch/x86/crypto/chacha_glue.c | 91 +++++++++++++++++++++++++----------
+ crypto/Kconfig | 1 +
+ include/crypto/chacha.h | 6 +++
+ 3 files changed, 73 insertions(+), 25 deletions(-)
+
+--- a/arch/x86/crypto/chacha_glue.c
++++ b/arch/x86/crypto/chacha_glue.c
+@@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u
+ asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+ asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
+-#ifdef CONFIG_AS_AVX2
++
+ asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+ asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+ asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+-static bool chacha_use_avx2;
+-#ifdef CONFIG_AS_AVX512
++
+ asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+ asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+ asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+-static bool chacha_use_avx512vl;
+-#endif
+-#endif
++
++static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
++static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
++static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
+
+ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
+ {
+@@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsig
+ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+ {
+-#ifdef CONFIG_AS_AVX2
+-#ifdef CONFIG_AS_AVX512
+- if (chacha_use_avx512vl) {
++ if (IS_ENABLED(CONFIG_AS_AVX512) &&
++ static_branch_likely(&chacha_use_avx512vl)) {
+ while (bytes >= CHACHA_BLOCK_SIZE * 8) {
+ chacha_8block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+@@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8
+ return;
+ }
+ }
+-#endif
+- if (chacha_use_avx2) {
++
++ if (IS_ENABLED(CONFIG_AS_AVX2) &&
++ static_branch_likely(&chacha_use_avx2)) {
+ while (bytes >= CHACHA_BLOCK_SIZE * 8) {
+ chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 8;
+@@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8
+ return;
+ }
+ }
+-#endif
++
+ while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+ chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 4;
+@@ -123,6 +123,43 @@ static void chacha_dosimd(u32 *state, u8
+ }
+ }
+
++void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
++{
++ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
++
++ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
++ hchacha_block_generic(state, stream, nrounds);
++ } else {
++ kernel_fpu_begin();
++ hchacha_block_ssse3(state, stream, nrounds);
++ kernel_fpu_end();
++ }
++}
++EXPORT_SYMBOL(hchacha_block_arch);
++
++void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
++{
++ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
++
++ chacha_init_generic(state, key, iv);
++}
++EXPORT_SYMBOL(chacha_init_arch);
++
++void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
++ int nrounds)
++{
++ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
++
++ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
++ bytes <= CHACHA_BLOCK_SIZE)
++ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
++
++ kernel_fpu_begin();
++ chacha_dosimd(state, dst, src, bytes, nrounds);
++ kernel_fpu_end();
++}
++EXPORT_SYMBOL(chacha_crypt_arch);
++
+ static int chacha_simd_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx, const u8 *iv)
+ {
+@@ -143,7 +180,8 @@ static int chacha_simd_stream_xor(struct
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+- if (!crypto_simd_usable()) {
++ if (!static_branch_likely(&chacha_use_simd) ||
++ !crypto_simd_usable()) {
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+@@ -246,18 +284,21 @@ static struct skcipher_alg algs[] = {
+ static int __init chacha_simd_mod_init(void)
+ {
+ if (!boot_cpu_has(X86_FEATURE_SSSE3))
+- return -ENODEV;
++ return 0;
+
+-#ifdef CONFIG_AS_AVX2
+- chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+- boot_cpu_has(X86_FEATURE_AVX2) &&
+- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+-#ifdef CONFIG_AS_AVX512
+- chacha_use_avx512vl = chacha_use_avx2 &&
+- boot_cpu_has(X86_FEATURE_AVX512VL) &&
+- boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
+-#endif
+-#endif
++ static_branch_enable(&chacha_use_simd);
++
++ if (IS_ENABLED(CONFIG_AS_AVX2) &&
++ boot_cpu_has(X86_FEATURE_AVX) &&
++ boot_cpu_has(X86_FEATURE_AVX2) &&
++ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
++ static_branch_enable(&chacha_use_avx2);
++
++ if (IS_ENABLED(CONFIG_AS_AVX512) &&
++ boot_cpu_has(X86_FEATURE_AVX512VL) &&
++ boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
++ static_branch_enable(&chacha_use_avx512vl);
++ }
+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+ }
+
+--- a/crypto/Kconfig
++++ b/crypto/Kconfig
+@@ -1418,6 +1418,7 @@ config CRYPTO_CHACHA20_X86_64
+ depends on X86 && 64BIT
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_LIB_CHACHA_GENERIC
++ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ help
+ SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
+ XChaCha20, and XChaCha12 stream ciphers.
+--- a/include/crypto/chacha.h
++++ b/include/crypto/chacha.h
+@@ -25,6 +25,12 @@
+ #define CHACHA_BLOCK_SIZE 64
+ #define CHACHAPOLY_IV_SIZE 12
+
++#ifdef CONFIG_X86_64
++#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32))
++#else
++#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32))
++#endif
++
+ /* 192-bit nonce, then 64-bit stream position */
+ #define XCHACHA_IV_SIZE 32
+