about | summary | refs | log | tree | commit | diff | stats
path: root/target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch
diff options
context:
space:
mode:
author: Daniel Golle <daniel@makrotopia.org> 2022-03-21 01:16:48 +0000
committer: Daniel Golle <daniel@makrotopia.org> 2022-03-21 13:11:56 +0000
commit: 786bf7fdaca4c75e7eba6e9aa3a8b5775fd21186 (patch)
tree: 926fecb2b1f6ce1e42ba7ef4c7aab8e68dfd214c /target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch
parent: 9470160c350d15f765c33d6c1db15d6c4709a64c (diff)
download: upstream-786bf7fdaca4c75e7eba6e9aa3a8b5775fd21186.tar.gz
upstream-786bf7fdaca4c75e7eba6e9aa3a8b5775fd21186.tar.bz2
upstream-786bf7fdaca4c75e7eba6e9aa3a8b5775fd21186.zip
kernel: delete Linux 5.4 config and patches
As the upcoming release will be based on Linux 5.10 only, remove all kernel configuration as well as patches for Linux 5.4. There were no targets still actively using Linux 5.4.

Signed-off-by: Daniel Golle <daniel@makrotopia.org>
(cherry picked from commit 3a14580411adfb75f9a44eded9f41245b9e44606)
Diffstat (limited to 'target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch')
-rw-r--r-- target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch | 557
1 files changed, 0 insertions, 557 deletions
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch b/target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch
deleted file mode 100644
index 04405581d2..0000000000
--- a/target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch
+++ /dev/null
@@ -1,557 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Jason A. Donenfeld" <Jason@zx2c4.com>
-Date: Fri, 8 Nov 2019 13:22:31 +0100
-Subject: [PATCH] crypto: blake2s - x86_64 SIMD implementation
-
-commit ed0356eda153f6a95649e11feb7b07083caf9e20 upstream.
-
-These implementations from Samuel Neves support AVX and AVX-512VL.
-Originally this used AVX-512F, but Skylake thermal throttling made
-AVX-512VL more attractive and possible to do with negligible difference.
-
-Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
-Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
-Co-developed-by: Samuel Neves <sneves@dei.uc.pt>
-[ardb: move to arch/x86/crypto, wire into lib/crypto framework]
-Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
-Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
----
- arch/x86/crypto/Makefile | 2 +
- arch/x86/crypto/blake2s-core.S | 258 +++++++++++++++++++++++++++++++++
- arch/x86/crypto/blake2s-glue.c | 233 +++++++++++++++++++++++++++++
- crypto/Kconfig | 6 +
- 4 files changed, 499 insertions(+)
- create mode 100644 arch/x86/crypto/blake2s-core.S
- create mode 100644 arch/x86/crypto/blake2s-glue.c
-
---- a/arch/x86/crypto/Makefile
-+++ b/arch/x86/crypto/Makefile
-@@ -48,6 +48,7 @@ ifeq ($(avx_supported),yes)
- obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
- obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
- obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
-+ obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
- endif
-
- # These modules require assembler to support AVX2.
-@@ -70,6 +71,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x8
- aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
-
- nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
-+blake2s-x86_64-y := blake2s-core.o blake2s-glue.o
-
- ifeq ($(avx_supported),yes)
- camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
---- /dev/null
-+++ b/arch/x86/crypto/blake2s-core.S
-@@ -0,0 +1,258 @@
-+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-+/*
-+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
-+ * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
-+ */
-+
-+#include <linux/linkage.h>
-+
-+.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
-+.align 32
-+IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667
-+ .octa 0x5BE0CD191F83D9AB9B05688C510E527F
-+.section .rodata.cst16.ROT16, "aM", @progbits, 16
-+.align 16
-+ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302
-+.section .rodata.cst16.ROR328, "aM", @progbits, 16
-+.align 16
-+ROR328: .octa 0x0C0F0E0D080B0A090407060500030201
-+.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
-+.align 64
-+SIGMA:
-+.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
-+.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7
-+.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1
-+.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0
-+.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8
-+.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14
-+.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2
-+.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6
-+.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4
-+.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12
-+#ifdef CONFIG_AS_AVX512
-+.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
-+.align 64
-+SIGMA2:
-+.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
-+.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7
-+.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9
-+.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5
-+.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12
-+.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9
-+.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0
-+.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10
-+.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14
-+.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9
-+#endif /* CONFIG_AS_AVX512 */
-+
-+.text
-+#ifdef CONFIG_AS_SSSE3
-+ENTRY(blake2s_compress_ssse3)
-+ testq %rdx,%rdx
-+ je .Lendofloop
-+ movdqu (%rdi),%xmm0
-+ movdqu 0x10(%rdi),%xmm1
-+ movdqa ROT16(%rip),%xmm12
-+ movdqa ROR328(%rip),%xmm13
-+ movdqu 0x20(%rdi),%xmm14
-+ movq %rcx,%xmm15
-+ leaq SIGMA+0xa0(%rip),%r8
-+ jmp .Lbeginofloop
-+ .align 32
-+.Lbeginofloop:
-+ movdqa %xmm0,%xmm10
-+ movdqa %xmm1,%xmm11
-+ paddq %xmm15,%xmm14
-+ movdqa IV(%rip),%xmm2
-+ movdqa %xmm14,%xmm3
-+ pxor IV+0x10(%rip),%xmm3
-+ leaq SIGMA(%rip),%rcx
-+.Lroundloop:
-+ movzbl (%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm4
-+ movzbl 0x1(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm5
-+ movzbl 0x2(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm6
-+ movzbl 0x3(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm7
-+ punpckldq %xmm5,%xmm4
-+ punpckldq %xmm7,%xmm6
-+ punpcklqdq %xmm6,%xmm4
-+ paddd %xmm4,%xmm0
-+ paddd %xmm1,%xmm0
-+ pxor %xmm0,%xmm3
-+ pshufb %xmm12,%xmm3
-+ paddd %xmm3,%xmm2
-+ pxor %xmm2,%xmm1
-+ movdqa %xmm1,%xmm8
-+ psrld $0xc,%xmm1
-+ pslld $0x14,%xmm8
-+ por %xmm8,%xmm1
-+ movzbl 0x4(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm5
-+ movzbl 0x5(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm6
-+ movzbl 0x6(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm7
-+ movzbl 0x7(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm4
-+ punpckldq %xmm6,%xmm5
-+ punpckldq %xmm4,%xmm7
-+ punpcklqdq %xmm7,%xmm5
-+ paddd %xmm5,%xmm0
-+ paddd %xmm1,%xmm0
-+ pxor %xmm0,%xmm3
-+ pshufb %xmm13,%xmm3
-+ paddd %xmm3,%xmm2
-+ pxor %xmm2,%xmm1
-+ movdqa %xmm1,%xmm8
-+ psrld $0x7,%xmm1
-+ pslld $0x19,%xmm8
-+ por %xmm8,%xmm1
-+ pshufd $0x93,%xmm0,%xmm0
-+ pshufd $0x4e,%xmm3,%xmm3
-+ pshufd $0x39,%xmm2,%xmm2
-+ movzbl 0x8(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm6
-+ movzbl 0x9(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm7
-+ movzbl 0xa(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm4
-+ movzbl 0xb(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm5
-+ punpckldq %xmm7,%xmm6
-+ punpckldq %xmm5,%xmm4
-+ punpcklqdq %xmm4,%xmm6
-+ paddd %xmm6,%xmm0
-+ paddd %xmm1,%xmm0
-+ pxor %xmm0,%xmm3
-+ pshufb %xmm12,%xmm3
-+ paddd %xmm3,%xmm2
-+ pxor %xmm2,%xmm1
-+ movdqa %xmm1,%xmm8
-+ psrld $0xc,%xmm1
-+ pslld $0x14,%xmm8
-+ por %xmm8,%xmm1
-+ movzbl 0xc(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm7
-+ movzbl 0xd(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm4
-+ movzbl 0xe(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm5
-+ movzbl 0xf(%rcx),%eax
-+ movd (%rsi,%rax,4),%xmm6
-+ punpckldq %xmm4,%xmm7
-+ punpckldq %xmm6,%xmm5
-+ punpcklqdq %xmm5,%xmm7
-+ paddd %xmm7,%xmm0
-+ paddd %xmm1,%xmm0
-+ pxor %xmm0,%xmm3
-+ pshufb %xmm13,%xmm3
-+ paddd %xmm3,%xmm2
-+ pxor %xmm2,%xmm1
-+ movdqa %xmm1,%xmm8
-+ psrld $0x7,%xmm1
-+ pslld $0x19,%xmm8
-+ por %xmm8,%xmm1
-+ pshufd $0x39,%xmm0,%xmm0
-+ pshufd $0x4e,%xmm3,%xmm3
-+ pshufd $0x93,%xmm2,%xmm2
-+ addq $0x10,%rcx
-+ cmpq %r8,%rcx
-+ jnz .Lroundloop
-+ pxor %xmm2,%xmm0
-+ pxor %xmm3,%xmm1
-+ pxor %xmm10,%xmm0
-+ pxor %xmm11,%xmm1
-+ addq $0x40,%rsi
-+ decq %rdx
-+ jnz .Lbeginofloop
-+ movdqu %xmm0,(%rdi)
-+ movdqu %xmm1,0x10(%rdi)
-+ movdqu %xmm14,0x20(%rdi)
-+.Lendofloop:
-+ ret
-+ENDPROC(blake2s_compress_ssse3)
-+#endif /* CONFIG_AS_SSSE3 */
-+
-+#ifdef CONFIG_AS_AVX512
-+ENTRY(blake2s_compress_avx512)
-+ vmovdqu (%rdi),%xmm0
-+ vmovdqu 0x10(%rdi),%xmm1
-+ vmovdqu 0x20(%rdi),%xmm4
-+ vmovq %rcx,%xmm5
-+ vmovdqa IV(%rip),%xmm14
-+ vmovdqa IV+16(%rip),%xmm15
-+ jmp .Lblake2s_compress_avx512_mainloop
-+.align 32
-+.Lblake2s_compress_avx512_mainloop:
-+ vmovdqa %xmm0,%xmm10
-+ vmovdqa %xmm1,%xmm11
-+ vpaddq %xmm5,%xmm4,%xmm4
-+ vmovdqa %xmm14,%xmm2
-+ vpxor %xmm15,%xmm4,%xmm3
-+ vmovdqu (%rsi),%ymm6
-+ vmovdqu 0x20(%rsi),%ymm7
-+ addq $0x40,%rsi
-+ leaq SIGMA2(%rip),%rax
-+ movb $0xa,%cl
-+.Lblake2s_compress_avx512_roundloop:
-+ addq $0x40,%rax
-+ vmovdqa -0x40(%rax),%ymm8
-+ vmovdqa -0x20(%rax),%ymm9
-+ vpermi2d %ymm7,%ymm6,%ymm8
-+ vpermi2d %ymm7,%ymm6,%ymm9
-+ vmovdqa %ymm8,%ymm6
-+ vmovdqa %ymm9,%ymm7
-+ vpaddd %xmm8,%xmm0,%xmm0
-+ vpaddd %xmm1,%xmm0,%xmm0
-+ vpxor %xmm0,%xmm3,%xmm3
-+ vprord $0x10,%xmm3,%xmm3
-+ vpaddd %xmm3,%xmm2,%xmm2
-+ vpxor %xmm2,%xmm1,%xmm1
-+ vprord $0xc,%xmm1,%xmm1
-+ vextracti128 $0x1,%ymm8,%xmm8
-+ vpaddd %xmm8,%xmm0,%xmm0
-+ vpaddd %xmm1,%xmm0,%xmm0
-+ vpxor %xmm0,%xmm3,%xmm3
-+ vprord $0x8,%xmm3,%xmm3
-+ vpaddd %xmm3,%xmm2,%xmm2
-+ vpxor %xmm2,%xmm1,%xmm1
-+ vprord $0x7,%xmm1,%xmm1
-+ vpshufd $0x93,%xmm0,%xmm0
-+ vpshufd $0x4e,%xmm3,%xmm3
-+ vpshufd $0x39,%xmm2,%xmm2
-+ vpaddd %xmm9,%xmm0,%xmm0
-+ vpaddd %xmm1,%xmm0,%xmm0
-+ vpxor %xmm0,%xmm3,%xmm3
-+ vprord $0x10,%xmm3,%xmm3
-+ vpaddd %xmm3,%xmm2,%xmm2
-+ vpxor %xmm2,%xmm1,%xmm1
-+ vprord $0xc,%xmm1,%xmm1
-+ vextracti128 $0x1,%ymm9,%xmm9
-+ vpaddd %xmm9,%xmm0,%xmm0
-+ vpaddd %xmm1,%xmm0,%xmm0
-+ vpxor %xmm0,%xmm3,%xmm3
-+ vprord $0x8,%xmm3,%xmm3
-+ vpaddd %xmm3,%xmm2,%xmm2
-+ vpxor %xmm2,%xmm1,%xmm1
-+ vprord $0x7,%xmm1,%xmm1
-+ vpshufd $0x39,%xmm0,%xmm0
-+ vpshufd $0x4e,%xmm3,%xmm3
-+ vpshufd $0x93,%xmm2,%xmm2
-+ decb %cl
-+ jne .Lblake2s_compress_avx512_roundloop
-+ vpxor %xmm10,%xmm0,%xmm0
-+ vpxor %xmm11,%xmm1,%xmm1
-+ vpxor %xmm2,%xmm0,%xmm0
-+ vpxor %xmm3,%xmm1,%xmm1
-+ decq %rdx
-+ jne .Lblake2s_compress_avx512_mainloop
-+ vmovdqu %xmm0,(%rdi)
-+ vmovdqu %xmm1,0x10(%rdi)
-+ vmovdqu %xmm4,0x20(%rdi)
-+ vzeroupper
-+ retq
-+ENDPROC(blake2s_compress_avx512)
-+#endif /* CONFIG_AS_AVX512 */
---- /dev/null
-+++ b/arch/x86/crypto/blake2s-glue.c
-@@ -0,0 +1,233 @@
-+// SPDX-License-Identifier: GPL-2.0 OR MIT
-+/*
-+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
-+ */
-+
-+#include <crypto/internal/blake2s.h>
-+#include <crypto/internal/simd.h>
-+#include <crypto/internal/hash.h>
-+
-+#include <linux/types.h>
-+#include <linux/jump_label.h>
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+
-+#include <asm/cpufeature.h>
-+#include <asm/fpu/api.h>
-+#include <asm/processor.h>
-+#include <asm/simd.h>
-+
-+asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
-+ const u8 *block, const size_t nblocks,
-+ const u32 inc);
-+asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
-+ const u8 *block, const size_t nblocks,
-+ const u32 inc);
-+
-+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
-+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
-+
-+void blake2s_compress_arch(struct blake2s_state *state,
-+ const u8 *block, size_t nblocks,
-+ const u32 inc)
-+{
-+ /* SIMD disables preemption, so relax after processing each page. */
-+ BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
-+
-+ if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
-+ blake2s_compress_generic(state, block, nblocks, inc);
-+ return;
-+ }
-+
-+ for (;;) {
-+ const size_t blocks = min_t(size_t, nblocks,
-+ PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
-+
-+ kernel_fpu_begin();
-+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
-+ static_branch_likely(&blake2s_use_avx512))
-+ blake2s_compress_avx512(state, block, blocks, inc);
-+ else
-+ blake2s_compress_ssse3(state, block, blocks, inc);
-+ kernel_fpu_end();
-+
-+ nblocks -= blocks;
-+ if (!nblocks)
-+ break;
-+ block += blocks * BLAKE2S_BLOCK_SIZE;
-+ }
-+}
-+EXPORT_SYMBOL(blake2s_compress_arch);
-+
-+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
-+ unsigned int keylen)
-+{
-+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
-+
-+ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) {
-+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-+ return -EINVAL;
-+ }
-+
-+ memcpy(tctx->key, key, keylen);
-+ tctx->keylen = keylen;
-+
-+ return 0;
-+}
-+
-+static int crypto_blake2s_init(struct shash_desc *desc)
-+{
-+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
-+ struct blake2s_state *state = shash_desc_ctx(desc);
-+ const int outlen = crypto_shash_digestsize(desc->tfm);
-+
-+ if (tctx->keylen)
-+ blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
-+ else
-+ blake2s_init(state, outlen);
-+
-+ return 0;
-+}
-+
-+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
-+ unsigned int inlen)
-+{
-+ struct blake2s_state *state = shash_desc_ctx(desc);
-+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
-+
-+ if (unlikely(!inlen))
-+ return 0;
-+ if (inlen > fill) {
-+ memcpy(state->buf + state->buflen, in, fill);
-+ blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
-+ state->buflen = 0;
-+ in += fill;
-+ inlen -= fill;
-+ }
-+ if (inlen > BLAKE2S_BLOCK_SIZE) {
-+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
-+ /* Hash one less (full) block than strictly possible */
-+ blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
-+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
-+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
-+ }
-+ memcpy(state->buf + state->buflen, in, inlen);
-+ state->buflen += inlen;
-+
-+ return 0;
-+}
-+
-+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
-+{
-+ struct blake2s_state *state = shash_desc_ctx(desc);
-+
-+ blake2s_set_lastblock(state);
-+ memset(state->buf + state->buflen, 0,
-+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
-+ blake2s_compress_arch(state, state->buf, 1, state->buflen);
-+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
-+ memcpy(out, state->h, state->outlen);
-+ memzero_explicit(state, sizeof(*state));
-+
-+ return 0;
-+}
-+
-+static struct shash_alg blake2s_algs[] = {{
-+ .base.cra_name = "blake2s-128",
-+ .base.cra_driver_name = "blake2s-128-x86",
-+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
-+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
-+ .base.cra_priority = 200,
-+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
-+ .base.cra_module = THIS_MODULE,
-+
-+ .digestsize = BLAKE2S_128_HASH_SIZE,
-+ .setkey = crypto_blake2s_setkey,
-+ .init = crypto_blake2s_init,
-+ .update = crypto_blake2s_update,
-+ .final = crypto_blake2s_final,
-+ .descsize = sizeof(struct blake2s_state),
-+}, {
-+ .base.cra_name = "blake2s-160",
-+ .base.cra_driver_name = "blake2s-160-x86",
-+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
-+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
-+ .base.cra_priority = 200,
-+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
-+ .base.cra_module = THIS_MODULE,
-+
-+ .digestsize = BLAKE2S_160_HASH_SIZE,
-+ .setkey = crypto_blake2s_setkey,
-+ .init = crypto_blake2s_init,
-+ .update = crypto_blake2s_update,
-+ .final = crypto_blake2s_final,
-+ .descsize = sizeof(struct blake2s_state),
-+}, {
-+ .base.cra_name = "blake2s-224",
-+ .base.cra_driver_name = "blake2s-224-x86",
-+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
-+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
-+ .base.cra_priority = 200,
-+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
-+ .base.cra_module = THIS_MODULE,
-+
-+ .digestsize = BLAKE2S_224_HASH_SIZE,
-+ .setkey = crypto_blake2s_setkey,
-+ .init = crypto_blake2s_init,
-+ .update = crypto_blake2s_update,
-+ .final = crypto_blake2s_final,
-+ .descsize = sizeof(struct blake2s_state),
-+}, {
-+ .base.cra_name = "blake2s-256",
-+ .base.cra_driver_name = "blake2s-256-x86",
-+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
-+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
-+ .base.cra_priority = 200,
-+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
-+ .base.cra_module = THIS_MODULE,
-+
-+ .digestsize = BLAKE2S_256_HASH_SIZE,
-+ .setkey = crypto_blake2s_setkey,
-+ .init = crypto_blake2s_init,
-+ .update = crypto_blake2s_update,
-+ .final = crypto_blake2s_final,
-+ .descsize = sizeof(struct blake2s_state),
-+}};
-+
-+static int __init blake2s_mod_init(void)
-+{
-+ if (!boot_cpu_has(X86_FEATURE_SSSE3))
-+ return 0;
-+
-+ static_branch_enable(&blake2s_use_ssse3);
-+
-+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
-+ boot_cpu_has(X86_FEATURE_AVX) &&
-+ boot_cpu_has(X86_FEATURE_AVX2) &&
-+ boot_cpu_has(X86_FEATURE_AVX512F) &&
-+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
-+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
-+ XFEATURE_MASK_AVX512, NULL))
-+ static_branch_enable(&blake2s_use_avx512);
-+
-+ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
-+}
-+
-+static void __exit blake2s_mod_exit(void)
-+{
-+ if (boot_cpu_has(X86_FEATURE_SSSE3))
-+ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
-+}
-+
-+module_init(blake2s_mod_init);
-+module_exit(blake2s_mod_exit);
-+
-+MODULE_ALIAS_CRYPTO("blake2s-128");
-+MODULE_ALIAS_CRYPTO("blake2s-128-x86");
-+MODULE_ALIAS_CRYPTO("blake2s-160");
-+MODULE_ALIAS_CRYPTO("blake2s-160-x86");
-+MODULE_ALIAS_CRYPTO("blake2s-224");
-+MODULE_ALIAS_CRYPTO("blake2s-224-x86");
-+MODULE_ALIAS_CRYPTO("blake2s-256");
-+MODULE_ALIAS_CRYPTO("blake2s-256-x86");
-+MODULE_LICENSE("GPL v2");
---- a/crypto/Kconfig
-+++ b/crypto/Kconfig
-@@ -657,6 +657,12 @@ config CRYPTO_BLAKE2S
-
- See https://blake2.net for further information.
-
-+config CRYPTO_BLAKE2S_X86
-+ tristate "BLAKE2s digest algorithm (x86 accelerated version)"
-+ depends on X86 && 64BIT
-+ select CRYPTO_LIB_BLAKE2S_GENERIC
-+ select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
-+
- config CRYPTO_CRCT10DIF
- tristate "CRCT10DIF algorithm"
- select CRYPTO_HASH