diff options
Diffstat (limited to 'target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch')
-rw-r--r-- | target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch | 557 |
1 files changed, 0 insertions, 557 deletions
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch b/target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch deleted file mode 100644 index 04405581d2..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0024-crypto-blake2s-x86_64-SIMD-implementation.patch +++ /dev/null @@ -1,557 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "Jason A. Donenfeld" <Jason@zx2c4.com> -Date: Fri, 8 Nov 2019 13:22:31 +0100 -Subject: [PATCH] crypto: blake2s - x86_64 SIMD implementation - -commit ed0356eda153f6a95649e11feb7b07083caf9e20 upstream. - -These implementations from Samuel Neves support AVX and AVX-512VL. -Originally this used AVX-512F, but Skylake thermal throttling made -AVX-512VL more attractive and possible to do with negligable difference. - -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> -Signed-off-by: Samuel Neves <sneves@dei.uc.pt> -Co-developed-by: Samuel Neves <sneves@dei.uc.pt> -[ardb: move to arch/x86/crypto, wire into lib/crypto framework] -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/x86/crypto/Makefile | 2 + - arch/x86/crypto/blake2s-core.S | 258 +++++++++++++++++++++++++++++++++ - arch/x86/crypto/blake2s-glue.c | 233 +++++++++++++++++++++++++++++ - crypto/Kconfig | 6 + - 4 files changed, 499 insertions(+) - create mode 100644 arch/x86/crypto/blake2s-core.S - create mode 100644 arch/x86/crypto/blake2s-glue.c - ---- a/arch/x86/crypto/Makefile -+++ b/arch/x86/crypto/Makefile -@@ -48,6 +48,7 @@ ifeq ($(avx_supported),yes) - obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o - obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o - obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o -+ obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o - endif - - # These modules require assembler to support AVX2. -@@ -70,6 +71,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x8 - aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o - - nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o -+blake2s-x86_64-y := blake2s-core.o blake2s-glue.o - - ifeq ($(avx_supported),yes) - camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ ---- /dev/null -+++ b/arch/x86/crypto/blake2s-core.S -@@ -0,0 +1,258 @@ -+/* SPDX-License-Identifier: GPL-2.0 OR MIT */ -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved. -+ */ -+ -+#include <linux/linkage.h> -+ -+.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32 -+.align 32 -+IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667 -+ .octa 0x5BE0CD191F83D9AB9B05688C510E527F -+.section .rodata.cst16.ROT16, "aM", @progbits, 16 -+.align 16 -+ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302 -+.section .rodata.cst16.ROR328, "aM", @progbits, 16 -+.align 16 -+ROR328: .octa 0x0C0F0E0D080B0A090407060500030201 -+.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160 -+.align 64 -+SIGMA: -+.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 -+.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7 -+.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1 -+.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0 -+.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8 -+.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14 -+.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2 -+.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6 -+.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4 -+.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12 -+#ifdef CONFIG_AS_AVX512 -+.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640 -+.align 64 -+SIGMA2: -+.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 -+.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 -+.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 -+.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 -+.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 -+.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 -+.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 -+.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 -+.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 -+.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 -+#endif /* CONFIG_AS_AVX512 */ -+ -+.text -+#ifdef CONFIG_AS_SSSE3 -+ENTRY(blake2s_compress_ssse3) -+ testq %rdx,%rdx -+ je .Lendofloop -+ movdqu (%rdi),%xmm0 -+ movdqu 0x10(%rdi),%xmm1 -+ movdqa ROT16(%rip),%xmm12 -+ movdqa ROR328(%rip),%xmm13 -+ movdqu 0x20(%rdi),%xmm14 -+ movq %rcx,%xmm15 -+ leaq SIGMA+0xa0(%rip),%r8 -+ jmp .Lbeginofloop -+ .align 32 -+.Lbeginofloop: -+ movdqa %xmm0,%xmm10 -+ movdqa %xmm1,%xmm11 -+ paddq %xmm15,%xmm14 -+ movdqa IV(%rip),%xmm2 -+ movdqa %xmm14,%xmm3 -+ pxor IV+0x10(%rip),%xmm3 -+ leaq SIGMA(%rip),%rcx -+.Lroundloop: -+ movzbl (%rcx),%eax -+ movd (%rsi,%rax,4),%xmm4 -+ movzbl 0x1(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm5 -+ movzbl 0x2(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm6 -+ movzbl 0x3(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm7 -+ punpckldq %xmm5,%xmm4 -+ punpckldq %xmm7,%xmm6 -+ punpcklqdq %xmm6,%xmm4 -+ paddd %xmm4,%xmm0 -+ paddd %xmm1,%xmm0 -+ pxor %xmm0,%xmm3 -+ pshufb %xmm12,%xmm3 -+ paddd %xmm3,%xmm2 -+ pxor %xmm2,%xmm1 -+ movdqa %xmm1,%xmm8 -+ psrld $0xc,%xmm1 -+ pslld $0x14,%xmm8 -+ por %xmm8,%xmm1 -+ movzbl 0x4(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm5 -+ movzbl 0x5(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm6 -+ movzbl 0x6(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm7 -+ movzbl 0x7(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm4 -+ punpckldq %xmm6,%xmm5 -+ punpckldq %xmm4,%xmm7 -+ punpcklqdq %xmm7,%xmm5 -+ paddd %xmm5,%xmm0 -+ paddd %xmm1,%xmm0 -+ pxor %xmm0,%xmm3 -+ pshufb %xmm13,%xmm3 -+ paddd %xmm3,%xmm2 -+ pxor %xmm2,%xmm1 -+ movdqa %xmm1,%xmm8 -+ psrld $0x7,%xmm1 -+ pslld $0x19,%xmm8 -+ por %xmm8,%xmm1 -+ pshufd $0x93,%xmm0,%xmm0 -+ pshufd $0x4e,%xmm3,%xmm3 -+ pshufd $0x39,%xmm2,%xmm2 -+ movzbl 0x8(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm6 -+ movzbl 0x9(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm7 -+ movzbl 0xa(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm4 -+ movzbl 0xb(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm5 -+ punpckldq %xmm7,%xmm6 -+ punpckldq %xmm5,%xmm4 -+ punpcklqdq %xmm4,%xmm6 -+ paddd %xmm6,%xmm0 -+ paddd %xmm1,%xmm0 -+ pxor %xmm0,%xmm3 -+ pshufb %xmm12,%xmm3 -+ paddd %xmm3,%xmm2 -+ pxor %xmm2,%xmm1 -+ movdqa %xmm1,%xmm8 -+ psrld $0xc,%xmm1 -+ pslld $0x14,%xmm8 -+ por %xmm8,%xmm1 -+ movzbl 0xc(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm7 -+ movzbl 0xd(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm4 -+ movzbl 0xe(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm5 -+ movzbl 0xf(%rcx),%eax -+ movd (%rsi,%rax,4),%xmm6 -+ punpckldq %xmm4,%xmm7 -+ punpckldq %xmm6,%xmm5 -+ punpcklqdq %xmm5,%xmm7 -+ paddd %xmm7,%xmm0 -+ paddd %xmm1,%xmm0 -+ pxor %xmm0,%xmm3 -+ pshufb %xmm13,%xmm3 -+ paddd %xmm3,%xmm2 -+ pxor %xmm2,%xmm1 -+ movdqa %xmm1,%xmm8 -+ psrld $0x7,%xmm1 -+ pslld $0x19,%xmm8 -+ por %xmm8,%xmm1 -+ pshufd $0x39,%xmm0,%xmm0 -+ pshufd $0x4e,%xmm3,%xmm3 -+ pshufd $0x93,%xmm2,%xmm2 -+ addq $0x10,%rcx -+ cmpq %r8,%rcx -+ jnz .Lroundloop -+ pxor %xmm2,%xmm0 -+ pxor %xmm3,%xmm1 -+ pxor %xmm10,%xmm0 -+ pxor %xmm11,%xmm1 -+ addq $0x40,%rsi -+ decq %rdx -+ jnz .Lbeginofloop -+ movdqu %xmm0,(%rdi) -+ movdqu %xmm1,0x10(%rdi) -+ movdqu %xmm14,0x20(%rdi) -+.Lendofloop: -+ ret -+ENDPROC(blake2s_compress_ssse3) -+#endif /* CONFIG_AS_SSSE3 */ -+ -+#ifdef CONFIG_AS_AVX512 -+ENTRY(blake2s_compress_avx512) -+ vmovdqu (%rdi),%xmm0 -+ vmovdqu 0x10(%rdi),%xmm1 -+ vmovdqu 0x20(%rdi),%xmm4 -+ vmovq %rcx,%xmm5 -+ vmovdqa IV(%rip),%xmm14 -+ vmovdqa IV+16(%rip),%xmm15 -+ jmp .Lblake2s_compress_avx512_mainloop -+.align 32 -+.Lblake2s_compress_avx512_mainloop: -+ vmovdqa %xmm0,%xmm10 -+ vmovdqa %xmm1,%xmm11 -+ vpaddq %xmm5,%xmm4,%xmm4 -+ vmovdqa %xmm14,%xmm2 -+ vpxor %xmm15,%xmm4,%xmm3 -+ vmovdqu (%rsi),%ymm6 -+ vmovdqu 0x20(%rsi),%ymm7 -+ addq $0x40,%rsi -+ leaq SIGMA2(%rip),%rax -+ movb $0xa,%cl -+.Lblake2s_compress_avx512_roundloop: -+ addq $0x40,%rax -+ vmovdqa -0x40(%rax),%ymm8 -+ vmovdqa -0x20(%rax),%ymm9 -+ vpermi2d %ymm7,%ymm6,%ymm8 -+ vpermi2d %ymm7,%ymm6,%ymm9 -+ vmovdqa %ymm8,%ymm6 -+ vmovdqa %ymm9,%ymm7 -+ vpaddd %xmm8,%xmm0,%xmm0 -+ vpaddd %xmm1,%xmm0,%xmm0 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vprord $0x10,%xmm3,%xmm3 -+ vpaddd %xmm3,%xmm2,%xmm2 -+ vpxor %xmm2,%xmm1,%xmm1 -+ vprord $0xc,%xmm1,%xmm1 -+ vextracti128 $0x1,%ymm8,%xmm8 -+ vpaddd %xmm8,%xmm0,%xmm0 -+ vpaddd %xmm1,%xmm0,%xmm0 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vprord $0x8,%xmm3,%xmm3 -+ vpaddd %xmm3,%xmm2,%xmm2 -+ vpxor %xmm2,%xmm1,%xmm1 -+ vprord $0x7,%xmm1,%xmm1 -+ vpshufd $0x93,%xmm0,%xmm0 -+ vpshufd $0x4e,%xmm3,%xmm3 -+ vpshufd $0x39,%xmm2,%xmm2 -+ vpaddd %xmm9,%xmm0,%xmm0 -+ vpaddd %xmm1,%xmm0,%xmm0 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vprord $0x10,%xmm3,%xmm3 -+ vpaddd %xmm3,%xmm2,%xmm2 -+ vpxor %xmm2,%xmm1,%xmm1 -+ vprord $0xc,%xmm1,%xmm1 -+ vextracti128 $0x1,%ymm9,%xmm9 -+ vpaddd %xmm9,%xmm0,%xmm0 -+ vpaddd %xmm1,%xmm0,%xmm0 -+ vpxor %xmm0,%xmm3,%xmm3 -+ vprord $0x8,%xmm3,%xmm3 -+ vpaddd %xmm3,%xmm2,%xmm2 -+ vpxor %xmm2,%xmm1,%xmm1 -+ vprord $0x7,%xmm1,%xmm1 -+ vpshufd $0x39,%xmm0,%xmm0 -+ vpshufd $0x4e,%xmm3,%xmm3 -+ vpshufd $0x93,%xmm2,%xmm2 -+ decb %cl -+ jne .Lblake2s_compress_avx512_roundloop -+ vpxor %xmm10,%xmm0,%xmm0 -+ vpxor %xmm11,%xmm1,%xmm1 -+ vpxor %xmm2,%xmm0,%xmm0 -+ vpxor %xmm3,%xmm1,%xmm1 -+ decq %rdx -+ jne .Lblake2s_compress_avx512_mainloop -+ vmovdqu %xmm0,(%rdi) -+ vmovdqu %xmm1,0x10(%rdi) -+ vmovdqu %xmm4,0x20(%rdi) -+ vzeroupper -+ retq -+ENDPROC(blake2s_compress_avx512) -+#endif /* CONFIG_AS_AVX512 */ ---- /dev/null -+++ b/arch/x86/crypto/blake2s-glue.c -@@ -0,0 +1,233 @@ -+// SPDX-License-Identifier: GPL-2.0 OR MIT -+/* -+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. -+ */ -+ -+#include <crypto/internal/blake2s.h> -+#include <crypto/internal/simd.h> -+#include <crypto/internal/hash.h> -+ -+#include <linux/types.h> -+#include <linux/jump_label.h> -+#include <linux/kernel.h> -+#include <linux/module.h> -+ -+#include <asm/cpufeature.h> -+#include <asm/fpu/api.h> -+#include <asm/processor.h> -+#include <asm/simd.h> -+ -+asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, -+ const u8 *block, const size_t nblocks, -+ const u32 inc); -+asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, -+ const u8 *block, const size_t nblocks, -+ const u32 inc); -+ -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); -+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); -+ -+void blake2s_compress_arch(struct blake2s_state *state, -+ const u8 *block, size_t nblocks, -+ const u32 inc) -+{ -+ /* SIMD disables preemption, so relax after processing each page. */ -+ BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); -+ -+ if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { -+ blake2s_compress_generic(state, block, nblocks, inc); -+ return; -+ } -+ -+ for (;;) { -+ const size_t blocks = min_t(size_t, nblocks, -+ PAGE_SIZE / BLAKE2S_BLOCK_SIZE); -+ -+ kernel_fpu_begin(); -+ if (IS_ENABLED(CONFIG_AS_AVX512) && -+ static_branch_likely(&blake2s_use_avx512)) -+ blake2s_compress_avx512(state, block, blocks, inc); -+ else -+ blake2s_compress_ssse3(state, block, blocks, inc); -+ kernel_fpu_end(); -+ -+ nblocks -= blocks; -+ if (!nblocks) -+ break; -+ block += blocks * BLAKE2S_BLOCK_SIZE; -+ } -+} -+EXPORT_SYMBOL(blake2s_compress_arch); -+ -+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key, -+ unsigned int keylen) -+{ -+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); -+ -+ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) { -+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); -+ return -EINVAL; -+ } -+ -+ memcpy(tctx->key, key, keylen); -+ tctx->keylen = keylen; -+ -+ return 0; -+} -+ -+static int crypto_blake2s_init(struct shash_desc *desc) -+{ -+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); -+ struct blake2s_state *state = shash_desc_ctx(desc); -+ const int outlen = crypto_shash_digestsize(desc->tfm); -+ -+ if (tctx->keylen) -+ blake2s_init_key(state, outlen, tctx->key, tctx->keylen); -+ else -+ blake2s_init(state, outlen); -+ -+ return 0; -+} -+ -+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, -+ unsigned int inlen) -+{ -+ struct blake2s_state *state = shash_desc_ctx(desc); -+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; -+ -+ if (unlikely(!inlen)) -+ return 0; -+ if (inlen > fill) { -+ memcpy(state->buf + state->buflen, in, fill); -+ blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); -+ state->buflen = 0; -+ in += fill; -+ inlen -= fill; -+ } -+ if (inlen > BLAKE2S_BLOCK_SIZE) { -+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); -+ /* Hash one less (full) block than strictly possible */ -+ blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); -+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); -+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); -+ } -+ memcpy(state->buf + state->buflen, in, inlen); -+ state->buflen += inlen; -+ -+ return 0; -+} -+ -+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out) -+{ -+ struct blake2s_state *state = shash_desc_ctx(desc); -+ -+ blake2s_set_lastblock(state); -+ memset(state->buf + state->buflen, 0, -+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ -+ blake2s_compress_arch(state, state->buf, 1, state->buflen); -+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); -+ memcpy(out, state->h, state->outlen); -+ memzero_explicit(state, sizeof(*state)); -+ -+ return 0; -+} -+ -+static struct shash_alg blake2s_algs[] = {{ -+ .base.cra_name = "blake2s-128", -+ .base.cra_driver_name = "blake2s-128-x86", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_128_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}, { -+ .base.cra_name = "blake2s-160", -+ .base.cra_driver_name = "blake2s-160-x86", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_160_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}, { -+ .base.cra_name = "blake2s-224", -+ .base.cra_driver_name = "blake2s-224-x86", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_224_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}, { -+ .base.cra_name = "blake2s-256", -+ .base.cra_driver_name = "blake2s-256-x86", -+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, -+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), -+ .base.cra_priority = 200, -+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, -+ .base.cra_module = THIS_MODULE, -+ -+ .digestsize = BLAKE2S_256_HASH_SIZE, -+ .setkey = crypto_blake2s_setkey, -+ .init = crypto_blake2s_init, -+ .update = crypto_blake2s_update, -+ .final = crypto_blake2s_final, -+ .descsize = sizeof(struct blake2s_state), -+}}; -+ -+static int __init blake2s_mod_init(void) -+{ -+ if (!boot_cpu_has(X86_FEATURE_SSSE3)) -+ return 0; -+ -+ static_branch_enable(&blake2s_use_ssse3); -+ -+ if (IS_ENABLED(CONFIG_AS_AVX512) && -+ boot_cpu_has(X86_FEATURE_AVX) && -+ boot_cpu_has(X86_FEATURE_AVX2) && -+ boot_cpu_has(X86_FEATURE_AVX512F) && -+ boot_cpu_has(X86_FEATURE_AVX512VL) && -+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | -+ XFEATURE_MASK_AVX512, NULL)) -+ static_branch_enable(&blake2s_use_avx512); -+ -+ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); -+} -+ -+static void __exit blake2s_mod_exit(void) -+{ -+ if (boot_cpu_has(X86_FEATURE_SSSE3)) -+ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); -+} -+ -+module_init(blake2s_mod_init); -+module_exit(blake2s_mod_exit); -+ -+MODULE_ALIAS_CRYPTO("blake2s-128"); -+MODULE_ALIAS_CRYPTO("blake2s-128-x86"); -+MODULE_ALIAS_CRYPTO("blake2s-160"); -+MODULE_ALIAS_CRYPTO("blake2s-160-x86"); -+MODULE_ALIAS_CRYPTO("blake2s-224"); -+MODULE_ALIAS_CRYPTO("blake2s-224-x86"); -+MODULE_ALIAS_CRYPTO("blake2s-256"); -+MODULE_ALIAS_CRYPTO("blake2s-256-x86"); -+MODULE_LICENSE("GPL v2"); ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -657,6 +657,12 @@ config CRYPTO_BLAKE2S - - See https://blake2.net for further information. - -+config CRYPTO_BLAKE2S_X86 -+ tristate "BLAKE2s digest algorithm (x86 accelerated version)" -+ depends on X86 && 64BIT -+ select CRYPTO_LIB_BLAKE2S_GENERIC -+ select CRYPTO_ARCH_HAVE_LIB_BLAKE2S -+ - config CRYPTO_CRCT10DIF - tristate "CRCT10DIF algorithm" - select CRYPTO_HASH |