diff options
Diffstat (limited to 'target/linux/generic/backport-5.4/080-wireguard-0011-crypto-mips-chacha-wire-up-accelerated-32r2-code-fro.patch')
-rw-r--r-- | target/linux/generic/backport-5.4/080-wireguard-0011-crypto-mips-chacha-wire-up-accelerated-32r2-code-fro.patch | 559 |
1 files changed, 0 insertions, 559 deletions
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0011-crypto-mips-chacha-wire-up-accelerated-32r2-code-fro.patch b/target/linux/generic/backport-5.4/080-wireguard-0011-crypto-mips-chacha-wire-up-accelerated-32r2-code-fro.patch deleted file mode 100644 index 0d24ce29e5..0000000000 --- a/target/linux/generic/backport-5.4/080-wireguard-0011-crypto-mips-chacha-wire-up-accelerated-32r2-code-fro.patch +++ /dev/null @@ -1,559 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ardb@kernel.org> -Date: Fri, 8 Nov 2019 13:22:17 +0100 -Subject: [PATCH] crypto: mips/chacha - wire up accelerated 32r2 code from Zinc -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -commit 3a2f58f3ba4f6f44e33d1a48240d5eadb882cb59 upstream. - -This integrates the accelerated MIPS 32r2 implementation of ChaCha -into both the API and library interfaces of the kernel crypto stack. - -The significance of this is that, in addition to becoming available -as an accelerated library implementation, it can also be used by -existing crypto API code such as Adiantum (for block encryption on -ultra low performance cores) or IPsec using chacha20poly1305. These -are use cases that have already opted into using the abstract crypto -API. In order to support Adiantum, the core assembler routine has -been adapted to take the round count as a function argument rather -than hardcoding it to 20. - -Co-developed-by: René van Dorst <opensource@vdorst.com> -Signed-off-by: René van Dorst <opensource@vdorst.com> -Signed-off-by: Ard Biesheuvel <ardb@kernel.org> -Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> -Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> ---- - arch/mips/Makefile | 2 +- - arch/mips/crypto/Makefile | 4 + - arch/mips/crypto/chacha-core.S | 159 ++++++++++++++++++++++++--------- - arch/mips/crypto/chacha-glue.c | 150 +++++++++++++++++++++++++++++++ - crypto/Kconfig | 6 ++ - 5 files changed, 277 insertions(+), 44 deletions(-) - create mode 100644 arch/mips/crypto/chacha-glue.c - ---- a/arch/mips/Makefile -+++ b/arch/mips/Makefile -@@ -334,7 +334,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/m - # See arch/mips/Kbuild for content of core part of the kernel - core-y += arch/mips/ - --drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/ -+drivers-y += arch/mips/crypto/ - drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/ - - # suspend and hibernation support ---- a/arch/mips/crypto/Makefile -+++ b/arch/mips/crypto/Makefile -@@ -4,3 +4,7 @@ - # - - obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o -+ -+obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o -+chacha-mips-y := chacha-core.o chacha-glue.o -+AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots ---- a/arch/mips/crypto/chacha-core.S -+++ b/arch/mips/crypto/chacha-core.S -@@ -125,7 +125,7 @@ - #define CONCAT3(a,b,c) _CONCAT3(a,b,c) - - #define STORE_UNALIGNED(x) \ --CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ -+CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ - .if (x != 12); \ - lw T0, (x*4)(STATE); \ - .endif; \ -@@ -142,7 +142,7 @@ CONCAT3(.Lchacha20_mips_xor_unaligned_, - swr X ## x, (x*4)+LSB ## (OUT); - - #define STORE_ALIGNED(x) \ --CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ -+CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ - .if (x != 12); \ - lw T0, (x*4)(STATE); \ - .endif; \ -@@ -162,9 +162,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PL - * Every jumptable entry must be equal in size. - */ - #define JMPTBL_ALIGNED(x) \ --.Lchacha20_mips_jmptbl_aligned_ ## x: ; \ -+.Lchacha_mips_jmptbl_aligned_ ## x: ; \ - .set noreorder; \ -- b .Lchacha20_mips_xor_aligned_ ## x ## _b; \ -+ b .Lchacha_mips_xor_aligned_ ## x ## _b; \ - .if (x == 12); \ - addu SAVED_X, X ## x, NONCE_0; \ - .else; \ -@@ -173,9 +173,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PL - .set reorder - - #define JMPTBL_UNALIGNED(x) \ --.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \ -+.Lchacha_mips_jmptbl_unaligned_ ## x: ; \ - .set noreorder; \ -- b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \ -+ b .Lchacha_mips_xor_unaligned_ ## x ## _b; \ - .if (x == 12); \ - addu SAVED_X, X ## x, NONCE_0; \ - .else; \ -@@ -200,15 +200,18 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PL - .text - .set reorder - .set noat --.globl chacha20_mips --.ent chacha20_mips --chacha20_mips: -+.globl chacha_crypt_arch -+.ent chacha_crypt_arch -+chacha_crypt_arch: - .frame $sp, STACK_SIZE, $ra - -+ /* Load number of rounds */ -+ lw $at, 16($sp) -+ - addiu $sp, -STACK_SIZE - - /* Return bytes = 0. */ -- beqz BYTES, .Lchacha20_mips_end -+ beqz BYTES, .Lchacha_mips_end - - lw NONCE_0, 48(STATE) - -@@ -228,18 +231,15 @@ chacha20_mips: - or IS_UNALIGNED, IN, OUT - andi IS_UNALIGNED, 0x3 - -- /* Set number of rounds */ -- li $at, 20 -- -- b .Lchacha20_rounds_start -+ b .Lchacha_rounds_start - - .align 4 --.Loop_chacha20_rounds: -+.Loop_chacha_rounds: - addiu IN, CHACHA20_BLOCK_SIZE - addiu OUT, CHACHA20_BLOCK_SIZE - addiu NONCE_0, 1 - --.Lchacha20_rounds_start: -+.Lchacha_rounds_start: - lw X0, 0(STATE) - lw X1, 4(STATE) - lw X2, 8(STATE) -@@ -259,7 +259,7 @@ chacha20_mips: - lw X14, 56(STATE) - lw X15, 60(STATE) - --.Loop_chacha20_xor_rounds: -+.Loop_chacha_xor_rounds: - addiu $at, -2 - AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); - AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); -@@ -269,31 +269,31 @@ chacha20_mips: - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); - AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); - AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); -- bnez $at, .Loop_chacha20_xor_rounds -+ bnez $at, .Loop_chacha_xor_rounds - - addiu BYTES, -(CHACHA20_BLOCK_SIZE) - - /* Is data src/dst unaligned? Jump */ -- bnez IS_UNALIGNED, .Loop_chacha20_unaligned -+ bnez IS_UNALIGNED, .Loop_chacha_unaligned - - /* Set number rounds here to fill delayslot. */ -- li $at, 20 -+ lw $at, (STACK_SIZE+16)($sp) - - /* BYTES < 0, it has no full block. */ -- bltz BYTES, .Lchacha20_mips_no_full_block_aligned -+ bltz BYTES, .Lchacha_mips_no_full_block_aligned - - FOR_EACH_WORD_REV(STORE_ALIGNED) - - /* BYTES > 0? Loop again. */ -- bgtz BYTES, .Loop_chacha20_rounds -+ bgtz BYTES, .Loop_chacha_rounds - - /* Place this here to fill delay slot */ - addiu NONCE_0, 1 - - /* BYTES < 0? Handle last bytes */ -- bltz BYTES, .Lchacha20_mips_xor_bytes -+ bltz BYTES, .Lchacha_mips_xor_bytes - --.Lchacha20_mips_xor_done: -+.Lchacha_mips_xor_done: - /* Restore used registers */ - lw $s0, 0($sp) - lw $s1, 4($sp) -@@ -307,11 +307,11 @@ chacha20_mips: - /* Write NONCE_0 back to right location in state */ - sw NONCE_0, 48(STATE) - --.Lchacha20_mips_end: -+.Lchacha_mips_end: - addiu $sp, STACK_SIZE - jr $ra - --.Lchacha20_mips_no_full_block_aligned: -+.Lchacha_mips_no_full_block_aligned: - /* Restore the offset on BYTES */ - addiu BYTES, CHACHA20_BLOCK_SIZE - -@@ -319,7 +319,7 @@ chacha20_mips: - andi $at, BYTES, MASK_U32 - - /* Load upper half of jump table addr */ -- lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0) -+ lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0) - - /* Calculate lower half jump table offset */ - ins T0, $at, 1, 6 -@@ -328,7 +328,7 @@ chacha20_mips: - addu T1, STATE, $at - - /* Add lower half jump table addr */ -- addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0) -+ addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0) - - /* Read value from STATE */ - lw SAVED_CA, 0(T1) -@@ -342,31 +342,31 @@ chacha20_mips: - FOR_EACH_WORD(JMPTBL_ALIGNED) - - --.Loop_chacha20_unaligned: -+.Loop_chacha_unaligned: - /* Set number rounds here to fill delayslot. */ -- li $at, 20 -+ lw $at, (STACK_SIZE+16)($sp) - - /* BYTES > 0, it has no full block. */ -- bltz BYTES, .Lchacha20_mips_no_full_block_unaligned -+ bltz BYTES, .Lchacha_mips_no_full_block_unaligned - - FOR_EACH_WORD_REV(STORE_UNALIGNED) - - /* BYTES > 0? Loop again. */ -- bgtz BYTES, .Loop_chacha20_rounds -+ bgtz BYTES, .Loop_chacha_rounds - - /* Write NONCE_0 back to right location in state */ - sw NONCE_0, 48(STATE) - - .set noreorder - /* Fall through to byte handling */ -- bgez BYTES, .Lchacha20_mips_xor_done --.Lchacha20_mips_xor_unaligned_0_b: --.Lchacha20_mips_xor_aligned_0_b: -+ bgez BYTES, .Lchacha_mips_xor_done -+.Lchacha_mips_xor_unaligned_0_b: -+.Lchacha_mips_xor_aligned_0_b: - /* Place this here to fill delay slot */ - addiu NONCE_0, 1 - .set reorder - --.Lchacha20_mips_xor_bytes: -+.Lchacha_mips_xor_bytes: - addu IN, $at - addu OUT, $at - /* First byte */ -@@ -376,22 +376,22 @@ chacha20_mips: - ROTR(SAVED_X) - xor T1, SAVED_X - sb T1, 0(OUT) -- beqz $at, .Lchacha20_mips_xor_done -+ beqz $at, .Lchacha_mips_xor_done - /* Second byte */ - lbu T1, 1(IN) - addiu $at, BYTES, 2 - ROTx SAVED_X, 8 - xor T1, SAVED_X - sb T1, 1(OUT) -- beqz $at, .Lchacha20_mips_xor_done -+ beqz $at, .Lchacha_mips_xor_done - /* Third byte */ - lbu T1, 2(IN) - ROTx SAVED_X, 8 - xor T1, SAVED_X - sb T1, 2(OUT) -- b .Lchacha20_mips_xor_done -+ b .Lchacha_mips_xor_done - --.Lchacha20_mips_no_full_block_unaligned: -+.Lchacha_mips_no_full_block_unaligned: - /* Restore the offset on BYTES */ - addiu BYTES, CHACHA20_BLOCK_SIZE - -@@ -399,7 +399,7 @@ chacha20_mips: - andi $at, BYTES, MASK_U32 - - /* Load upper half of jump table addr */ -- lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0) -+ lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0) - - /* Calculate lower half jump table offset */ - ins T0, $at, 1, 6 -@@ -408,7 +408,7 @@ chacha20_mips: - addu T1, STATE, $at - - /* Add lower half jump table addr */ -- addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0) -+ addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0) - - /* Read value from STATE */ - lw SAVED_CA, 0(T1) -@@ -420,5 +420,78 @@ chacha20_mips: - - /* Jump table */ - FOR_EACH_WORD(JMPTBL_UNALIGNED) --.end chacha20_mips -+.end chacha_crypt_arch -+.set at -+ -+/* Input arguments -+ * STATE $a0 -+ * OUT $a1 -+ * NROUND $a2 -+ */ -+ -+#undef X12 -+#undef X13 -+#undef X14 -+#undef X15 -+ -+#define X12 $a3 -+#define X13 $at -+#define X14 $v0 -+#define X15 STATE -+ -+.set noat -+.globl hchacha_block_arch -+.ent hchacha_block_arch -+hchacha_block_arch: -+ .frame $sp, STACK_SIZE, $ra -+ -+ addiu $sp, -STACK_SIZE -+ -+ /* Save X11(s6) */ -+ sw X11, 0($sp) -+ -+ lw X0, 0(STATE) -+ lw X1, 4(STATE) -+ lw X2, 8(STATE) -+ lw X3, 12(STATE) -+ lw X4, 16(STATE) -+ lw X5, 20(STATE) -+ lw X6, 24(STATE) -+ lw X7, 28(STATE) -+ lw X8, 32(STATE) -+ lw X9, 36(STATE) -+ lw X10, 40(STATE) -+ lw X11, 44(STATE) -+ lw X12, 48(STATE) -+ lw X13, 52(STATE) -+ lw X14, 56(STATE) -+ lw X15, 60(STATE) -+ -+.Loop_hchacha_xor_rounds: -+ addiu $a2, -2 -+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); -+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); -+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); -+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); -+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); -+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); -+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); -+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); -+ bnez $a2, .Loop_hchacha_xor_rounds -+ -+ /* Restore used register */ -+ lw X11, 0($sp) -+ -+ sw X0, 0(OUT) -+ sw X1, 4(OUT) -+ sw X2, 8(OUT) -+ sw X3, 12(OUT) -+ sw X12, 16(OUT) -+ sw X13, 20(OUT) -+ sw X14, 24(OUT) -+ sw X15, 28(OUT) -+ -+ addiu $sp, STACK_SIZE -+ jr $ra -+.end hchacha_block_arch - .set at ---- /dev/null -+++ b/arch/mips/crypto/chacha-glue.c -@@ -0,0 +1,150 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * MIPS accelerated ChaCha and XChaCha stream ciphers, -+ * including ChaCha20 (RFC7539) -+ * -+ * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> -+ */ -+ -+#include <asm/byteorder.h> -+#include <crypto/algapi.h> -+#include <crypto/internal/chacha.h> -+#include <crypto/internal/skcipher.h> -+#include <linux/kernel.h> -+#include <linux/module.h> -+ -+asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, -+ unsigned int bytes, int nrounds); -+EXPORT_SYMBOL(chacha_crypt_arch); -+ -+asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds); -+EXPORT_SYMBOL(hchacha_block_arch); -+ -+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) -+{ -+ chacha_init_generic(state, key, iv); -+} -+EXPORT_SYMBOL(chacha_init_arch); -+ -+static int chacha_mips_stream_xor(struct skcipher_request *req, -+ const struct chacha_ctx *ctx, const u8 *iv) -+{ -+ struct skcipher_walk walk; -+ u32 state[16]; -+ int err; -+ -+ err = skcipher_walk_virt(&walk, req, false); -+ -+ chacha_init_generic(state, ctx->key, iv); -+ -+ while (walk.nbytes > 0) { -+ unsigned int nbytes = walk.nbytes; -+ -+ if (nbytes < walk.total) -+ nbytes = round_down(nbytes, walk.stride); -+ -+ chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr, -+ nbytes, ctx->nrounds); -+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes); -+ } -+ -+ return err; -+} -+ -+static int chacha_mips(struct skcipher_request *req) -+{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -+ -+ return chacha_mips_stream_xor(req, ctx, req->iv); -+} -+ -+static int xchacha_mips(struct skcipher_request *req) -+{ -+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); -+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); -+ struct chacha_ctx subctx; -+ u32 state[16]; -+ u8 real_iv[16]; -+ -+ chacha_init_generic(state, ctx->key, req->iv); -+ -+ hchacha_block(state, subctx.key, ctx->nrounds); -+ subctx.nrounds = ctx->nrounds; -+ -+ memcpy(&real_iv[0], req->iv + 24, 8); -+ memcpy(&real_iv[8], req->iv + 16, 8); -+ return chacha_mips_stream_xor(req, &subctx, real_iv); -+} -+ -+static struct skcipher_alg algs[] = { -+ { -+ .base.cra_name = "chacha20", -+ .base.cra_driver_name = "chacha20-mips", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = CHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .setkey = chacha20_setkey, -+ .encrypt = chacha_mips, -+ .decrypt = chacha_mips, -+ }, { -+ .base.cra_name = "xchacha20", -+ .base.cra_driver_name = "xchacha20-mips", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = XCHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .setkey = chacha20_setkey, -+ .encrypt = xchacha_mips, -+ .decrypt = xchacha_mips, -+ }, { -+ .base.cra_name = "xchacha12", -+ .base.cra_driver_name = "xchacha12-mips", -+ .base.cra_priority = 200, -+ .base.cra_blocksize = 1, -+ .base.cra_ctxsize = sizeof(struct chacha_ctx), -+ .base.cra_module = THIS_MODULE, -+ -+ .min_keysize = CHACHA_KEY_SIZE, -+ .max_keysize = CHACHA_KEY_SIZE, -+ .ivsize = XCHACHA_IV_SIZE, -+ .chunksize = CHACHA_BLOCK_SIZE, -+ .setkey = chacha12_setkey, -+ .encrypt = xchacha_mips, -+ .decrypt = xchacha_mips, -+ } -+}; -+ -+static int __init chacha_simd_mod_init(void) -+{ -+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); -+} -+ -+static void __exit chacha_simd_mod_fini(void) -+{ -+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); -+} -+ -+module_init(chacha_simd_mod_init); -+module_exit(chacha_simd_mod_fini); -+ -+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)"); -+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); -+MODULE_LICENSE("GPL v2"); -+MODULE_ALIAS_CRYPTO("chacha20"); -+MODULE_ALIAS_CRYPTO("chacha20-mips"); -+MODULE_ALIAS_CRYPTO("xchacha20"); -+MODULE_ALIAS_CRYPTO("xchacha20-mips"); -+MODULE_ALIAS_CRYPTO("xchacha12"); -+MODULE_ALIAS_CRYPTO("xchacha12-mips"); ---- a/crypto/Kconfig -+++ b/crypto/Kconfig -@@ -1423,6 +1423,12 @@ config CRYPTO_CHACHA20_X86_64 - SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20, - XChaCha20, and XChaCha12 stream ciphers. - -+config CRYPTO_CHACHA_MIPS -+ tristate "ChaCha stream cipher algorithms (MIPS 32r2 optimized)" -+ depends on CPU_MIPS32_R2 -+ select CRYPTO_BLKCIPHER -+ select CRYPTO_ARCH_HAVE_LIB_CHACHA -+ - config CRYPTO_SEED - tristate "SEED cipher algorithm" - select CRYPTO_ALGAPI |