diff options
Diffstat (limited to 'target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch')
-rw-r--r-- | target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch | 691 |
1 files changed, 691 insertions, 0 deletions
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch b/target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch new file mode 100644 index 0000000000..88c9738dbc --- /dev/null +++ b/target/linux/generic/backport-5.4/080-wireguard-0008-crypto-arm-chacha-remove-dependency-on-generic-ChaCh.patch @@ -0,0 +1,691 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Ard Biesheuvel <ardb@kernel.org> +Date: Fri, 8 Nov 2019 13:22:14 +0100 +Subject: [PATCH] crypto: arm/chacha - remove dependency on generic ChaCha + driver + +commit b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 upstream. + +Instead of falling back to the generic ChaCha skcipher driver for +non-SIMD cases, use a fast scalar implementation for ARM authored +by Eric Biggers. This removes the module dependency on chacha-generic +altogether, which also simplifies things when we expose the ChaCha +library interface from this module. + +Signed-off-by: Ard Biesheuvel <ardb@kernel.org> +Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> +Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> +--- + arch/arm/crypto/Kconfig | 4 +- + arch/arm/crypto/Makefile | 3 +- + arch/arm/crypto/chacha-glue.c | 304 +++++++++++++++++++++++++++ + arch/arm/crypto/chacha-neon-glue.c | 202 ------------------ + arch/arm/crypto/chacha-scalar-core.S | 65 +++--- + arch/arm64/crypto/chacha-neon-glue.c | 2 +- + 6 files changed, 340 insertions(+), 240 deletions(-) + create mode 100644 arch/arm/crypto/chacha-glue.c + delete mode 100644 arch/arm/crypto/chacha-neon-glue.c + +--- a/arch/arm/crypto/Kconfig ++++ b/arch/arm/crypto/Kconfig +@@ -127,10 +127,8 @@ config CRYPTO_CRC32_ARM_CE + select CRYPTO_HASH + + config CRYPTO_CHACHA20_NEON +- tristate "NEON accelerated ChaCha stream cipher algorithms" +- depends on KERNEL_MODE_NEON ++ tristate "NEON and scalar accelerated ChaCha stream cipher algorithms" + select CRYPTO_BLKCIPHER +- select CRYPTO_CHACHA20 + + config CRYPTO_NHPOLY1305_NEON + tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)" +--- a/arch/arm/crypto/Makefile ++++ b/arch/arm/crypto/Makefile +@@ -53,7 +53,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glu + ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o + crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o + crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o +-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o ++chacha-neon-y := chacha-scalar-core.o chacha-glue.o ++chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o + nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o + + ifdef REGENERATE_ARM_CRYPTO +--- /dev/null ++++ b/arch/arm/crypto/chacha-glue.c +@@ -0,0 +1,304 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * ARM NEON accelerated ChaCha and XChaCha stream ciphers, ++ * including ChaCha20 (RFC7539) ++ * ++ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> ++ * Copyright (C) 2015 Martin Willi ++ */ ++ ++#include <crypto/algapi.h> ++#include <crypto/internal/chacha.h> ++#include <crypto/internal/simd.h> ++#include <crypto/internal/skcipher.h> ++#include <linux/kernel.h> ++#include <linux/module.h> ++ ++#include <asm/cputype.h> ++#include <asm/hwcap.h> ++#include <asm/neon.h> ++#include <asm/simd.h> ++ ++asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, ++ int nrounds); ++asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, ++ int nrounds); ++asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); ++asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); ++ ++asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, ++ const u32 *state, int nrounds); ++ ++static inline bool neon_usable(void) ++{ ++ return crypto_simd_usable(); ++} ++ ++static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, ++ unsigned int bytes, int nrounds) ++{ ++ u8 buf[CHACHA_BLOCK_SIZE]; ++ ++ while (bytes >= CHACHA_BLOCK_SIZE * 4) { ++ chacha_4block_xor_neon(state, dst, src, nrounds); ++ bytes -= CHACHA_BLOCK_SIZE * 4; ++ src += CHACHA_BLOCK_SIZE * 4; ++ dst += CHACHA_BLOCK_SIZE * 4; ++ state[12] += 4; ++ } ++ while (bytes >= CHACHA_BLOCK_SIZE) { ++ chacha_block_xor_neon(state, dst, src, nrounds); ++ bytes -= CHACHA_BLOCK_SIZE; ++ src += CHACHA_BLOCK_SIZE; ++ dst += CHACHA_BLOCK_SIZE; ++ state[12]++; ++ } ++ if (bytes) { ++ memcpy(buf, src, bytes); ++ chacha_block_xor_neon(state, buf, buf, nrounds); ++ memcpy(dst, buf, bytes); ++ } ++} ++ ++static int chacha_stream_xor(struct skcipher_request *req, ++ const struct chacha_ctx *ctx, const u8 *iv, ++ bool neon) ++{ ++ struct skcipher_walk walk; ++ u32 state[16]; ++ int err; ++ ++ err = skcipher_walk_virt(&walk, req, false); ++ ++ chacha_init_generic(state, ctx->key, iv); ++ ++ while (walk.nbytes > 0) { ++ unsigned int nbytes = walk.nbytes; ++ ++ if (nbytes < walk.total) ++ nbytes = round_down(nbytes, walk.stride); ++ ++ if (!neon) { ++ chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, ++ nbytes, state, ctx->nrounds); ++ state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); ++ } else { ++ kernel_neon_begin(); ++ chacha_doneon(state, walk.dst.virt.addr, ++ walk.src.virt.addr, nbytes, ctx->nrounds); ++ kernel_neon_end(); ++ } ++ err = skcipher_walk_done(&walk, walk.nbytes - nbytes); ++ } ++ ++ return err; ++} ++ ++static int do_chacha(struct skcipher_request *req, bool neon) ++{ ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); ++ ++ return chacha_stream_xor(req, ctx, req->iv, neon); ++} ++ ++static int chacha_arm(struct skcipher_request *req) ++{ ++ return do_chacha(req, false); ++} ++ ++static int chacha_neon(struct skcipher_request *req) ++{ ++ return do_chacha(req, neon_usable()); ++} ++ ++static int do_xchacha(struct skcipher_request *req, bool neon) ++{ ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); ++ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); ++ struct chacha_ctx subctx; ++ u32 state[16]; ++ u8 real_iv[16]; ++ ++ chacha_init_generic(state, ctx->key, req->iv); ++ ++ if (!neon) { ++ hchacha_block_arm(state, subctx.key, ctx->nrounds); ++ } else { ++ kernel_neon_begin(); ++ hchacha_block_neon(state, subctx.key, ctx->nrounds); ++ kernel_neon_end(); ++ } ++ subctx.nrounds = ctx->nrounds; ++ ++ memcpy(&real_iv[0], req->iv + 24, 8); ++ memcpy(&real_iv[8], req->iv + 16, 8); ++ return chacha_stream_xor(req, &subctx, real_iv, neon); ++} ++ ++static int xchacha_arm(struct skcipher_request *req) ++{ ++ return do_xchacha(req, false); ++} ++ ++static int xchacha_neon(struct skcipher_request *req) ++{ ++ return do_xchacha(req, neon_usable()); ++} ++ ++static struct skcipher_alg arm_algs[] = { ++ { ++ .base.cra_name = "chacha20", ++ .base.cra_driver_name = "chacha20-arm", ++ .base.cra_priority = 200, ++ .base.cra_blocksize = 1, ++ .base.cra_ctxsize = sizeof(struct chacha_ctx), ++ .base.cra_module = THIS_MODULE, ++ ++ .min_keysize = CHACHA_KEY_SIZE, ++ .max_keysize = CHACHA_KEY_SIZE, ++ .ivsize = CHACHA_IV_SIZE, ++ .chunksize = CHACHA_BLOCK_SIZE, ++ .setkey = chacha20_setkey, ++ .encrypt = chacha_arm, ++ .decrypt = chacha_arm, ++ }, { ++ .base.cra_name = "xchacha20", ++ .base.cra_driver_name = "xchacha20-arm", ++ .base.cra_priority = 200, ++ .base.cra_blocksize = 1, ++ .base.cra_ctxsize = sizeof(struct chacha_ctx), ++ .base.cra_module = THIS_MODULE, ++ ++ .min_keysize = CHACHA_KEY_SIZE, ++ .max_keysize = CHACHA_KEY_SIZE, ++ .ivsize = XCHACHA_IV_SIZE, ++ .chunksize = CHACHA_BLOCK_SIZE, ++ .setkey = chacha20_setkey, ++ .encrypt = xchacha_arm, ++ .decrypt = xchacha_arm, ++ }, { ++ .base.cra_name = "xchacha12", ++ .base.cra_driver_name = "xchacha12-arm", ++ .base.cra_priority = 200, ++ .base.cra_blocksize = 1, ++ .base.cra_ctxsize = sizeof(struct chacha_ctx), ++ .base.cra_module = THIS_MODULE, ++ ++ .min_keysize = CHACHA_KEY_SIZE, ++ .max_keysize = CHACHA_KEY_SIZE, ++ .ivsize = XCHACHA_IV_SIZE, ++ .chunksize = CHACHA_BLOCK_SIZE, ++ .setkey = chacha12_setkey, ++ .encrypt = xchacha_arm, ++ .decrypt = xchacha_arm, ++ }, ++}; ++ ++static struct skcipher_alg neon_algs[] = { ++ { ++ .base.cra_name = "chacha20", ++ .base.cra_driver_name = "chacha20-neon", ++ .base.cra_priority = 300, ++ .base.cra_blocksize = 1, ++ .base.cra_ctxsize = sizeof(struct chacha_ctx), ++ .base.cra_module = THIS_MODULE, ++ ++ .min_keysize = CHACHA_KEY_SIZE, ++ .max_keysize = CHACHA_KEY_SIZE, ++ .ivsize = CHACHA_IV_SIZE, ++ .chunksize = CHACHA_BLOCK_SIZE, ++ .walksize = 4 * CHACHA_BLOCK_SIZE, ++ .setkey = chacha20_setkey, ++ .encrypt = chacha_neon, ++ .decrypt = chacha_neon, ++ }, { ++ .base.cra_name = "xchacha20", ++ .base.cra_driver_name = "xchacha20-neon", ++ .base.cra_priority = 300, ++ .base.cra_blocksize = 1, ++ .base.cra_ctxsize = sizeof(struct chacha_ctx), ++ .base.cra_module = THIS_MODULE, ++ ++ .min_keysize = CHACHA_KEY_SIZE, ++ .max_keysize = CHACHA_KEY_SIZE, ++ .ivsize = XCHACHA_IV_SIZE, ++ .chunksize = CHACHA_BLOCK_SIZE, ++ .walksize = 4 * CHACHA_BLOCK_SIZE, ++ .setkey = chacha20_setkey, ++ .encrypt = xchacha_neon, ++ .decrypt = xchacha_neon, ++ }, { ++ .base.cra_name = "xchacha12", ++ .base.cra_driver_name = "xchacha12-neon", ++ .base.cra_priority = 300, ++ .base.cra_blocksize = 1, ++ .base.cra_ctxsize = sizeof(struct chacha_ctx), ++ .base.cra_module = THIS_MODULE, ++ ++ .min_keysize = CHACHA_KEY_SIZE, ++ .max_keysize = CHACHA_KEY_SIZE, ++ .ivsize = XCHACHA_IV_SIZE, ++ .chunksize = CHACHA_BLOCK_SIZE, ++ .walksize = 4 * CHACHA_BLOCK_SIZE, ++ .setkey = chacha12_setkey, ++ .encrypt = xchacha_neon, ++ .decrypt = xchacha_neon, ++ } ++}; ++ ++static int __init chacha_simd_mod_init(void) ++{ ++ int err; ++ ++ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); ++ if (err) ++ return err; ++ ++ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { ++ int i; ++ ++ switch (read_cpuid_part()) { ++ case ARM_CPU_PART_CORTEX_A7: ++ case ARM_CPU_PART_CORTEX_A5: ++ /* ++ * The Cortex-A7 and Cortex-A5 do not perform well with ++ * the NEON implementation but do incredibly with the ++ * scalar one and use less power. ++ */ ++ for (i = 0; i < ARRAY_SIZE(neon_algs); i++) ++ neon_algs[i].base.cra_priority = 0; ++ break; ++ } ++ ++ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); ++ if (err) ++ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); ++ } ++ return err; ++} ++ ++static void __exit chacha_simd_mod_fini(void) ++{ ++ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); ++ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) ++ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); ++} ++ ++module_init(chacha_simd_mod_init); ++module_exit(chacha_simd_mod_fini); ++ ++MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); ++MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); ++MODULE_LICENSE("GPL v2"); ++MODULE_ALIAS_CRYPTO("chacha20"); ++MODULE_ALIAS_CRYPTO("chacha20-arm"); ++MODULE_ALIAS_CRYPTO("xchacha20"); ++MODULE_ALIAS_CRYPTO("xchacha20-arm"); ++MODULE_ALIAS_CRYPTO("xchacha12"); ++MODULE_ALIAS_CRYPTO("xchacha12-arm"); ++#ifdef CONFIG_KERNEL_MODE_NEON ++MODULE_ALIAS_CRYPTO("chacha20-neon"); ++MODULE_ALIAS_CRYPTO("xchacha20-neon"); ++MODULE_ALIAS_CRYPTO("xchacha12-neon"); ++#endif +--- a/arch/arm/crypto/chacha-neon-glue.c ++++ /dev/null +@@ -1,202 +0,0 @@ +-/* +- * ARM NEON accelerated ChaCha and XChaCha stream ciphers, +- * including ChaCha20 (RFC7539) +- * +- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org> +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License version 2 as +- * published by the Free Software Foundation. +- * +- * Based on: +- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code +- * +- * Copyright (C) 2015 Martin Willi +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- */ +- +-#include <crypto/algapi.h> +-#include <crypto/internal/chacha.h> +-#include <crypto/internal/simd.h> +-#include <crypto/internal/skcipher.h> +-#include <linux/kernel.h> +-#include <linux/module.h> +- +-#include <asm/hwcap.h> +-#include <asm/neon.h> +-#include <asm/simd.h> +- +-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, +- int nrounds); +-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, +- int nrounds); +-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); +- +-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, +- unsigned int bytes, int nrounds) +-{ +- u8 buf[CHACHA_BLOCK_SIZE]; +- +- while (bytes >= CHACHA_BLOCK_SIZE * 4) { +- chacha_4block_xor_neon(state, dst, src, nrounds); +- bytes -= CHACHA_BLOCK_SIZE * 4; +- src += CHACHA_BLOCK_SIZE * 4; +- dst += CHACHA_BLOCK_SIZE * 4; +- state[12] += 4; +- } +- while (bytes >= CHACHA_BLOCK_SIZE) { +- chacha_block_xor_neon(state, dst, src, nrounds); +- bytes -= CHACHA_BLOCK_SIZE; +- src += CHACHA_BLOCK_SIZE; +- dst += CHACHA_BLOCK_SIZE; +- state[12]++; +- } +- if (bytes) { +- memcpy(buf, src, bytes); +- chacha_block_xor_neon(state, buf, buf, nrounds); +- memcpy(dst, buf, bytes); +- } +-} +- +-static int chacha_neon_stream_xor(struct skcipher_request *req, +- const struct chacha_ctx *ctx, const u8 *iv) +-{ +- struct skcipher_walk walk; +- u32 state[16]; +- int err; +- +- err = skcipher_walk_virt(&walk, req, false); +- +- crypto_chacha_init(state, ctx, iv); +- +- while (walk.nbytes > 0) { +- unsigned int nbytes = walk.nbytes; +- +- if (nbytes < walk.total) +- nbytes = round_down(nbytes, walk.stride); +- +- kernel_neon_begin(); +- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr, +- nbytes, ctx->nrounds); +- kernel_neon_end(); +- err = skcipher_walk_done(&walk, walk.nbytes - nbytes); +- } +- +- return err; +-} +- +-static int chacha_neon(struct skcipher_request *req) +-{ +- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); +- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); +- +- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) +- return crypto_chacha_crypt(req); +- +- return chacha_neon_stream_xor(req, ctx, req->iv); +-} +- +-static int xchacha_neon(struct skcipher_request *req) +-{ +- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); +- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); +- struct chacha_ctx subctx; +- u32 state[16]; +- u8 real_iv[16]; +- +- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) +- return crypto_xchacha_crypt(req); +- +- crypto_chacha_init(state, ctx, req->iv); +- +- kernel_neon_begin(); +- hchacha_block_neon(state, subctx.key, ctx->nrounds); +- kernel_neon_end(); +- subctx.nrounds = ctx->nrounds; +- +- memcpy(&real_iv[0], req->iv + 24, 8); +- memcpy(&real_iv[8], req->iv + 16, 8); +- return chacha_neon_stream_xor(req, &subctx, real_iv); +-} +- +-static struct skcipher_alg algs[] = { +- { +- .base.cra_name = "chacha20", +- .base.cra_driver_name = "chacha20-neon", +- .base.cra_priority = 300, +- .base.cra_blocksize = 1, +- .base.cra_ctxsize = sizeof(struct chacha_ctx), +- .base.cra_module = THIS_MODULE, +- +- .min_keysize = CHACHA_KEY_SIZE, +- .max_keysize = CHACHA_KEY_SIZE, +- .ivsize = CHACHA_IV_SIZE, +- .chunksize = CHACHA_BLOCK_SIZE, +- .walksize = 4 * CHACHA_BLOCK_SIZE, +- .setkey = crypto_chacha20_setkey, +- .encrypt = chacha_neon, +- .decrypt = chacha_neon, +- }, { +- .base.cra_name = "xchacha20", +- .base.cra_driver_name = "xchacha20-neon", +- .base.cra_priority = 300, +- .base.cra_blocksize = 1, +- .base.cra_ctxsize = sizeof(struct chacha_ctx), +- .base.cra_module = THIS_MODULE, +- +- .min_keysize = CHACHA_KEY_SIZE, +- .max_keysize = CHACHA_KEY_SIZE, +- .ivsize = XCHACHA_IV_SIZE, +- .chunksize = CHACHA_BLOCK_SIZE, +- .walksize = 4 * CHACHA_BLOCK_SIZE, +- .setkey = crypto_chacha20_setkey, +- .encrypt = xchacha_neon, +- .decrypt = xchacha_neon, +- }, { +- .base.cra_name = "xchacha12", +- .base.cra_driver_name = "xchacha12-neon", +- .base.cra_priority = 300, +- .base.cra_blocksize = 1, +- .base.cra_ctxsize = sizeof(struct chacha_ctx), +- .base.cra_module = THIS_MODULE, +- +- .min_keysize = CHACHA_KEY_SIZE, +- .max_keysize = CHACHA_KEY_SIZE, +- .ivsize = XCHACHA_IV_SIZE, +- .chunksize = CHACHA_BLOCK_SIZE, +- .walksize = 4 * CHACHA_BLOCK_SIZE, +- .setkey = crypto_chacha12_setkey, +- .encrypt = xchacha_neon, +- .decrypt = xchacha_neon, +- } +-}; +- +-static int __init chacha_simd_mod_init(void) +-{ +- if (!(elf_hwcap & HWCAP_NEON)) +- return -ENODEV; +- +- return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); +-} +- +-static void __exit chacha_simd_mod_fini(void) +-{ +- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +-} +- +-module_init(chacha_simd_mod_init); +-module_exit(chacha_simd_mod_fini); +- +-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); +-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +-MODULE_LICENSE("GPL v2"); +-MODULE_ALIAS_CRYPTO("chacha20"); +-MODULE_ALIAS_CRYPTO("chacha20-neon"); +-MODULE_ALIAS_CRYPTO("xchacha20"); +-MODULE_ALIAS_CRYPTO("xchacha20-neon"); +-MODULE_ALIAS_CRYPTO("xchacha12"); +-MODULE_ALIAS_CRYPTO("xchacha12-neon"); +--- a/arch/arm/crypto/chacha-scalar-core.S ++++ b/arch/arm/crypto/chacha-scalar-core.S +@@ -41,14 +41,6 @@ + X14 .req r12 + X15 .req r14 + +-.Lexpand_32byte_k: +- // "expand 32-byte k" +- .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 +- +-#ifdef __thumb2__ +-# define adrl adr +-#endif +- + .macro __rev out, in, t0, t1, t2 + .if __LINUX_ARM_ARCH__ >= 6 + rev \out, \in +@@ -391,61 +383,65 @@ + .endm // _chacha + + /* +- * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8], +- * const u32 iv[4]); ++ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, ++ * const u32 *state, int nrounds); + */ +-ENTRY(chacha20_arm) ++ENTRY(chacha_doarm) + cmp r2, #0 // len == 0? + reteq lr + ++ ldr ip, [sp] ++ cmp ip, #12 ++ + push {r0-r2,r4-r11,lr} + + // Push state x0-x15 onto stack. + // Also store an extra copy of x10-x11 just before the state. + +- ldr r4, [sp, #48] // iv +- mov r0, sp +- sub sp, #80 +- +- // iv: x12-x15 +- ldm r4, {X12,X13,X14,X15} +- stmdb r0!, {X12,X13,X14,X15} ++ add X12, r3, #48 ++ ldm X12, {X12,X13,X14,X15} ++ push {X12,X13,X14,X15} ++ sub sp, sp, #64 + +- // key: x4-x11 +- __ldrd X8_X10, X9_X11, r3, 24 ++ __ldrd X8_X10, X9_X11, r3, 40 + __strd X8_X10, X9_X11, sp, 8 +- stmdb r0!, {X8_X10, X9_X11} +- ldm r3, {X4-X9_X11} +- stmdb r0!, {X4-X9_X11} +- +- // constants: x0-x3 +- adrl X3, .Lexpand_32byte_k +- ldm X3, {X0-X3} ++ __strd X8_X10, X9_X11, sp, 56 ++ ldm r3, {X0-X9_X11} + __strd X0, X1, sp, 16 + __strd X2, X3, sp, 24 ++ __strd X4, X5, sp, 32 ++ __strd X6, X7, sp, 40 ++ __strd X8_X10, X9_X11, sp, 48 + ++ beq 1f + _chacha 20 + +- add sp, #76 ++0: add sp, #76 + pop {r4-r11, pc} +-ENDPROC(chacha20_arm) ++ ++1: _chacha 12 ++ b 0b ++ENDPROC(chacha_doarm) + + /* +- * void hchacha20_arm(const u32 state[16], u32 out[8]); ++ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds); + */ +-ENTRY(hchacha20_arm) ++ENTRY(hchacha_block_arm) + push {r1,r4-r11,lr} + ++ cmp r2, #12 // ChaCha12 ? ++ + mov r14, r0 + ldmia r14!, {r0-r11} // load x0-x11 + push {r10-r11} // store x10-x11 to stack + ldm r14, {r10-r12,r14} // load x12-x15 + sub sp, #8 + ++ beq 1f + _chacha_permute 20 + + // Skip over (unused0-unused1, x10-x11) +- add sp, #16 ++0: add sp, #16 + + // Fix up rotations of x12-x15 + ror X12, X12, #drot +@@ -458,4 +454,7 @@ ENTRY(hchacha20_arm) + stm r4, {X0,X1,X2,X3,X12,X13,X14,X15} + + pop {r4-r11,pc} +-ENDPROC(hchacha20_arm) ++ ++1: _chacha_permute 12 ++ b 0b ++ENDPROC(hchacha_block_arm) +--- a/arch/arm64/crypto/chacha-neon-glue.c ++++ b/arch/arm64/crypto/chacha-neon-glue.c +@@ -1,5 +1,5 @@ + /* +- * ARM NEON accelerated ChaCha and XChaCha stream ciphers, ++ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> |