path: root/target/linux/generic/backport-5.4/080-wireguard-0002-crypto-chacha-move-existing-library-code-into-lib-cr.patch
authorJason A. Donenfeld <Jason@zx2c4.com>2021-02-19 14:29:04 +0100
committerDavid Bauer <mail@david-bauer.net>2021-02-26 20:41:01 +0100
commit3888fa78802354ab7bbd19b7d061fd80a16ce06b (patch)
tree2225a6313cb6482f0cb9c09df662a0d44197350e /target/linux/generic/backport-5.4/080-wireguard-0002-crypto-chacha-move-existing-library-code-into-lib-cr.patch
parent7d4143234c4dfdd050ebc64ec8231f9d81ea65af (diff)
kernel: 5.4: import wireguard backport
Rather than using the clunky, old, slower wireguard-linux-compat out-of-tree module, this commit does a patch-by-patch backport of upstream's wireguard to 5.4. This specific backport is in widespread use, being part of SUSE's enterprise kernel, Oracle's enterprise kernel, Google's Android kernel, Gentoo's distro kernel, and probably more I've forgotten about. It's definitely the "more proper" way of adding wireguard to a kernel than the ugly compat.h hell of the wireguard-linux-compat repo. And most importantly for OpenWRT, it allows using the same module configuration code for 5.10 as for 5.4, with no need for bifurcation.

These patches are from the backport tree which is maintained in the open here: https://git.zx2c4.com/wireguard-linux/log/?h=backport-5.4.y

I'll be sending PRs to update this as needed.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'target/linux/generic/backport-5.4/080-wireguard-0002-crypto-chacha-move-existing-library-code-into-lib-cr.patch')
-rw-r--r--  target/linux/generic/backport-5.4/080-wireguard-0002-crypto-chacha-move-existing-library-code-into-lib-cr.patch | 669
1 file changed, 669 insertions(+), 0 deletions(-)
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0002-crypto-chacha-move-existing-library-code-into-lib-cr.patch b/target/linux/generic/backport-5.4/080-wireguard-0002-crypto-chacha-move-existing-library-code-into-lib-cr.patch
new file mode 100644
index 0000000000..a16ca08f56
--- /dev/null
+++ b/target/linux/generic/backport-5.4/080-wireguard-0002-crypto-chacha-move-existing-library-code-into-lib-cr.patch
@@ -0,0 +1,669 @@
+From 6f71439c260ddd0f9a21fee3e34449fe9c017ab6 Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Fri, 8 Nov 2019 13:22:08 +0100
+Subject: [PATCH 002/124] crypto: chacha - move existing library code into
+ lib/crypto
+
+commit 5fb8ef25803ef33e2eb60b626435828b937bed75 upstream.
+
+Currently, our generic ChaCha implementation consists of a permute
+function in lib/chacha.c that operates on the 64-byte ChaCha state
+directly [and which is always included into the core kernel since it
+is used by the /dev/random driver], and the crypto API plumbing to
+expose it as a skcipher.
+
+In order to support in-kernel users that need the ChaCha streamcipher
+but have no need [or tolerance] for going through the abstractions of
+the crypto API, let's expose the streamcipher bits via a library API
+as well, in a way that permits the implementation to be superseded by
+an architecture specific one if provided.
+
+So move the streamcipher code into a separate module in lib/crypto,
+and expose the init() and crypt() routines to users of the library.
+
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
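
The message above describes the new contract: in-kernel users call the library's init() and crypt() routines directly, with no skcipher allocation. As a minimal sketch (not part of the patch) of what such a caller looks like once this series is applied -- chacha_init(), chacha_crypt(), CHACHA_KEY_SIZE and CHACHA_IV_SIZE are the names introduced below; the function and buffer names here are illustrative:

  #include <crypto/chacha.h>

  /* Encrypt (or decrypt) a buffer in place with ChaCha20, no crypto API tfm. */
  static void demo_chacha20(u8 *buf, unsigned int len,
                            const u32 key[CHACHA_KEY_SIZE / sizeof(u32)],
                            const u8 iv[CHACHA_IV_SIZE])
  {
          u32 state[16];

          chacha_init(state, key, iv);            /* arch or generic, per Kconfig */
          chacha_crypt(state, buf, buf, len, 20); /* 20 rounds = ChaCha20 */
  }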
+---
+ arch/arm/crypto/chacha-neon-glue.c | 2 +-
+ arch/arm64/crypto/chacha-neon-glue.c | 2 +-
+ arch/x86/crypto/chacha_glue.c | 2 +-
+ crypto/Kconfig | 1 +
+ crypto/chacha_generic.c | 60 ++--------------------
+ include/crypto/chacha.h | 77 ++++++++++++++++++++++------
+ include/crypto/internal/chacha.h | 53 +++++++++++++++++++
+ lib/Makefile | 3 +-
+ lib/crypto/Kconfig | 26 ++++++++++
+ lib/crypto/Makefile | 4 ++
+ lib/{ => crypto}/chacha.c | 20 ++++----
+ lib/crypto/libchacha.c | 35 +++++++++++++
+ 12 files changed, 199 insertions(+), 86 deletions(-)
+ create mode 100644 include/crypto/internal/chacha.h
+ rename lib/{ => crypto}/chacha.c (88%)
+ create mode 100644 lib/crypto/libchacha.c
+
+--- a/arch/arm/crypto/chacha-neon-glue.c
++++ b/arch/arm/crypto/chacha-neon-glue.c
+@@ -20,7 +20,7 @@
+ */
+
+ #include <crypto/algapi.h>
+-#include <crypto/chacha.h>
++#include <crypto/internal/chacha.h>
+ #include <crypto/internal/simd.h>
+ #include <crypto/internal/skcipher.h>
+ #include <linux/kernel.h>
+--- a/arch/arm64/crypto/chacha-neon-glue.c
++++ b/arch/arm64/crypto/chacha-neon-glue.c
+@@ -20,7 +20,7 @@
+ */
+
+ #include <crypto/algapi.h>
+-#include <crypto/chacha.h>
++#include <crypto/internal/chacha.h>
+ #include <crypto/internal/simd.h>
+ #include <crypto/internal/skcipher.h>
+ #include <linux/kernel.h>
+--- a/arch/x86/crypto/chacha_glue.c
++++ b/arch/x86/crypto/chacha_glue.c
+@@ -7,7 +7,7 @@
+ */
+
+ #include <crypto/algapi.h>
+-#include <crypto/chacha.h>
++#include <crypto/internal/chacha.h>
+ #include <crypto/internal/simd.h>
+ #include <crypto/internal/skcipher.h>
+ #include <linux/kernel.h>
+--- a/crypto/Kconfig
++++ b/crypto/Kconfig
+@@ -1393,6 +1393,7 @@ config CRYPTO_SALSA20
+
+ config CRYPTO_CHACHA20
+ tristate "ChaCha stream cipher algorithms"
++ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_BLKCIPHER
+ help
+ The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms.
+--- a/crypto/chacha_generic.c
++++ b/crypto/chacha_generic.c
+@@ -8,29 +8,10 @@
+
+ #include <asm/unaligned.h>
+ #include <crypto/algapi.h>
+-#include <crypto/chacha.h>
++#include <crypto/internal/chacha.h>
+ #include <crypto/internal/skcipher.h>
+ #include <linux/module.h>
+
+-static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src,
+- unsigned int bytes, int nrounds)
+-{
+- /* aligned to potentially speed up crypto_xor() */
+- u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
+-
+- while (bytes >= CHACHA_BLOCK_SIZE) {
+- chacha_block(state, stream, nrounds);
+- crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE);
+- bytes -= CHACHA_BLOCK_SIZE;
+- dst += CHACHA_BLOCK_SIZE;
+- src += CHACHA_BLOCK_SIZE;
+- }
+- if (bytes) {
+- chacha_block(state, stream, nrounds);
+- crypto_xor_cpy(dst, src, stream, bytes);
+- }
+-}
+-
+ static int chacha_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx, const u8 *iv)
+ {
+@@ -48,8 +29,8 @@ static int chacha_stream_xor(struct skci
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE);
+
+- chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+- nbytes, ctx->nrounds);
++ chacha_crypt_generic(state, walk.dst.virt.addr,
++ walk.src.virt.addr, nbytes, ctx->nrounds);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+@@ -58,41 +39,10 @@ static int chacha_stream_xor(struct skci
+
+ void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv)
+ {
+- state[0] = 0x61707865; /* "expa" */
+- state[1] = 0x3320646e; /* "nd 3" */
+- state[2] = 0x79622d32; /* "2-by" */
+- state[3] = 0x6b206574; /* "te k" */
+- state[4] = ctx->key[0];
+- state[5] = ctx->key[1];
+- state[6] = ctx->key[2];
+- state[7] = ctx->key[3];
+- state[8] = ctx->key[4];
+- state[9] = ctx->key[5];
+- state[10] = ctx->key[6];
+- state[11] = ctx->key[7];
+- state[12] = get_unaligned_le32(iv + 0);
+- state[13] = get_unaligned_le32(iv + 4);
+- state[14] = get_unaligned_le32(iv + 8);
+- state[15] = get_unaligned_le32(iv + 12);
++ chacha_init_generic(state, ctx->key, iv);
+ }
+ EXPORT_SYMBOL_GPL(crypto_chacha_init);
+
+-static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
+- unsigned int keysize, int nrounds)
+-{
+- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+- int i;
+-
+- if (keysize != CHACHA_KEY_SIZE)
+- return -EINVAL;
+-
+- for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+- ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+-
+- ctx->nrounds = nrounds;
+- return 0;
+-}
+-
+ int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize)
+ {
+@@ -126,7 +76,7 @@ int crypto_xchacha_crypt(struct skcipher
+
+ /* Compute the subkey given the original key and first 128 nonce bits */
+ crypto_chacha_init(state, ctx, req->iv);
+- hchacha_block(state, subctx.key, ctx->nrounds);
++ hchacha_block_generic(state, subctx.key, ctx->nrounds);
+ subctx.nrounds = ctx->nrounds;
+
+ /* Build the real IV */
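
The hunk above is the XChaCha construction in miniature: HChaCha compresses the key plus the first 128 nonce bits into a subkey, and streaming then proceeds as ordinary ChaCha under that subkey. A hedged sketch of those two steps using the helpers named in this patch (the wrapper function itself is illustrative; it assumes <crypto/internal/chacha.h> and <linux/string.h>):

  static void xchacha_expand_sketch(const struct chacha_ctx *ctx,
                                    const u8 iv[XCHACHA_IV_SIZE],
                                    struct chacha_ctx *subctx, u8 real_iv[16])
  {
          u32 state[16];

          /* 1. HChaCha: key + nonce bytes 0..15 -> 256-bit subkey. */
          crypto_chacha_init(state, ctx, iv);
          hchacha_block_generic(state, subctx->key, ctx->nrounds);
          subctx->nrounds = ctx->nrounds;

          /* 2. Real IV: 64-bit stream position, then nonce bytes 16..23. */
          memcpy(&real_iv[0], iv + 24, 8);
          memcpy(&real_iv[8], iv + 16, 8);

          /* 3. The caller now streams with subctx/real_iv as plain ChaCha. */
  }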
+--- a/include/crypto/chacha.h
++++ b/include/crypto/chacha.h
+@@ -15,9 +15,8 @@
+ #ifndef _CRYPTO_CHACHA_H
+ #define _CRYPTO_CHACHA_H
+
+-#include <crypto/skcipher.h>
++#include <asm/unaligned.h>
+ #include <linux/types.h>
+-#include <linux/crypto.h>
+
+ /* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */
+ #define CHACHA_IV_SIZE 16
+@@ -29,26 +28,70 @@
+ /* 192-bit nonce, then 64-bit stream position */
+ #define XCHACHA_IV_SIZE 32
+
+-struct chacha_ctx {
+- u32 key[8];
+- int nrounds;
+-};
+-
+-void chacha_block(u32 *state, u8 *stream, int nrounds);
++void chacha_block_generic(u32 *state, u8 *stream, int nrounds);
+ static inline void chacha20_block(u32 *state, u8 *stream)
+ {
+- chacha_block(state, stream, 20);
++ chacha_block_generic(state, stream, 20);
+ }
+-void hchacha_block(const u32 *in, u32 *out, int nrounds);
+
+-void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
++void hchacha_block_arch(const u32 *state, u32 *out, int nrounds);
++void hchacha_block_generic(const u32 *state, u32 *out, int nrounds);
++
++static inline void hchacha_block(const u32 *state, u32 *out, int nrounds)
++{
++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
++ hchacha_block_arch(state, out, nrounds);
++ else
++ hchacha_block_generic(state, out, nrounds);
++}
+
+-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+- unsigned int keysize);
+-int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
+- unsigned int keysize);
++void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv);
++static inline void chacha_init_generic(u32 *state, const u32 *key, const u8 *iv)
++{
++ state[0] = 0x61707865; /* "expa" */
++ state[1] = 0x3320646e; /* "nd 3" */
++ state[2] = 0x79622d32; /* "2-by" */
++ state[3] = 0x6b206574; /* "te k" */
++ state[4] = key[0];
++ state[5] = key[1];
++ state[6] = key[2];
++ state[7] = key[3];
++ state[8] = key[4];
++ state[9] = key[5];
++ state[10] = key[6];
++ state[11] = key[7];
++ state[12] = get_unaligned_le32(iv + 0);
++ state[13] = get_unaligned_le32(iv + 4);
++ state[14] = get_unaligned_le32(iv + 8);
++ state[15] = get_unaligned_le32(iv + 12);
++}
+
+-int crypto_chacha_crypt(struct skcipher_request *req);
+-int crypto_xchacha_crypt(struct skcipher_request *req);
++static inline void chacha_init(u32 *state, const u32 *key, const u8 *iv)
++{
++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
++ chacha_init_arch(state, key, iv);
++ else
++ chacha_init_generic(state, key, iv);
++}
++
++void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
++ unsigned int bytes, int nrounds);
++void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src,
++ unsigned int bytes, int nrounds);
++
++static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src,
++ unsigned int bytes, int nrounds)
++{
++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
++ chacha_crypt_arch(state, dst, src, bytes, nrounds);
++ else
++ chacha_crypt_generic(state, dst, src, bytes, nrounds);
++}
++
++static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src,
++ unsigned int bytes)
++{
++ chacha_crypt(state, dst, src, bytes, 20);
++}
+
+ #endif /* _CRYPTO_CHACHA_H */
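
The static inlines above are the whole dispatch mechanism: when an architecture advertises CRYPTO_ARCH_HAVE_LIB_CHACHA, IS_ENABLED() resolves the call to the arch entry point at compile time, otherwise to the generic one. Later patches in this series fill in the arch side; as a hedged illustration of that contract (chacha_doneon() and the SIMD gating are borrowed from the ARM NEON glue and are illustrative here, not defined by this patch):

  void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
                         unsigned int bytes, int nrounds)
  {
          /* Fall back to the generic code when SIMD cannot be used. */
          if (!crypto_simd_usable() || bytes <= CHACHA_BLOCK_SIZE) {
                  chacha_crypt_generic(state, dst, src, bytes, nrounds);
                  return;
          }

          kernel_neon_begin();
          chacha_doneon(state, dst, src, bytes, nrounds); /* NEON core */
          kernel_neon_end();
  }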
+--- /dev/null
++++ b/include/crypto/internal/chacha.h
+@@ -0,0 +1,53 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++
++#ifndef _CRYPTO_INTERNAL_CHACHA_H
++#define _CRYPTO_INTERNAL_CHACHA_H
++
++#include <crypto/chacha.h>
++#include <crypto/internal/skcipher.h>
++#include <linux/crypto.h>
++
++struct chacha_ctx {
++ u32 key[8];
++ int nrounds;
++};
++
++void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
++
++static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
++ unsigned int keysize, int nrounds)
++{
++ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
++ int i;
++
++ if (keysize != CHACHA_KEY_SIZE)
++ return -EINVAL;
++
++ for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
++ ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
++
++ ctx->nrounds = nrounds;
++ return 0;
++}
++
++static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
++ unsigned int keysize)
++{
++ return chacha_setkey(tfm, key, keysize, 20);
++}
++
++static inline int chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
++ unsigned int keysize)
++{
++ return chacha_setkey(tfm, key, keysize, 12);
++}
++
++int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
++ unsigned int keysize);
++int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
++ unsigned int keysize);
++
++int crypto_chacha_crypt(struct skcipher_request *req);
++int crypto_xchacha_crypt(struct skcipher_request *req);
++
++#endif /* _CRYPTO_INTERNAL_CHACHA_H */
+--- a/lib/Makefile
++++ b/lib/Makefile
+@@ -26,8 +26,7 @@ endif
+
+ lib-y := ctype.o string.o vsprintf.o cmdline.o \
+ rbtree.o radix-tree.o timerqueue.o xarray.o \
+- idr.o extable.o \
+- sha1.o chacha.o irq_regs.o argv_split.o \
++ idr.o extable.o sha1.o irq_regs.o argv_split.o \
+ flex_proportions.o ratelimit.o show_mem.o \
+ is_single_threaded.o plist.o decompress.o kobject_uevent.o \
+ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
+--- a/lib/crypto/Kconfig
++++ b/lib/crypto/Kconfig
+@@ -8,6 +8,32 @@ config CRYPTO_LIB_AES
+ config CRYPTO_LIB_ARC4
+ tristate
+
++config CRYPTO_ARCH_HAVE_LIB_CHACHA
++ tristate
++ help
++ Declares whether the architecture provides an arch-specific
++ accelerated implementation of the ChaCha library interface,
++ either builtin or as a module.
++
++config CRYPTO_LIB_CHACHA_GENERIC
++ tristate
++ select CRYPTO_ALGAPI
++ help
++ This symbol can be depended upon by arch implementations of the
++ ChaCha library interface that require the generic code as a
++ fallback, e.g., for SIMD implementations. If no arch specific
++ implementation is enabled, this implementation serves the users
++ of CRYPTO_LIB_CHACHA.
++
++config CRYPTO_LIB_CHACHA
++ tristate "ChaCha library interface"
++ depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
++ select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n
++ help
++ Enable the ChaCha library interface. This interface may be fulfilled
++ by either the generic implementation or an arch-specific one, if one
++ is available and enabled.
++
+ config CRYPTO_LIB_DES
+ tristate
+
+--- a/lib/crypto/Makefile
++++ b/lib/crypto/Makefile
+@@ -1,5 +1,9 @@
+ # SPDX-License-Identifier: GPL-2.0
+
++# chacha is used by the /dev/random driver which is always builtin
++obj-y += chacha.o
++obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o
++
+ obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
+ libaes-y := aes.o
+
+--- a/lib/chacha.c
++++ /dev/null
+@@ -1,113 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * The "hash function" used as the core of the ChaCha stream cipher (RFC7539)
+- *
+- * Copyright (C) 2015 Martin Willi
+- */
+-
+-#include <linux/kernel.h>
+-#include <linux/export.h>
+-#include <linux/bitops.h>
+-#include <linux/cryptohash.h>
+-#include <asm/unaligned.h>
+-#include <crypto/chacha.h>
+-
+-static void chacha_permute(u32 *x, int nrounds)
+-{
+- int i;
+-
+- /* whitelist the allowed round counts */
+- WARN_ON_ONCE(nrounds != 20 && nrounds != 12);
+-
+- for (i = 0; i < nrounds; i += 2) {
+- x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16);
+- x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16);
+- x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16);
+- x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16);
+-
+- x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12);
+- x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12);
+- x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12);
+- x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12);
+-
+- x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8);
+- x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8);
+- x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8);
+- x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8);
+-
+- x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7);
+- x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7);
+- x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7);
+- x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7);
+-
+- x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16);
+- x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16);
+- x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16);
+- x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16);
+-
+- x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12);
+- x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12);
+- x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12);
+- x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12);
+-
+- x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8);
+- x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8);
+- x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8);
+- x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8);
+-
+- x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7);
+- x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7);
+- x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7);
+- x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7);
+- }
+-}
+-
+-/**
+- * chacha_block - generate one keystream block and increment block counter
+- * @state: input state matrix (16 32-bit words)
+- * @stream: output keystream block (64 bytes)
+- * @nrounds: number of rounds (20 or 12; 20 is recommended)
+- *
+- * This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
+- * The caller has already converted the endianness of the input. This function
+- * also handles incrementing the block counter in the input matrix.
+- */
+-void chacha_block(u32 *state, u8 *stream, int nrounds)
+-{
+- u32 x[16];
+- int i;
+-
+- memcpy(x, state, 64);
+-
+- chacha_permute(x, nrounds);
+-
+- for (i = 0; i < ARRAY_SIZE(x); i++)
+- put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
+-
+- state[12]++;
+-}
+-EXPORT_SYMBOL(chacha_block);
+-
+-/**
+- * hchacha_block - abbreviated ChaCha core, for XChaCha
+- * @in: input state matrix (16 32-bit words)
+- * @out: output (8 32-bit words)
+- * @nrounds: number of rounds (20 or 12; 20 is recommended)
+- *
+- * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
+- * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha
+- * skips the final addition of the initial state, and outputs only certain words
+- * of the state. It should not be used for streaming directly.
+- */
+-void hchacha_block(const u32 *in, u32 *out, int nrounds)
+-{
+- u32 x[16];
+-
+- memcpy(x, in, 64);
+-
+- chacha_permute(x, nrounds);
+-
+- memcpy(&out[0], &x[0], 16);
+- memcpy(&out[4], &x[12], 16);
+-}
+-EXPORT_SYMBOL(hchacha_block);
+--- /dev/null
++++ b/lib/crypto/chacha.c
+@@ -0,0 +1,115 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * The "hash function" used as the core of the ChaCha stream cipher (RFC7539)
++ *
++ * Copyright (C) 2015 Martin Willi
++ */
++
++#include <linux/bug.h>
++#include <linux/kernel.h>
++#include <linux/export.h>
++#include <linux/bitops.h>
++#include <linux/string.h>
++#include <linux/cryptohash.h>
++#include <asm/unaligned.h>
++#include <crypto/chacha.h>
++
++static void chacha_permute(u32 *x, int nrounds)
++{
++ int i;
++
++ /* whitelist the allowed round counts */
++ WARN_ON_ONCE(nrounds != 20 && nrounds != 12);
++
++ for (i = 0; i < nrounds; i += 2) {
++ x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16);
++ x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16);
++ x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16);
++ x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16);
++
++ x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12);
++ x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12);
++ x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12);
++ x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12);
++
++ x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8);
++ x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8);
++ x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8);
++ x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8);
++
++ x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7);
++ x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7);
++ x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7);
++ x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7);
++
++ x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16);
++ x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16);
++ x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16);
++ x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16);
++
++ x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12);
++ x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12);
++ x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12);
++ x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12);
++
++ x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8);
++ x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8);
++ x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8);
++ x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8);
++
++ x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7);
++ x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7);
++ x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7);
++ x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7);
++ }
++}
++
++/**
++ * chacha_block - generate one keystream block and increment block counter
++ * @state: input state matrix (16 32-bit words)
++ * @stream: output keystream block (64 bytes)
++ * @nrounds: number of rounds (20 or 12; 20 is recommended)
++ *
++ * This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
++ * The caller has already converted the endianness of the input. This function
++ * also handles incrementing the block counter in the input matrix.
++ */
++void chacha_block_generic(u32 *state, u8 *stream, int nrounds)
++{
++ u32 x[16];
++ int i;
++
++ memcpy(x, state, 64);
++
++ chacha_permute(x, nrounds);
++
++ for (i = 0; i < ARRAY_SIZE(x); i++)
++ put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
++
++ state[12]++;
++}
++EXPORT_SYMBOL(chacha_block_generic);
++
++/**
++ * hchacha_block_generic - abbreviated ChaCha core, for XChaCha
++ * @state: input state matrix (16 32-bit words)
++ * @out: output (8 32-bit words)
++ * @nrounds: number of rounds (20 or 12; 20 is recommended)
++ *
++ * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
++ * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha
++ * skips the final addition of the initial state, and outputs only certain words
++ * of the state. It should not be used for streaming directly.
++ */
++void hchacha_block_generic(const u32 *state, u32 *out, int nrounds)
++{
++ u32 x[16];
++
++ memcpy(x, state, 64);
++
++ chacha_permute(x, nrounds);
++
++ memcpy(&out[0], &x[0], 16);
++ memcpy(&out[4], &x[12], 16);
++}
++EXPORT_SYMBOL(hchacha_block_generic);
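
A behavioural note on chacha_block_generic() above: each call emits one 64-byte keystream block and post-increments the 32-bit block counter in state[12], so back-to-back calls walk consecutive keystream blocks. A short sketch, assuming a key and IV from the surrounding context:

  static void keystream_walk_sketch(const u32 key[8], const u8 iv[CHACHA_IV_SIZE])
  {
          u32 state[16];
          u8 block0[CHACHA_BLOCK_SIZE], block1[CHACHA_BLOCK_SIZE];

          chacha_init_generic(state, key, iv);     /* counter = le32(iv[0..3]) */
          chacha_block_generic(state, block0, 20); /* keystream block n; state[12]++ */
          chacha_block_generic(state, block1, 20); /* keystream block n+1 */
  }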
+--- /dev/null
++++ b/lib/crypto/libchacha.c
+@@ -0,0 +1,35 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++/*
++ * The ChaCha stream cipher (RFC7539)
++ *
++ * Copyright (C) 2015 Martin Willi
++ */
++
++#include <linux/kernel.h>
++#include <linux/export.h>
++#include <linux/module.h>
++
++#include <crypto/algapi.h> // for crypto_xor_cpy
++#include <crypto/chacha.h>
++
++void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src,
++ unsigned int bytes, int nrounds)
++{
++ /* aligned to potentially speed up crypto_xor() */
++ u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
++
++ while (bytes >= CHACHA_BLOCK_SIZE) {
++ chacha_block_generic(state, stream, nrounds);
++ crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE);
++ bytes -= CHACHA_BLOCK_SIZE;
++ dst += CHACHA_BLOCK_SIZE;
++ src += CHACHA_BLOCK_SIZE;
++ }
++ if (bytes) {
++ chacha_block_generic(state, stream, nrounds);
++ crypto_xor_cpy(dst, src, stream, bytes);
++ }
++}
++EXPORT_SYMBOL(chacha_crypt_generic);
++
++MODULE_LICENSE("GPL");
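
Because chacha_crypt_generic() only XORs keystream into the data, decryption is the same operation re-run from the same starting state. A hedged round-trip sketch (buffer and parameter names illustrative):

  static void roundtrip_sketch(u8 *buf, unsigned int len,
                               const u32 key[8], const u8 iv[CHACHA_IV_SIZE])
  {
          u32 state[16];

          chacha_init_generic(state, key, iv);
          chacha_crypt_generic(state, buf, buf, len, 20); /* encrypt in place */

          chacha_init_generic(state, key, iv);            /* rewind the stream */
          chacha_crypt_generic(state, buf, buf, len, 20); /* same XOR: plaintext restored */
  }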